diff --git a/Data/UTest/SphericalPrevious.data.double b/Data/UTest/SphericalPrevious.data.double index 3be9a5bb56d5a603927a4e504ffa7135c85d5aaf..5a94708ae98ac956f0994a93873d65dbb5a01b83 100644 Binary files a/Data/UTest/SphericalPrevious.data.double and b/Data/UTest/SphericalPrevious.data.double differ diff --git a/Src/Adaptive/FVariadicParticleContainer.hpp b/Src/Adaptive/FVariadicParticleContainer.hpp index 72750fe227ce6e71ddb09ef480f4a895cf7dc796..85cfd214b9010c074866d130044493ca3e894a82 100644 --- a/Src/Adaptive/FVariadicParticleContainer.hpp +++ b/Src/Adaptive/FVariadicParticleContainer.hpp @@ -165,7 +165,7 @@ private: */ template<class... Types> FSize getSavedSize_impl(std::tuple<Types*...>) const { - FSize tmp = sizeof(this->size()); + FSize tmp = sizeof(FSize); // Sum, for T in Types, of (sizeof(T) * this->size()) auto l = {(tmp += sizeof(Types) * this->size(), 0)..., 0}; (void)l; @@ -198,7 +198,7 @@ private: */ template<class BufferWriter, std::size_t ... Is> void save_impl(BufferWriter& buffer, inria::index_sequence<Is...>) const { - buffer << this->size(); + buffer << FSize(this->size()); // For each sub-array `s`, write `s` to the buffer auto l = { (buffer.write(std::get<Is>(this->data()), this->size()),0)..., diff --git a/Src/Files/FMpiStaticTreeBuilder.hpp b/Src/Files/FMpiStaticTreeBuilder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..10d85f9a52c8c98ba2ce6bc79df6e2db82ac0f87 --- /dev/null +++ b/Src/Files/FMpiStaticTreeBuilder.hpp @@ -0,0 +1,208 @@ +// See LICENCE file at project root +#ifndef FMPISTATICTREEBUILDER_H +#define FMPISTATICTREEBUILDER_H + +#include "../Utils/FMpi.hpp" +#include "../Utils/FQuickSortMpi.hpp" +#include "../Utils/FBitonicSort.hpp" +#include "../Utils/FTic.hpp" +#include "../Utils/FEnv.hpp" + +#include "../Utils/FMemUtils.hpp" + +#include "../Containers/FVector.hpp" + +#include "../Utils/FLeafBalance.hpp" +#include "../Utils/FEqualize.hpp" + +#include "../Containers/FCoordinateComputer.hpp" + +/** + * This class manage the loading of particles for the mpi version. + * It work in several steps. + * First it load the data from a file or an array and sort them amon the MPI processes. + * Then, it carrefully manage if a leaf is shared by multiple processes. + * Finally it balances the data using an external interval builder. + * + */ +template<class FReal, class ParticleClass> +class FMpiStaticTreeBuilder{ +public: + /** + * A particle may not have a MortonIndex Method (set/get morton index) + * But in this algorithm they are sorted based on their morton indexes. + * So an IndexedParticle is storing a real particle + its index. + */ + struct IndexedParticle{ + public: + MortonIndex index; + ParticleClass particle; + + operator MortonIndex() const { + return this->index; + } + }; + + ////////////////////////////////////////////////////////////////////////// + // The builder function + ////////////////////////////////////////////////////////////////////////// + + template <class ContainerClass> + static void DistributeArrayToContainer(const FMpi::FComm& communicator, const ParticleClass originalParticlesArray[], const FSize originalNbParticles, + const FPoint<FReal>& boxCenter, const FReal boxWidth, const int treeHeight, + ContainerClass* particleSaver){ + // Allocate the particles array + std::unique_ptr<IndexedParticle[]> originalParticlesCore(new IndexedParticle[originalNbParticles]); + FMemUtils::memset(originalParticlesCore.get(), 0, sizeof(IndexedParticle) * originalNbParticles); + + FPoint<FReal> boxCorner(boxCenter - (boxWidth/2)); + FTreeCoordinate host; + const FReal boxWidthAtLeafLevel = boxWidth / FReal(1 << (treeHeight - 1) ); + + FLOG(FTic counterTime); + + MortonIndex minMaxIndexes[2]; + minMaxIndexes[0] = std::numeric_limits<decltype(MortonIndex())>::max(); + minMaxIndexes[1] = std::numeric_limits<decltype(MortonIndex())>::min(); + + // Fill the array and compute the morton index + for(FSize idxPart = 0 ; idxPart < originalNbParticles ; ++idxPart){ + originalParticlesCore[idxPart].particle = originalParticlesArray[idxPart]; + host.setX( FCoordinateComputer::GetTreeCoordinate<FReal>( originalParticlesCore[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidth, boxWidthAtLeafLevel, + treeHeight )); + host.setY( FCoordinateComputer::GetTreeCoordinate<FReal>( originalParticlesCore[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidth, boxWidthAtLeafLevel, + treeHeight )); + host.setZ( FCoordinateComputer::GetTreeCoordinate<FReal>( originalParticlesCore[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidth, boxWidthAtLeafLevel, + treeHeight )); + + originalParticlesCore[idxPart].index = host.getMortonIndex(); + + minMaxIndexes[0] = std::min(host.getMortonIndex(),minMaxIndexes[0]); + minMaxIndexes[1] = std::max(host.getMortonIndex(),minMaxIndexes[1]); + } + + FQuickSort<IndexedParticle>::QsOmp(originalParticlesCore.get(), originalNbParticles); + + MortonIndex globalMinMaxIndexes[2]; + FMpi::MpiAssert(MPI_Allreduce(&minMaxIndexes[0], + &globalMinMaxIndexes[0], + 1, + MPI_LONG_LONG, + MPI_MIN, + communicator.getComm()), __LINE__); + + FMpi::MpiAssert(MPI_Allreduce(&minMaxIndexes[1], + &globalMinMaxIndexes[1], + 1, + MPI_LONG_LONG, + MPI_MAX, + communicator.getComm()), __LINE__); + + + const int nb_processes = communicator.processCount(); + const int my_rank = communicator.processId(); + + const MortonIndex intervalSize = 1 + globalMinMaxIndexes[1] - globalMinMaxIndexes[0]; + const MortonIndex stepNbCells = (intervalSize+nb_processes-1)/nb_processes; + + std::vector<int> nb_items_to_send(nb_processes, 0); + for(FSize idxPart = 0 ; idxPart < originalNbParticles ; ++idxPart){ + nb_items_to_send[(originalParticlesCore[idxPart].index - globalMinMaxIndexes[0])/stepNbCells] += 1; + } + + std::vector<int> offset_items_to_send(nb_processes+1, 0); + for(int idxProc = 0; idxProc < nb_processes ; ++idxProc){ + assert(std::numeric_limits<int>::max()-offset_items_to_send[idxProc] >= nb_items_to_send[idxProc]); + offset_items_to_send[idxProc+1] = offset_items_to_send[idxProc] + nb_items_to_send[idxProc]; + } + + std::vector<int> nb_items_to_sendrecv_all(nb_processes*nb_processes); + FMpi::MpiAssert(MPI_Allgather(const_cast<int*>(nb_items_to_send.data()), nb_processes, MPI_INT, + nb_items_to_sendrecv_all.data(), nb_processes, MPI_INT, + communicator.getComm()), __LINE__); + + int total_to_recv = 0; + std::vector<int> nb_items_to_recv(nb_processes, 0); + std::vector<int> offset_items_to_recv(nb_processes+1, 0); + for(int idx_proc = 0 ; idx_proc < nb_processes ; ++idx_proc){ + const int nbrecv = nb_items_to_sendrecv_all[idx_proc*nb_processes + my_rank]; + assert(static_cast<long long int>(total_to_recv) + static_cast<long long int>(nbrecv) <= std::numeric_limits<int>::max()); + total_to_recv += nbrecv; + nb_items_to_recv[idx_proc] = nbrecv; + assert(static_cast<long long int>(nb_items_to_recv[idx_proc]) + static_cast<long long int>(offset_items_to_recv[idx_proc]) <= std::numeric_limits<int>::max()); + offset_items_to_recv[idx_proc+1] = nb_items_to_recv[idx_proc] + + offset_items_to_recv[idx_proc]; + } + + + std::unique_ptr<IndexedParticle[]> out_to_recv(new IndexedParticle[total_to_recv]); + + { + // Convert to byte! + const int sizeOfParticle = sizeof(IndexedParticle); + + for(int& val : nb_items_to_send){ + assert(std::numeric_limits<int>::max()-sizeOfParticle >= val); + val *= sizeOfParticle; + } + for(int& val : offset_items_to_send){ + assert(std::numeric_limits<int>::max()-sizeOfParticle >= val); + val *= sizeOfParticle; + } + for(int& val : nb_items_to_recv){ + assert(std::numeric_limits<int>::max()-sizeOfParticle >= val); + val *= sizeOfParticle; + } + for(int& val : offset_items_to_recv){ + assert(std::numeric_limits<int>::max()-sizeOfParticle >= val); + val *= sizeOfParticle; + } + + FMpi::MpiAssert(MPI_Alltoallv(originalParticlesCore.get(), const_cast<int*>(nb_items_to_send.data()), + const_cast<int*>(offset_items_to_send.data()), MPI_BYTE, out_to_recv.get(), + const_cast<int*>(nb_items_to_recv.data()), const_cast<int*>(offset_items_to_recv.data()), MPI_BYTE, + communicator.getComm()), __LINE__); + } + + for(FSize idxPart = 0 ; idxPart < total_to_recv ; ++idxPart){ + particleSaver->push(out_to_recv[idxPart].particle); + } + +#ifdef SCALFMM_USE_LOG + /** To produce stats after the Equalize phase */ + { + const FSize finalNbParticles = particleSaver->getSize(); + + if(communicator.processId() != 0){ + FMpi::MpiAssert(MPI_Gather(const_cast<FSize*>(&finalNbParticles),1,FMpi::GetType(finalNbParticles),nullptr, + 1,FMpi::GetType(finalNbParticles),0,communicator.getComm()), __LINE__); + } + else{ + const int nbProcs = communicator.processCount(); + std::unique_ptr<FSize[]> nbPartsPerProc(new FSize[nbProcs]); + + FMpi::MpiAssert(MPI_Gather(const_cast<FSize*>(&finalNbParticles),1,FMpi::GetType(finalNbParticles),nbPartsPerProc.get(), + 1,FMpi::GetType(finalNbParticles),0,communicator.getComm()), __LINE__); + + FReal averageNbParticles = 0; + FSize minNbParticles = finalNbParticles; + FSize maxNbParticles = finalNbParticles; + + for(int idxProc = 0 ; idxProc < nbProcs ; ++idxProc){ + maxNbParticles = FMath::Max(maxNbParticles, nbPartsPerProc[idxProc]); + minNbParticles = FMath::Min(minNbParticles, nbPartsPerProc[idxProc]); + averageNbParticles += FReal(nbPartsPerProc[idxProc]); + } + averageNbParticles /= float(nbProcs); + + printf("Particles Distribution: End of Equalize Phase : \n \t Min number of parts : %lld \n \t Max number of parts : %lld \n \t Average number of parts : %e \n", + minNbParticles,maxNbParticles,averageNbParticles); + } + } +#endif + } + + +}; + +#endif // FMPITREEBUILDER_H diff --git a/Src/Files/FTreeIO.hpp b/Src/Files/FTreeIO.hpp index 9e86fa0439acc8c81c6cc00b48c4b255aec14bc3..5cc7d682097fa90537d610b9c16b771b0feb79d0 100644 --- a/Src/Files/FTreeIO.hpp +++ b/Src/Files/FTreeIO.hpp @@ -192,6 +192,7 @@ public: FSize sizeOfLeaf = 0; file.read((char*)&sizeOfLeaf, sizeof(FSize)); + buffer.seek(0); buffer.reserve(sizeOfLeaf); file.read((char*)buffer.data(), sizeOfLeaf); @@ -226,6 +227,7 @@ public: FSize sizeOfCell = 0; file.read((char*)&sizeOfCell, sizeof(FSize)); + buffer.seek(0); buffer.reserve(sizeOfCell); file.read((char*)buffer.data(), sizeOfCell); diff --git a/UTests/utestSphericalWithPrevious.cpp b/UTests/utestSphericalWithPrevious.cpp index 718bdaf3c0b910840c29b7ab02d2b0336f7f3653..54c00723403401bcfe2e3b2c81e68cf69017dce9 100644 --- a/UTests/utestSphericalWithPrevious.cpp +++ b/UTests/utestSphericalWithPrevious.cpp @@ -97,7 +97,7 @@ class TestSphericalWithPrevious : public FUTester<TestSphericalWithPrevious> { algo.execute(); // If needed save the result - //FTreeIO<FReal>::Save<OctreeClass, CellClass, LeafClass, ContainerClass >(DataFile.c_str(), testTree); + // FTreeIO<FReal>::Save<OctreeClass, CellClass, LeafClass, ContainerClass >(DataFile.c_str(), testTree); // Load previous result OctreeClass goodTree(NbLevels, SizeSubLevels, loader.getBoxWidth(), loader.getCenterOfBox()); @@ -127,10 +127,10 @@ class TestSphericalWithPrevious : public FUTester<TestSphericalWithPrevious> { const ContainerClass* goodLeaf = goodOctreeIterator.getCurrentListSrc(); for(FSize idxPart = 0 ; idxPart < testLeaf->getNbParticles() ; ++idxPart ){ - uassert( IsSimilar(goodLeaf->getPotentials()[idxPart], testLeaf->getPotentials()[idxPart]) ); - uassert( IsSimilar(goodLeaf->getForcesX()[idxPart], testLeaf->getForcesX()[idxPart]) ); - uassert( IsSimilar(goodLeaf->getForcesY()[idxPart], testLeaf->getForcesY()[idxPart]) ); - uassert( IsSimilar(goodLeaf->getForcesZ()[idxPart], testLeaf->getForcesZ()[idxPart]) ); + uassert( IsSimilar(goodLeaf->getPotentials()[idxPart], testLeaf->getPotentials()[idxPart]) ); + uassert( IsSimilar(goodLeaf->getForcesX()[idxPart], testLeaf->getForcesX()[idxPart]) ); + uassert( IsSimilar(goodLeaf->getForcesY()[idxPart], testLeaf->getForcesY()[idxPart]) ); + uassert( IsSimilar(goodLeaf->getForcesZ()[idxPart], testLeaf->getForcesZ()[idxPart]) ); } if(!testOctreeIterator.moveRight()){ diff --git a/UTests/utestStaticMpiTreeBuilder.cpp b/UTests/utestStaticMpiTreeBuilder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..71e93200477016ad86a70ed4bbf503977268be4d --- /dev/null +++ b/UTests/utestStaticMpiTreeBuilder.cpp @@ -0,0 +1,140 @@ +// See LICENCE file at project root + +// ==== CMAKE ===== +// @FUSE_MPI +// ================ + +#include "ScalFmmConfig.h" +#include <cstdlib> +#include <string.h> +#include <stdexcept> +#include <algorithm> +#include <vector> + +#include "FUTester.hpp" + +#include "Utils/FMpi.hpp" +#include "Containers/FVector.hpp" + +#include "Files/FRandomLoader.hpp" +#include "Utils/FLeafBalance.hpp" +#include "Containers/FTreeCoordinate.hpp" +#include "Containers/FCoordinateComputer.hpp" + +#include "Utils/FQuickSortMpi.hpp" +#include "Utils/FBitonicSort.hpp" +#include "Files/FMpiStaticTreeBuilder.hpp" +#include "Core/FCoreCommon.hpp" + +#include "Utils/FPoint.hpp" +#include "Utils/FMath.hpp" + + +class TestMpiTreeBuilder : public FUTesterMpi< class TestMpiTreeBuilder> { + + template <class FReal> + struct TestParticle{ + FSize indexInFile; + FPoint<FReal> position; + FReal physicalValue; + + const FPoint<FReal>& getPosition()const{ + return position; + } + + bool operator==(const TestParticle& other){ + return indexInFile == other.indexInFile + && position.getX() == other.position.getX() + && position.getY() == other.position.getY() + && position.getZ() == other.position.getZ() + && physicalValue == other.physicalValue; + } + }; + + template <class FReal, int localNbParticlesLocal> + void RunTest(){ + const int totalNbParticles = localNbParticlesLocal*app.global().processCount(); + std::unique_ptr<TestParticle<FReal>[]> globalParticles(new TestParticle<FReal>[totalNbParticles]); + + FRandomLoader<FReal> loader(totalNbParticles, 1., FPoint<FReal>(0,0,0), 0); + + for(int idxPart = 0 ; idxPart < totalNbParticles ; ++idxPart){ + loader.fillParticle(&globalParticles[idxPart].position); + globalParticles[idxPart].physicalValue = FReal(idxPart); + globalParticles[idxPart].indexInFile = idxPart; + } + + for(int treeHeigt = 3 ; treeHeigt < 8 ; ++treeHeigt){ + FVector<TestParticle<FReal>> finalParticles; + FMpiStaticTreeBuilder< FReal,TestParticle<FReal> >::DistributeArrayToContainer(app.global(), + &globalParticles[localNbParticlesLocal*app.global().processId()], + localNbParticlesLocal, + loader.getCenterOfBox(), + loader.getBoxWidth(),treeHeigt, + &finalParticles); + + { + assert(finalParticles.getSize() <= std::numeric_limits<int>::max()); + int myNbParticles = int(finalParticles.getSize()); + int nbParticlesAfter = 0; + FMpi::MpiAssert(MPI_Allreduce(&myNbParticles, + &nbParticlesAfter, + 1, + MPI_INT, + MPI_SUM, + app.global().getComm()), __LINE__); + uassert(nbParticlesAfter == totalNbParticles); + } + + std::unique_ptr<int[]> exists(new int[totalNbParticles]()); + + for(int idxPart = 0 ;idxPart < finalParticles.getSize() ; ++idxPart){ + uassert(finalParticles[idxPart] == globalParticles[finalParticles[idxPart].indexInFile]); + uassert(exists[finalParticles[idxPart].indexInFile] == 0); + exists[finalParticles[idxPart].indexInFile] += 1; + } + + std::unique_ptr<int[]> existsAll(new int[totalNbParticles]()); + + FMpi::MpiAssert(MPI_Allreduce(exists.get(), + existsAll.get(), + totalNbParticles, + MPI_INT, + MPI_SUM, + app.global().getComm()), __LINE__); + + + for(int idxPart = 0 ; idxPart < totalNbParticles ; ++idxPart){ + uassert(existsAll[idxPart] == 1); + } + } + } + + /** If memstas is running print the memory used */ + void PostTest() { + if( FMemStats::controler.isUsed() ){ + std::cout << app.global().processId() << "-> Memory used at the end " << FMemStats::controler.getCurrentAllocated() + << " Bytes (" << FMemStats::controler.getCurrentAllocatedMB() << "MB)\n"; + std::cout << app.global().processId() << "-> Max memory used " << FMemStats::controler.getMaxAllocated() + << " Bytes (" << FMemStats::controler.getMaxAllocatedMB() << "MB)\n"; + std::cout << app.global().processId() << "-> Total memory used " << FMemStats::controler.getTotalAllocated() + << " Bytes (" << FMemStats::controler.getTotalAllocatedMB() << "MB)\n"; + } + } + + void SetTests(){ + AddTest(&TestMpiTreeBuilder::RunTest<double, 1000>,"Generate particles and distribute them"); + AddTest(&TestMpiTreeBuilder::RunTest<float, 1000>,"Generate particles and distribute them"); + + AddTest(&TestMpiTreeBuilder::RunTest<double, 1200>,"Generate particles and distribute them"); + AddTest(&TestMpiTreeBuilder::RunTest<float, 1200>,"Generate particles and distribute them"); + } + +public: + TestMpiTreeBuilder(int argc,char ** argv) : FUTesterMpi(argc,argv){ + } + + +}; + +TestClassMpi(TestMpiTreeBuilder);