diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp index c177d64ec80078ecb6cda95f2317676b6e5a4109..4d097822a13e2e0cfbb62662549f534c80d7bc7c 100644 --- a/Src/Core/FFmmAlgorithmThreadProc.hpp +++ b/Src/Core/FFmmAlgorithmThreadProc.hpp @@ -80,12 +80,12 @@ public: * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmThreadProc(FMpi& inApp, OctreeClass* const inTree, KernelClass* const inKernels) + FFmmAlgorithmThreadProc(const FMpi::FComm& comm, OctreeClass* const inTree, KernelClass* const inKernels) : tree(inTree) , kernels(0), numberOfLeafs(0), - MaxThreads(omp_get_max_threads()), nbProcess(inApp.processCount()), idProcess(inApp.processId()), - OctreeHeight(tree->getHeight()),intervals(new Interval[inApp.processCount()]), - realIntervalsPerLevel(new Interval[inApp.processCount() * tree->getHeight()]), - workingIntervalsPerLevel(new Interval[inApp.processCount() * tree->getHeight()]){ + MaxThreads(omp_get_max_threads()), nbProcess(comm.processCount()), idProcess(comm.processId()), + OctreeHeight(tree->getHeight()),intervals(new Interval[comm.processCount()]), + realIntervalsPerLevel(new Interval[comm.processCount() * tree->getHeight()]), + workingIntervalsPerLevel(new Interval[comm.processCount() * tree->getHeight()]){ fassert(tree, "tree cannot be null", __LINE__, __FILE__); diff --git a/Src/Files/FMpiFmaLoader.hpp b/Src/Files/FMpiFmaLoader.hpp index 3863422c6c0bb6c3f4ffb67b29e2e772a705d68b..d8a0a35c2445852d2cd50268b4fa382ce2bb6dda 100644 --- a/Src/Files/FMpiFmaLoader.hpp +++ b/Src/Files/FMpiFmaLoader.hpp @@ -54,112 +54,115 @@ public: * @param filename the name of the file to open * you can test if file is successfuly open by calling hasNotFinished() */ - FMpiFmaLoader(const char* const filename, FMpi& app) + FMpiFmaLoader(const char* const filename, const FMpi::FComm& comm, const bool useMpiIO = false) : boxWidth(0), totalNbParticles(0), nbParticles(0), isOpenFlag(false), particles(0), idxParticles(0) { - /*char nonConstFilename[512]; - strcpy(nonConstFilename,filename); - MPI_File file; - if(MPI_File_open(MPI::COMM_WORLD, nonConstFilename, MPI::MODE_RDONLY, MPI::INFO_NULL, &file) == MPI_SUCCESS){ - int sizeOfElement(0); - FReal xyzBoxWidth[4]; - - MPI_Status status; - if( MPI_File_read(file, &sizeOfElement, 1, MPI_INT, &status) == MPI_SUCCESS - && MPI_File_read(file, &this->totalNbParticles, 1, MPI_LONG_LONG, &status) == MPI_SUCCESS - && MPI_File_read(file, xyzBoxWidth, 4, MPI_FLOAT, &status) == MPI_SUCCESS ){ - - FDEBUG(if(sizeOfElement != sizeof(FReal)){) + if( useMpiIO ){ + char nonConstFilename[512]; + strcpy(nonConstFilename,filename); + MPI_File file; + if(MPI_File_open(MPI::COMM_WORLD, nonConstFilename, MPI::MODE_RDONLY, MPI::INFO_NULL, &file) == MPI_SUCCESS){ + int sizeOfElement(0); + FReal xyzBoxWidth[4]; + + MPI_Status status; + if( MPI_File_read(file, &sizeOfElement, 1, MPI_INT, &status) == MPI_SUCCESS + && MPI_File_read(file, &this->totalNbParticles, 1, MPI_LONG_LONG, &status) == MPI_SUCCESS + && MPI_File_read(file, xyzBoxWidth, 4, MPI_FLOAT, &status) == MPI_SUCCESS ){ + + FDEBUG(if(sizeOfElement != sizeof(FReal)){) + FDEBUG( FDebug::Controller.writeFromLine("Warning type size between file and FReal are differents\n", __LINE__, __FILE__); ) + FDEBUG(}) + + this->boxWidth = xyzBoxWidth[3]; + this->centerOfBox.setPosition(xyzBoxWidth[0],xyzBoxWidth[1],xyzBoxWidth[2]); + this->boxWidth *= 2; + this->isOpenFlag = true; + + // load my particles + MPI_Offset headDataOffSet(0); + MPI_File_get_position(file, &headDataOffSet); + + MPI_Offset filesize(0); + MPI_File_get_size(file, &filesize); // in bytes + filesize = (filesize - headDataOffSet) / sizeof(FReal); + if(filesize/4 != this->totalNbParticles){ + printf("Error fileSize %lld, nbPart %lld\n",filesize/4, this->totalNbParticles); + } + // in number of floats + const FSize startPart = comm.getLeft(this->totalNbParticles); + const FSize endPart = comm.getRight(this->totalNbParticles); + nbParticles = (endPart - startPart); + const FSize bufsize = nbParticles * 4; + // local number to read + particles = new FReal[bufsize]; + + if( sizeof(FReal) == sizeof(float) ){ + MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal), particles, int(bufsize), MPI_FLOAT, &status); + } + else{ + MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal), particles, int(bufsize), MPI_DOUBLE, &status); + } + + + // check if needed + int count(0); + MPI_Get_count(&status, MPI_INT, &count); + FDEBUG(if(count / 4 != this->nbParticles){) + FDEBUG( FDebug::Controller<< "Error read " << count << " data, nbPart is " << this->nbParticles << __LINE__ << " " << __FILE__ << "\n"; ) + FDEBUG(}) + } + else{ + this->totalNbParticles = 0; + } + MPI_File_close(&file); + } + } + else { + FILE* file(fopen(filename, "rb")); + int removeWarning(0); + // test if open + if(file != NULL) { + int sizeOfElement(0); + removeWarning += fread(&sizeOfElement, sizeof(int), 1, file); + FDEBUG(if(sizeOfElement != int(sizeof(FReal)) ){) FDEBUG( FDebug::Controller.writeFromLine("Warning type size between file and FReal are differents\n", __LINE__, __FILE__); ) + printf("%d sizeofelement\n",sizeOfElement); FDEBUG(}) + removeWarning += fread(&this->totalNbParticles, sizeof(FSize), 1, file); - this->boxWidth = xyzBoxWidth[3]; - this->centerOfBox.setPosition(xyzBoxWidth[0],xyzBoxWidth[1],xyzBoxWidth[2]); + removeWarning += fread(&this->boxWidth, sizeof(FReal), 1, file); this->boxWidth *= 2; + + FReal x,y,z; + removeWarning += fread(&x, sizeof(FReal), 1, file); + removeWarning += fread(&y, sizeof(FReal), 1, file); + removeWarning += fread(&z, sizeof(FReal), 1, file); + this->centerOfBox.setPosition(x,y,z); + this->isOpenFlag = true; - // load my particles - MPI_Offset headDataOffSet(0); - MPI_File_get_position(file, &headDataOffSet); + const long int headDataOffSet = ftell(file); + fseek(file, 0L, SEEK_END); + const long int filesize = (ftell(file) - headDataOffSet) / sizeof(FReal); - MPI_Offset filesize(0); - MPI_File_get_size(file, &filesize); // in bytes - filesize = (filesize - headDataOffSet) / sizeof(FReal); if(filesize/4 != this->totalNbParticles){ - printf("Error fileSize %lld, nbPart %lld\n",filesize/4, this->totalNbParticles); + printf("Error fileSize %ld, nbPart %lld\n", filesize/4, this->totalNbParticles); } + // in number of floats - const FSize startPart = app.getLeft(this->totalNbParticles); - const FSize endPart = app.getRight(this->totalNbParticles); + const FSize startPart = comm.getLeft(this->totalNbParticles); + const FSize endPart = comm.getRight(this->totalNbParticles); nbParticles = (endPart - startPart); const FSize bufsize = nbParticles * 4; // local number to read particles = new FReal[bufsize]; - if( sizeof(FReal) == sizeof(float) ){ - MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal), particles, int(bufsize), MPI_FLOAT, &status); - } - else{ - MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal), particles, int(bufsize), MPI_DOUBLE, &status); - } + fseek(file, long(headDataOffSet + startPart * 4 * sizeof(FReal)), SEEK_SET); + removeWarning += fread(particles, sizeof(FReal), int(bufsize), file); - // check if needed - int count(0); - MPI_Get_count(&status, MPI_INT, &count); - FDEBUG(if(count / 4 != this->nbParticles){) - FDEBUG( FDebug::Controller<< "Error read " << count << " data, nbPart is " << this->nbParticles << __LINE__ << " " << __FILE__ << "\n"; ) - FDEBUG(}) - } - else{ - this->totalNbParticles = 0; - } - MPI_File_close(&file); - }*/ - - FILE* file(fopen(filename, "rb")); - int removeWarning(0); - // test if open - if(file != NULL) { - int sizeOfElement(0); - removeWarning += fread(&sizeOfElement, sizeof(int), 1, file); - FDEBUG(if(sizeOfElement != int(sizeof(FReal)) ){) - FDEBUG( FDebug::Controller.writeFromLine("Warning type size between file and FReal are differents\n", __LINE__, __FILE__); ) - printf("%d sizeofelement\n",sizeOfElement); - FDEBUG(}) - removeWarning += fread(&this->totalNbParticles, sizeof(FSize), 1, file); - - removeWarning += fread(&this->boxWidth, sizeof(FReal), 1, file); - this->boxWidth *= 2; - - FReal x,y,z; - removeWarning += fread(&x, sizeof(FReal), 1, file); - removeWarning += fread(&y, sizeof(FReal), 1, file); - removeWarning += fread(&z, sizeof(FReal), 1, file); - this->centerOfBox.setPosition(x,y,z); - - this->isOpenFlag = true; - - const long int headDataOffSet = ftell(file); - fseek(file, 0L, SEEK_END); - const long int filesize = (ftell(file) - headDataOffSet) / sizeof(FReal); - - if(filesize/4 != this->totalNbParticles){ - printf("Error fileSize %ld, nbPart %lld\n", filesize/4, this->totalNbParticles); + fclose(file); } - - // in number of floats - const FSize startPart = app.getLeft(this->totalNbParticles); - const FSize endPart = app.getRight(this->totalNbParticles); - nbParticles = (endPart - startPart); - const FSize bufsize = nbParticles * 4; - // local number to read - particles = new FReal[bufsize]; - - fseek(file, long(headDataOffSet + startPart * 4 * sizeof(FReal)), SEEK_SET); - - removeWarning += fread(particles, sizeof(FReal), int(bufsize), file); - - fclose(file); } } diff --git a/Src/Files/FMpiTreeBuilder.hpp b/Src/Files/FMpiTreeBuilder.hpp index 443bb9029f39b0e7639a2bf88184ea4a05446d43..3e83773d817dac0b96f5e0f6c5fa76c826a966d9 100644 --- a/Src/Files/FMpiTreeBuilder.hpp +++ b/Src/Files/FMpiTreeBuilder.hpp @@ -8,13 +8,21 @@ #include "../Utils/FMemUtils.hpp" #include "../Utils/FTrace.hpp" -/** This class manage the loading of particles - * for the mpi version. - * It use a BinLoader and then sort the data - * with a parallel quick sort. +/** This class manage the loading of particles for the mpi version. + * It use a BinLoader and then sort the data with a parallel quick sort + * or bitonic sort. + * After it needs to merge the leaves to finally equalize the data. */ template<class ParticleClass> class FMpiTreeBuilder{ +public: + /** What sorting algorithm to use */ + enum SortingType{ + QuickSort, + BitonicSort, + }; + +private: /** This method has been tacken from the octree * it computes a tree coordinate (x or y or z) from real position */ @@ -27,83 +35,10 @@ class FMpiTreeBuilder{ return index; } - /** get current rank */ - static int MpiGetRank(MPI_Comm comm = MPI_COMM_WORLD){ - int rank(0); - MPI_Comm_rank(comm, &rank); - return rank; - } - - /** get current nb procs */ - static int MpiGetNbProcs(MPI_Comm comm = MPI_COMM_WORLD){ - int nb(0); - MPI_Comm_size(comm, &nb); - return nb; - } - - /** receive data from a tag function */ - static void receiveDataFromTag(const int inSize, const int inTag, void* const inData, int* const inSource = 0, int* const inFilledSize = 0){ - MPI_Status status; - MPI_Recv(inData, inSize, MPI_CHAR, MPI_ANY_SOURCE, inTag, MPI_COMM_WORLD, &status); - if(inSource) *inSource = status.MPI_SOURCE; - if(inFilledSize) MPI_Get_count(&status,MPI_CHAR,inFilledSize); - } - - template< class T > - static T GetLeft(const T inSize) { - const double step = (double(inSize) / double(MpiGetNbProcs())); - return T(FMath::Ceil(step * double(MpiGetRank()))); - } - - template< class T > - static T GetRight(const T inSize) { - const double step = (double(inSize) / double(MpiGetNbProcs())); - const T res = T(FMath::Ceil(step * double(MpiGetRank()+1))); - if(res > inSize) return inSize; - else return res; - } - - template< class T > - static T GetOtherRight(const T inSize, const int other) { - const double step = (double(inSize) / MpiGetNbProcs()); - const T res = T(FMath::Ceil(step * (other+1))); - if(res > inSize) return inSize; - else return res; - } - - template< class T > - static T GetOtherLeft(const T inSize, const int other) { - const double step = (double(inSize) / double(MpiGetNbProcs())); - return T(FMath::Ceil(step * double(other))); - } - - template <class T1, class T2> - static T1 Min( const T1 v1, const T2 v2){ - return T1(v1 < v2 ? v1 : v2); - } - - template <class T1, class T2> - static T1 Max( const T1 v1, const T2 v2){ - return T1(v1 > v2 ? v1 : v2); - } - - - /** This struct is used to represent a particles group to - * sort them easily - */ - struct ParticlesGroup { - int number; - int positionInArray; - MortonIndex index; - ParticlesGroup(const int inNumber = 0 , const int inPositionInArray = 0, const MortonIndex inIndex = 0) - : number(inNumber), positionInArray(inPositionInArray), index(inIndex) { - } - }; - /** A particle may not have a MortonIndex Method * but they are sorted on their morton index - * so this struct store a particle and its index + * so this struct store a particle + its index */ struct IndexedParticle{ MortonIndex index; @@ -114,75 +49,63 @@ class FMpiTreeBuilder{ } }; - char* intervals; - FSize nbLeavesInIntervals; + ////////////////////////////////////////////////////////////////////////// + // To sort tha particles we hold + ////////////////////////////////////////////////////////////////////////// -private: - // Forbid copy - FMpiTreeBuilder(const FMpiTreeBuilder&){} - FMpiTreeBuilder& operator=(const FMpiTreeBuilder&){return *this;} -public: - /** Constructor */ - FMpiTreeBuilder() - : intervals(0), nbLeavesInIntervals(0) { - } + template <class LoaderClass> + static void SortParticles( const FMpi::FComm& communicator, LoaderClass& loader, const SortingType type, + const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); + + // create particles + IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()]; + FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles()); + F3DPosition boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2)); + FTreeCoordinate host; + + const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (TreeHeight - 1) ); + for(long idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + loader.fillParticle(realParticlesIndexed[idxPart].particle); + host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); + host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); + host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); + + realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); + } - /** Destructor */ - virtual ~FMpiTreeBuilder(){ - delete [] intervals; + // sort particles + if(type == QuickSort){ + FQuickSort<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(), *outputArray, *outputSize,communicator); + delete [] (realParticlesIndexed); + } + else { + FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, loader.getNumberOfParticles(), communicator ); + *outputArray = realParticlesIndexed; + *outputSize = loader.getNumberOfParticles(); + } } - /** Split and sort the file */ - template <class LoaderClass> - bool splitAndSortFile(LoaderClass& loader, const int NbLevels){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader" , __FILE__ , __LINE__) ); - const int rank = MpiGetRank(); - const int nbProcs = MpiGetNbProcs(); - // First we create the particles that belong to us (our proc) - // we compute their morton index to be able to sort them - // + ////////////////////////////////////////////////////////////////////////// + // To merge the leaves + ////////////////////////////////////////////////////////////////////////// - IndexedParticle* outputArray = 0; - FSize outputSize = 0; - { - FTRACE( FTrace::FRegion regionTrace("Insert Particles", __FUNCTION__ , __FILE__ , __LINE__) ); - // create particles - IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()]; - FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles()); - F3DPosition boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2)); - FTreeCoordinate host; - - const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (NbLevels - 1) ); - for(long idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ - loader.fillParticle(realParticlesIndexed[idxPart].particle); - host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); - host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); - host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); - - realParticlesIndexed[idxPart].index = host.getMortonIndex(NbLevels - 1); - } + static void MergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize& workingSize, + char** leavesArray, FSize* const leavesSize){ + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); + const int rank = communicator.processId(); + const int nbProcs = communicator.processCount(); - // sort particles - if(false){ - FQuickSort<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(),outputArray,outputSize); - delete [] (realParticlesIndexed); - } - else { - FBitonicSort::sort<IndexedParticle,MortonIndex>( realParticlesIndexed, loader.getNumberOfParticles() ); - outputArray = realParticlesIndexed; - outputSize = loader.getNumberOfParticles(); - } - } // be sure there is no splited leaves // to do that we exchange the first index with the left proc { FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) ); MortonIndex otherFirstIndex = -1; - if(outputSize != 0 && rank != 0 && rank != nbProcs - 1){ - MPI_Sendrecv(&outputArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, + if(workingSize != 0 && rank != 0 && rank != nbProcs - 1){ + MPI_Sendrecv(&workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, &otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } @@ -190,7 +113,7 @@ public: MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if( rank == nbProcs - 1){ - MPI_Send( &outputArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, MPI_COMM_WORLD); + MPI_Send( &workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, MPI_COMM_WORLD); } else { MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, MPI_COMM_WORLD, MPI_STATUS_IGNORE); @@ -198,20 +121,20 @@ public: } // at this point every one know the first index of his right neighbors - const bool needToRecvBeforeSend = (rank != 0 && ((outputSize && otherFirstIndex == outputArray[0].index ) || !outputSize)); + const bool needToRecvBeforeSend = (rank != 0 && ((workingSize && otherFirstIndex == workingArray[0].index ) || !workingSize)); MPI_Request requestSendLeaf; IndexedParticle* sendBuffer = 0; if(rank != nbProcs - 1 && needToRecvBeforeSend == false){ - FSize idxPart = outputSize - 1 ; - while(idxPart >= 0 && outputArray[idxPart].index == otherFirstIndex){ + FSize idxPart = workingSize - 1 ; + while(idxPart >= 0 && workingArray[idxPart].index == otherFirstIndex){ --idxPart; } - const int particlesToSend = int(outputSize - 1 - idxPart); + const int particlesToSend = int(workingSize - 1 - idxPart); if(particlesToSend){ - outputSize -= particlesToSend; + workingSize -= particlesToSend; sendBuffer = new IndexedParticle[particlesToSend]; - memcpy(sendBuffer, &outputArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle)); + memcpy(sendBuffer, &workingArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle)); MPI_Isend( sendBuffer, particlesToSend * sizeof(IndexedParticle), MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, &requestSendLeaf); } @@ -230,15 +153,15 @@ public: if(sendByOther){ sendByOther /= sizeof(IndexedParticle); - const IndexedParticle* const reallocOutputArray = outputArray; - const FSize reallocOutputSize = outputSize; + const IndexedParticle* const reallocOutputArray = workingArray; + const FSize reallocOutputSize = workingSize; - outputSize += sendByOther; - outputArray = new IndexedParticle[outputSize]; - FMemUtils::memcpy(&outputArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle)); + workingSize += sendByOther; + workingArray = new IndexedParticle[workingSize]; + FMemUtils::memcpy(&workingArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle)); delete[] reallocOutputArray; - MPI_Recv(outputArray, sizeof(IndexedParticle) * sendByOther, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + MPI_Recv(workingArray, sizeof(IndexedParticle) * sendByOther, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else{ MPI_Recv( 0, 0, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, MPI_STATUS_IGNORE); @@ -246,10 +169,10 @@ public: } if(rank != nbProcs - 1 && needToRecvBeforeSend == true){ - MPI_Send( outputArray, int(outputSize * sizeof(IndexedParticle)), MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD); - delete[] outputArray; - outputArray = 0; - outputSize = 0; + MPI_Send( workingArray, int(workingSize * sizeof(IndexedParticle)), MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD); + delete[] workingArray; + workingArray = 0; + workingSize = 0; } else if(rank != nbProcs - 1){ MPI_Wait( &requestSendLeaf, MPI_STATUS_IGNORE); @@ -262,18 +185,20 @@ public: FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) ); // We now copy the data from a sorted type into real particles array + counter - nbLeavesInIntervals = 0; - if(outputSize){ - intervals = new char[outputSize * (sizeof(ParticleClass) + sizeof(int))]; + (*leavesSize) = 0; + (*leavesArray) = 0; + + if(workingSize){ + (*leavesArray) = new char[workingSize * (sizeof(ParticleClass) + sizeof(int))]; MortonIndex previousIndex = -1; - char* writeIndex = intervals; + char* writeIndex = (*leavesArray); int* writeCounter = 0; - for( FSize idxPart = 0; idxPart < outputSize ; ++idxPart){ - if( outputArray[idxPart].index != previousIndex ){ - previousIndex = outputArray[idxPart].index; - ++nbLeavesInIntervals; + for( FSize idxPart = 0; idxPart < workingSize ; ++idxPart){ + if( workingArray[idxPart].index != previousIndex ){ + previousIndex = workingArray[idxPart].index; + ++(*leavesSize); writeCounter = reinterpret_cast<int*>( writeIndex ); writeIndex += sizeof(int); @@ -281,24 +206,30 @@ public: (*writeCounter) = 0; } - memcpy(writeIndex, &outputArray[idxPart].particle, sizeof(ParticleClass)); + memcpy(writeIndex, &workingArray[idxPart].particle, sizeof(ParticleClass)); writeIndex += sizeof(ParticleClass); ++(*writeCounter); } } - delete [] outputArray; - } + delete [] workingArray; - return true; + workingArray = 0; + workingSize = 0; + } } + ////////////////////////////////////////////////////////////////////////// + // To equalize (same number of leaves among the procs) + ////////////////////////////////////////////////////////////////////////// + /** Put the interval into a tree */ template <class OctreeClass> - bool intervalsToTree(OctreeClass& realTree){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader" , __FILE__ , __LINE__) ); - const int rank = MpiGetRank(); - const int nbProcs = MpiGetNbProcs(); + static void EqualizeAndFillTree(const FMpi::FComm& communicator, OctreeClass& realTree, + char*& leavesArray, FSize& nbLeavesInIntervals ){ + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); + const int rank = communicator.processId(); + const int nbProcs = communicator.processCount(); const FSize currentNbLeafs = nbLeavesInIntervals; // We have to know the number of leaves each procs holds @@ -317,8 +248,8 @@ public: // So we can know the number of total leafs and // the number of leaves each procs must have at the end const FSize totalNbLeaves = (currentLeafsOnMyLeft + currentNbLeafs + currentLeafsOnMyRight); - const FSize correctLeftLeavesNumber = GetLeft(totalNbLeaves); - const FSize correctRightLeavesIndex = GetRight(totalNbLeaves); + const FSize correctLeftLeavesNumber = communicator.getLeft(totalNbLeaves); + const FSize correctRightLeavesIndex = communicator.getRight(totalNbLeaves); // This will be used for the all to all int leavesToSend[nbProcs]; @@ -342,7 +273,7 @@ public: // Find the first proc that need my data int idxProc = rank - 1; while( idxProc > 0 ){ - const FSize thisProcRight = GetOtherRight(totalNbLeaves, idxProc - 1); + const FSize thisProcRight = communicator.getOtherRight(totalNbLeaves, idxProc - 1); // Go to left until proc-1 has a right index lower than my current left if( thisProcRight < currentLeafsOnMyLeft){ break; @@ -353,12 +284,12 @@ public: // Count data for this proc leftProcToStartSend = idxProc; int ICanGive = int(currentNbLeafs); - leavesToSend[idxProc] = int(Min(GetOtherRight(totalNbLeaves, idxProc), totalNbLeaves - currentLeafsOnMyRight) - - Max( currentLeafsOnMyLeft , GetOtherLeft(totalNbLeaves, idxProc))); + leavesToSend[idxProc] = int(FMath::Min(communicator.getOtherRight(totalNbLeaves, idxProc), totalNbLeaves - currentLeafsOnMyRight) + - FMath::Max( currentLeafsOnMyLeft , communicator.getOtherLeft(totalNbLeaves, idxProc))); { bytesOffset[idxProc] = 0; for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ - currentIntervalPosition += ((*(int*)&intervals[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); + currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); } bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); } @@ -367,11 +298,11 @@ public: // count data to other proc while(idxProc < rank && ICanGive){ - leavesToSend[idxProc] = int(Min( GetOtherRight(totalNbLeaves, idxProc) - GetOtherLeft(totalNbLeaves, idxProc), ICanGive)); + leavesToSend[idxProc] = int(FMath::Min( communicator.getOtherRight(totalNbLeaves, idxProc) - communicator.getOtherLeft(totalNbLeaves, idxProc), FSize(ICanGive))); bytesOffset[idxProc] = int(currentIntervalPosition); for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ - currentIntervalPosition += ((*(int*)&intervals[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); + currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); } bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); @@ -392,8 +323,8 @@ public: } // We have to jump the correct number of leaves - for(FSize idxLeaf = Max(iNeedToSendLeftCount,0) ; idxLeaf < endForMe ; ++idxLeaf){ - const int nbPartInLeaf = (*(int*)&intervals[currentIntervalPosition]); + for(FSize idxLeaf = FMath::Max(iNeedToSendLeftCount,FSize(0)) ; idxLeaf < endForMe ; ++idxLeaf){ + const int nbPartInLeaf = (*(int*)&leavesArray[currentIntervalPosition]); currentIntervalPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int); } } @@ -404,8 +335,8 @@ public: // Find the last proc on the right that need my data int idxProc = rank + 1; while( idxProc < nbProcs ){ - const FSize thisProcLeft = GetOtherLeft(totalNbLeaves, idxProc); - const FSize thisProcRight = GetOtherRight(totalNbLeaves, idxProc); + const FSize thisProcLeft = communicator.getOtherLeft(totalNbLeaves, idxProc); + const FSize thisProcRight = communicator.getOtherRight(totalNbLeaves, idxProc); // Progress until the proc+1 has its left index upper to my current right if( thisProcLeft < currentLeafsOnMyLeft || (totalNbLeaves - currentLeafsOnMyRight) < thisProcRight){ break; @@ -415,13 +346,13 @@ public: // Count the data int ICanGive = int(currentLeafsOnMyLeft + currentNbLeafs - correctRightLeavesIndex); - leavesToSend[idxProc] = int(Min(GetOtherRight(totalNbLeaves, idxProc) , (totalNbLeaves - currentLeafsOnMyRight)) - - Max(GetOtherLeft(totalNbLeaves, idxProc), currentLeafsOnMyLeft) ); + leavesToSend[idxProc] = int(FMath::Min(communicator.getOtherRight(totalNbLeaves, idxProc) , (totalNbLeaves - currentLeafsOnMyRight)) + - FMath::Max(communicator.getOtherLeft(totalNbLeaves, idxProc), currentLeafsOnMyLeft) ); { bytesOffset[idxProc] = int(currentIntervalPosition); for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ - currentIntervalPosition += ((*(int*)&intervals[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); + currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); } bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); } @@ -430,11 +361,11 @@ public: // Now Count the data to other while(idxProc < nbProcs && ICanGive){ - leavesToSend[idxProc] = int(Min( GetOtherRight(totalNbLeaves, idxProc) - GetOtherLeft(totalNbLeaves, idxProc), ICanGive)); + leavesToSend[idxProc] = int(FMath::Min( communicator.getOtherRight(totalNbLeaves, idxProc) - communicator.getOtherLeft(totalNbLeaves, idxProc), FSize(ICanGive))); bytesOffset[idxProc] = int(currentIntervalPosition); for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ - currentIntervalPosition += ((*(int*)&intervals[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); + currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); } bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); @@ -465,7 +396,7 @@ public: // Send alll to all char* const recvbuf = new char[sumBytesToRecv]; - MPI_Alltoallv(intervals, bytesToSend, bytesOffset, MPI_BYTE, + MPI_Alltoallv(leavesArray, bytesToSend, bytesOffset, MPI_BYTE, recvbuf, bytesToRecv, bytesOffsetToRecv, MPI_BYTE, MPI_COMM_WORLD); @@ -496,9 +427,9 @@ public: endForMe -= iNeedToSendRightCount; } - for(FSize idxLeaf = Max(iNeedToSendLeftCount,0) ; idxLeaf < endForMe ; ++idxLeaf){ - const int nbPartInLeaf = (*(int*)&intervals[currentIntervalPosition]); - ParticleClass* const particles = reinterpret_cast<ParticleClass*>(&intervals[currentIntervalPosition] + sizeof(int)); + for(FSize idxLeaf = FMath::Max(iNeedToSendLeftCount,FSize(0)) ; idxLeaf < endForMe ; ++idxLeaf){ + const int nbPartInLeaf = (*(int*)&leavesArray[currentIntervalPosition]); + ParticleClass* const particles = reinterpret_cast<ParticleClass*>(&leavesArray[currentIntervalPosition] + sizeof(int)); for(int idxPart = 0 ; idxPart < nbPartInLeaf ; ++idxPart){ realTree.insert(particles[idxPart]); @@ -508,9 +439,33 @@ public: } } + delete[] leavesArray; + leavesArray = 0; + nbLeavesInIntervals = 0; + } + +public: - return true; + ////////////////////////////////////////////////////////////////////////// + // The builder function + ////////////////////////////////////////////////////////////////////////// + + template <class LoaderClass, class OctreeClass> + static void LoaderToTree(const FMpi::FComm& communicator, LoaderClass& loader, + OctreeClass& realTree, const SortingType type = QuickSort){ + + IndexedParticle* particlesArray = 0; + FSize particlesSize = 0; + SortParticles(communicator, loader, type, realTree.getHeight(), &particlesArray, &particlesSize); + + char* leavesArray = 0; + FSize leavesSize = 0; + MergeLeaves(communicator, particlesArray, particlesSize, &leavesArray, &leavesSize); + + EqualizeAndFillTree(communicator, realTree, leavesArray, leavesSize); } + + }; #endif // FMPITREEBUILDER_H diff --git a/Src/Utils/FBitonicSort.hpp b/Src/Utils/FBitonicSort.hpp index 30c4328303fa25c3c88df2a502f1baf7a9f19685..3933ef75e7feb9d6504673fcd3684964a2c2e777 100644 --- a/Src/Utils/FBitonicSort.hpp +++ b/Src/Utils/FBitonicSort.hpp @@ -10,6 +10,13 @@ #include "FMpi.hpp" #include "FQuickSort.hpp" + +/** This class is a parallel bitonic sort + * it is based on the paper : + * Library Support for Parallel Sorting in Scientific Computations + * Holger Dachsel1 , Michael Hofmann2, , and Gudula R ̈nger2 + */ +template <class SortType, class CompareType, class IndexType> class FBitonicSort { private: @@ -17,22 +24,15 @@ private: // Bitonic parallel sort ! //////////////////////////////////////////////////////////////// - // Mpi flag - static const int FlagMin = 5; - static const int FlagMax = 6; - static const int FlagMinMess = 4; - static const int FlagMaxMess = 3; - // This function exchange data with the other rank, // its send the max value and receive min value - template <class SortType, class CompareType, class IndexType> - static void sendMaxAndGetMin(SortType array[], const IndexType size, const int otherRank){ + static void SendMaxAndGetMin(SortType array[], const IndexType size, const int otherRank){ IndexType left = -1; IndexType right = size - 1; IndexType pivot = left + (right - left + 1)/2; CompareType otherValue = -1; CompareType tempCompareValue = CompareType(array[pivot]); - MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMin,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMax,MPI_COMM_WORLD,MPI_STATUS_IGNORE); + MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMin,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMax,MPI_COMM_WORLD,MPI_STATUS_IGNORE); while( pivot != left && pivot != right && array[pivot] != otherValue) { @@ -45,19 +45,19 @@ private: pivot = left + (right - left + 1)/2; tempCompareValue = CompareType(array[pivot]); - MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMin,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMax,MPI_COMM_WORLD,MPI_STATUS_IGNORE); + MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMin,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMax,MPI_COMM_WORLD,MPI_STATUS_IGNORE); } if( otherValue <= array[pivot] ){ MPI_Sendrecv_replace(&array[pivot], int((size - pivot) * sizeof(SortType)) , MPI_BYTE, - otherRank, FlagMinMess, otherRank, FlagMaxMess, + otherRank, FMpi::TagBitonicMinMess, otherRank, FMpi::TagBitonicMaxMess, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if( array[pivot] < otherValue){ if(pivot != size - 1){ MPI_Sendrecv_replace(&array[pivot + 1], int((size - pivot - 1) * sizeof(SortType)) , MPI_BYTE, - otherRank, FlagMinMess, otherRank, FlagMaxMess, + otherRank, FMpi::TagBitonicMinMess, otherRank, FMpi::TagBitonicMaxMess, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } @@ -66,14 +66,13 @@ private: // This function exchange data with the other rank, // its send the min value and receive max value - template <class SortType, class CompareType, class IndexType> - static void sendMinAndGetMax(SortType array[], const IndexType size, const int otherRank){ + static void SendMinAndGetMax(SortType array[], const IndexType size, const int otherRank){ IndexType left = 0; IndexType right = size ; IndexType pivot = left + (right - left)/2; CompareType otherValue = -1; CompareType tempCompareValue = CompareType(array[pivot]); - MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMax,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMin,MPI_COMM_WORLD,MPI_STATUS_IGNORE); + MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMax,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMin,MPI_COMM_WORLD,MPI_STATUS_IGNORE); while( pivot != left && array[pivot] != otherValue) { @@ -85,19 +84,19 @@ private: } pivot = left + (right - left)/2; tempCompareValue = CompareType(array[pivot]); - MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMax,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMin,MPI_COMM_WORLD,MPI_STATUS_IGNORE); + MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMax,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMin,MPI_COMM_WORLD,MPI_STATUS_IGNORE); } if( array[pivot] <= otherValue ){ MPI_Sendrecv_replace(&array[0], int((pivot + 1) * sizeof(SortType)) , MPI_BYTE, - otherRank, FlagMaxMess, otherRank, FlagMinMess, + otherRank, FMpi::TagBitonicMaxMess, otherRank, FMpi::TagBitonicMinMess, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } else if( otherValue < array[pivot]){ if(pivot != 0){ MPI_Sendrecv_replace(&array[0], int((pivot) * sizeof(SortType)) , MPI_BYTE, - otherRank, FlagMaxMess, otherRank, FlagMinMess, + otherRank, FMpi::TagBitonicMaxMess, otherRank, FMpi::TagBitonicMinMess, MPI_COMM_WORLD, MPI_STATUS_IGNORE); } } @@ -127,9 +126,10 @@ public: endfor endfor */ - template <class SortType, class CompareType, class IndexType> - static void sort(SortType array[], const IndexType size, const int np, const int rank){ + static void Sort(SortType array[], const IndexType size, const FMpi::FComm& comm){ FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Bitonic" , __FILE__ , __LINE__) ); + const int np = comm.processCount(); + const int rank = comm.processId(); FQuickSort<SortType,CompareType,IndexType>::QsOmp(array, size); @@ -146,10 +146,10 @@ public: const int otherRank = rank ^ (1 << otherBit); if( diBit != myOtherBit ){ - sendMinAndGetMax<SortType,CompareType>(array, size, otherRank); + SendMinAndGetMax(array, size, otherRank); } else{ - sendMaxAndGetMin<SortType,CompareType>(array, size, otherRank); + SendMaxAndGetMin(array, size, otherRank); } // A merge sort is possible since the array is composed // by two part already sorted, but we want to do this in space @@ -157,17 +157,6 @@ public: } } } - - template <class SortType, class CompareType, class IndexType> - static void sort(SortType array[], const IndexType size){ - int rank = 0; - int nprocs = 0; - - MPI_Comm_size(MPI_COMM_WORLD,&nprocs); - MPI_Comm_rank(MPI_COMM_WORLD,&rank); - - sort<SortType, CompareType>(array, size, nprocs, rank); - } }; #endif // BITONICSORT_HPP diff --git a/Src/Utils/FMpi.hpp b/Src/Utils/FMpi.hpp index 315a2f497736efdbc8d81e2c675ea8069f12df65..16fb396f1656ee08d31671c28d161fa7e99f888d 100644 --- a/Src/Utils/FMpi.hpp +++ b/Src/Utils/FMpi.hpp @@ -2,7 +2,6 @@ #define FMPI_HPP // /!\ Please, you must read the license at the bottom of this page - #include "FGlobal.hpp" #include "FMath.hpp" @@ -15,13 +14,13 @@ * @class FMpi * Please read the license * -* This namespace is to compile with or without mpi -* It defines a class to access MPI data, if the lib is compiled -* without mpi support then simulate data. */ class FMpi { public: +//////////////////////////////////////////////////////// +// MPI Flag +//////////////////////////////////////////////////////// enum FMpiTag { // FMpiTreeBuilder TagExchangeIndexs, @@ -37,294 +36,255 @@ public: TagFmmL2L, TagFmmP2P, + // Bitonic, + TagBitonicMin, + TagBitonicMax, + TagBitonicMinMess, + TagBitonicMaxMess, + // Last defined tag TagLast, }; -#ifdef SCALFMM_USE_MPI +//////////////////////////////////////////////////////// +// FComm to factorize MPI_Comm work +//////////////////////////////////////////////////////// + + /** This class is used to put all the usual method + * related mpi comm + */ + class FComm { + int rank; //< rank related to the comm + int nbProc; //< nb proc in this group + + MPI_Comm communicator; //< current mpi communicator + MPI_Group group; //< current mpi group + + // Forbid copy + FComm(const FComm&){} + FComm& operator=(const FComm&){return *this;} + + // reset : get rank and nb proc from mpi + void reset(){ + FMpi::Assert( MPI_Comm_rank(communicator,&rank), __LINE__ ); + FMpi::Assert( MPI_Comm_size(communicator,&nbProc), __LINE__ ); + } + + public: + /** Constructor : dup the comm given in parameter */ + explicit FComm(MPI_Comm inCommunicator ) { + FMpi::Assert( MPI_Comm_dup(inCommunicator, &communicator), __LINE__ , "comm dup"); + FMpi::Assert( MPI_Comm_group(communicator, &group), __LINE__ , "comm group"); + + reset(); + } + + /** Free communicator and group */ + virtual ~FComm(){ + FMpi::Assert( MPI_Comm_free(&communicator), __LINE__ ); + FMpi::Assert( MPI_Group_free(&group), __LINE__ ); + } + + /** To get the mpi comm needed for communication */ + MPI_Comm getComm() const { + return communicator; + } + + /** The current rank */ + int processId() const { + return rank; + } + + /** The current number of procs in the group */ + int processCount() const { + return nbProc; + } + + //////////////////////////////////////////////////////////// + // Split/Chunk functions + //////////////////////////////////////////////////////////// + + /** Get a left index related to a size */ + template< class T > + T getLeft(const T inSize) const { + const double step = (double(inSize) / double(processCount())); + return T(FMath::Ceil(step * double(processId()))); + } + + /** Get a right index related to a size */ + template< class T > + T getRight(const T inSize) const { + const double step = (double(inSize) / double(processCount())); + const T res = T(FMath::Ceil(step * double(processId()+1))); + if(res > inSize) return inSize; + else return res; + } + + /** Get a right index related to a size and another id */ + template< class T > + T getOtherRight(const T inSize, const int other) const { + const double step = (double(inSize) / double(processCount())); + const T res = T(FMath::Ceil(step * double(other+1))); + if(res > inSize) return inSize; + else return res; + } + + /** Get a left index related to a size and another id */ + template< class T > + T getOtherLeft(const T inSize, const int other) const { + const double step = (double(inSize) / double(processCount())); + return T(FMath::Ceil(step * double(other))); + } + + /** Get a proc id from and index */ + template< class T > + int getProc(const int position, const T inSize) const { + const double step = (double(inSize) / processCount()); + return int(position/step); + } + + //////////////////////////////////////////////////////////// + // Mpi interface functions + //////////////////////////////////////////////////////////// + + + /** Reduce a value for proc == 0 */ + template< class T > + T reduceSum(T data) const { + T result; + FMpi::Assert( MPI_Reduce( &data, &result, 1, FMpi::GetType(data), MPI_SUM, 0, communicator ), __LINE__); + return result; + } + + /** Reduce an average */ + template< class T > + T reduceAverageAll(T data) const { + T result[processCount()]; + FMpi::Assert( MPI_Allgather( &data, 1, FMpi::GetType(data), result, 1, FMpi::GetType(data), getComm()), __LINE__ ); + + T average = 0; + for(int idxProc = 0 ; idxProc < processCount() ;++idxProc){ + average += result[idxProc] / processCount(); + } + return average; + } + + /** Change the group size */ + void groupReduce(const int from , const int to){ + int procsIdArray[to - from + 1]; + for(int idxProc = from ;idxProc <= to ; ++idxProc){ + procsIdArray[idxProc - from] = idxProc; + } + + MPI_Group previousGroup = group; + FMpi::Assert( MPI_Group_incl(previousGroup, to - from + 1 , procsIdArray, &group), __LINE__ ); + + MPI_Comm previousComm = communicator; + FMpi::Assert( MPI_Comm_create(previousComm, group, &communicator), __LINE__ ); + + MPI_Comm_free(&previousComm); + MPI_Group_free(&previousGroup); + + reset(); + } + }; //////////////////////////////////////////////////////// -// Use MPI +// FMpi methods //////////////////////////////////////////////////////// - typedef MPI_Request Request; /* -[fourmi062:15896] [[13237,0],1]-[[13237,1],1] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) -[fourmi056:04597] [[13237,0],3]-[[13237,1],3] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) -[fourmi053:08571] [[13237,0],5]-[[13237,1],5] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) + We use init with thread because of an openmpi error: -Erreur pour le proc1 -[[13237,1],1][btl_openib_component.c:3227:handle_wc] from fourmi062 to: fourmi056 error polling LP CQ with status LOCAL LENGTH ERROR status number 1 for wr_id 7134664 opcode 0 vendor error 105 qp_idx 3 -Tous on la meme erreur le 2e 1 est remplacé par le rang. -*/ - FMpi(int inArgc, char ** inArgv ) { + [fourmi062:15896] [[13237,0],1]-[[13237,1],1] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) + [fourmi056:04597] [[13237,0],3]-[[13237,1],3] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) + [fourmi053:08571] [[13237,0],5]-[[13237,1],5] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) + + Erreur pour le proc1 + [[13237,1],1][btl_openib_component.c:3227:handle_wc] from fourmi062 to: fourmi056 error polling LP CQ with status LOCAL LENGTH ERROR status number 1 for wr_id 7134664 opcode 0 vendor error 105 qp_idx 3 + Tous on la meme erreur le 2e 1 est remplacé par le rang. + */ + FMpi(int inArgc, char ** inArgv ) : communicator(0) { int provided = 0; - MPI_Init_thread(&inArgc,&inArgv, MPI_THREAD_MULTIPLE, &provided); + FMpi::Assert( MPI_Init_thread(&inArgc,&inArgv, MPI_THREAD_MULTIPLE, &provided), __LINE__); + communicator = new FComm(MPI_COMM_WORLD); } + /** Delete the communicator and call mpi finalize */ ~FMpi(){ + delete communicator; MPI_Finalize(); } - void allgather(void* const sendbuf, const int sendcount, void* const recvbuf, const int recvcount, MPI_Datatype datatype = MPI_INT){ - MPI_Allgather( sendbuf, sendcount, datatype, recvbuf, recvcount, datatype, MPI_COMM_WORLD); + /** Get the global communicator */ + const FComm& global() { + return (*communicator); } - void sendData(const int inReceiver, const int inSize, void* const inData, const int inTag){ - MPI_Send(inData, inSize, MPI_CHAR , inReceiver, inTag, MPI_COMM_WORLD); - } + //////////////////////////////////////////////////////////// + // Mpi Types meta function + //////////////////////////////////////////////////////////// - void isendData(const int inReceiver, const int inSize, void* const inData, const int inTag, MPI_Request*const request){ - MPI_Isend(inData, inSize, MPI_CHAR , inReceiver, inTag, MPI_COMM_WORLD, request); + static MPI_Datatype GetType(long long&){ + return MPI_LONG_LONG; } - void iWait( MPI_Request*const request ){ - MPI_Status status; - MPI_Wait(request, &status); + static MPI_Datatype GetType(double&){ + return MPI_DOUBLE; } - void iWaitall(MPI_Request requests[], const int count){ - MPI_Status status[count]; - MPI_Waitall(count, requests, status); + static MPI_Datatype GetType(float&){ + return MPI_FLOAT; } - void waitMessage(int*const sender, int*const tag = 0, const int restrictsender = MPI_ANY_SOURCE, const int restricttag = MPI_ANY_TAG){ - MPI_Status status; - MPI_Probe( restrictsender, restricttag, MPI_COMM_WORLD, &status ); - if(sender) *sender = status.MPI_SOURCE; - if(tag) *tag = status.MPI_TAG; - } - - void receiveData(const int inSize, void* const inData, int* const inSource = 0, int* const inTag = 0, int* const inFilledSize = 0){ - MPI_Status status; - MPI_Recv(inData, inSize, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG,MPI_COMM_WORLD, &status); - if(inSource) *inSource = status.MPI_SOURCE; - if(inTag) *inTag = status.MPI_TAG; - if(inFilledSize) MPI_Get_count(&status,MPI_CHAR,inFilledSize); - } - - void receiveDataFromTag(const int inSize, const int inTag, void* const inData, int* const inSource = 0, int* const inFilledSize = 0){ - MPI_Status status; - MPI_Recv(inData, inSize, MPI_CHAR, MPI_ANY_SOURCE, inTag, MPI_COMM_WORLD, &status); - if(inSource) *inSource = status.MPI_SOURCE; - if(inFilledSize) MPI_Get_count(&status,MPI_CHAR,inFilledSize); - } - - void receiveDataFromTagAndSource(const int inSize, const int inTag, const int inSource, void* const inData, int* const inFilledSize = 0){ - MPI_Status status; - MPI_Recv(inData, inSize, MPI_CHAR, inSource, inTag, MPI_COMM_WORLD, &status); - if(inFilledSize) MPI_Get_count(&status,MPI_CHAR,inFilledSize); - } - - bool receivedData(){ - int flag; - MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE); - return flag; - } - - bool receivedData(int* const tag){ - MPI_Status status; - int flag; - MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status); - *tag = status.MPI_TAG; - return flag; - } - - int processId() { - int id; - MPI_Comm_rank(MPI_COMM_WORLD,&id); - return id; - } - - int processCount() { - int count; - MPI_Comm_size(MPI_COMM_WORLD,&count); - return count; - } - - void processBarrier() { - MPI_Barrier(MPI_COMM_WORLD); - } - - void abort(const int inErrorCode = 1) { - MPI_Abort(MPI_COMM_WORLD, inErrorCode); - } - - template < class T > - MPI_Datatype getType(){ + static MPI_Datatype GetType(int&){ return MPI_INT; } - template< class T > - T reduceSum(T data){ - T result; - MPI_Reduce( &data, &result, 1, getType<T>(), MPI_SUM, 0, MPI_COMM_WORLD ); - return result; - } - - template< class T > - T reduceMin(T myMin){ - T result; - MPI_Reduce( &myMin, &result, 1, getType<T>(), MPI_MIN, 0, MPI_COMM_WORLD ); - return result; - } + //////////////////////////////////////////////////////////// + // Mpi interface functions + //////////////////////////////////////////////////////////// - template< class T > - T reduceMax(T myMax){ - T result; - MPI_Reduce( &myMax, &result, 1, getType<T>(), MPI_MAX, 0, MPI_COMM_WORLD ); - return result; + /** generic mpi assert function */ + static void Assert(const int test, const unsigned line, const char* const message = 0){ + if(test != MPI_SUCCESS){ + printf("[ERROR-QS] Test failled at line %d, result is %d", line, test); + if(message) printf(", message: %s",message); + printf("\n"); + fflush(stdout); + MPI_Abort(MPI_COMM_WORLD, int(line) ); + } } - template< class T > - T broadcast(T value, const int from = 0){ - MPI_Bcast ( &value, 1, getType<T>(), from, MPI_COMM_WORLD ); - return value; + /** Compute a left index from data */ + template <class T> + static T GetLeft(const T inSize, const int inIdProc, const int inNbProc) { + const double step = (double(inSize) / inNbProc); + return T(ceil(step * inIdProc)); } -#else -//////////////////////////////////////////////////////// -// Without MPI -//////////////////////////////////////////////////////// - typedef int Request; - - FMpi(int inArgc, char ** inArgv ) {} - - void allgather(void* const , const int , void* const , const int, MPI_Datatype = 0){ - } - - void sendData(const int, const int, void* const, const int ){} - - void isendData(const int , const int , void* const , const int , Request*const ){ - } - - void iWait( Request*const ){ - } - - void receiveData(const int, void* const, int* const inSource = 0, int* const inTag = 0, int* const inFilledSize = 0){ - if(inSource) *inSource = 0; - if(inTag) *inTag = 0; - if(inFilledSize) *inFilledSize = 0; - } - - void receiveDataFromTag(const int , const int , void* const , int* const inSource = 0, int* const inFilledSize = 0){ - if(inSource) *inSource = 0; - if(inFilledSize) *inFilledSize = 0; - } - - void receiveDataFromTagAndSource(const int , const int , const int , void* const , int* const inFilledSize = 0){ - if(inFilledSize) *inFilledSize = 0; - } - - bool receivedData(){ - return false; - } - - bool receivedData(int* const){ - return false; - } - - int processId() { - return 0; - } - - int processCount() { - return 1; - } - - void processBarrier() {} - - void abort(const int inErrorCode = 1) { - exit(inErrorCode); - } - - template< class T > - T reduceSum(T data){ - return data; - } - - template< class T > - T reduceMin(T myMin){ - return myMin; - } - - template< class T > - T reduceMax(T myMax){ - return myMax; - } - - template< class T > - T broadcast(T value, const int = 0){ - return value; - } - -#endif - -//////////////////////////////////////////////////////// -// To use in any case -//////////////////////////////////////////////////////// - bool isMaster() { - return !processId(); - } - - bool isAlone() { - return processCount() == 1; - } - - bool isSlave() { - return processId(); - } - - template< class T > - T getLeft(const T inSize) { - const float step = (float(inSize) / float(processCount())); - return T(FMath::Ceil(step * float(processId()))); - } - - template< class T > - T getRight(const T inSize) { - const float step = (float(inSize) / float(processCount())); - const T res = T(FMath::Ceil(step * float(processId()+1))); + /** Compute a right index from data */ + template <class T> + static T GetRight(const T inSize, const int inIdProc, const int inNbProc) { + const double step = (double(inSize) / inNbProc); + const T res = T(ceil(step * (inIdProc+1))); if(res > inSize) return inSize; else return res; } - template< class T > - T getOtherRight(const T inSize, const int other) { - const float step = (float(inSize) / float(processCount())); - const T res = T(FMath::Ceil(step * float(other+1))); - if(res > inSize) return inSize; - else return res; + /** Compute a proc id from index & data */ + template <class T> + static int GetProc(const T position, const T inSize, const int inNbProc) { + const double step = double(inSize) / double(inNbProc); + return int(double(position)/step); } - template< class T > - int getProc(const int position, const T inSize) { - const float step = (float(inSize) / processCount()); - return int(position/step); - } + +private: + /** The original communicator */ + FComm* communicator; }; -#ifdef SCALFMM_USE_MPI -template <> -MPI_Datatype FMpi::getType<long long>(){ - return MPI_LONG_LONG; -} - -template <> -MPI_Datatype FMpi::getType<double>(){ - return MPI_DOUBLE; -} - -template <> -MPI_Datatype FMpi::getType<float>(){ - return MPI_FLOAT; -} - -template <> -MPI_Datatype FMpi::getType<int>(){ - return MPI_INT; -} -#endif #endif //FMPI_HPP diff --git a/Src/Utils/FQuickSort.hpp b/Src/Utils/FQuickSort.hpp index d61494d827a4b707ed874e2b6ec51fbfddf4b8d0..8a5579eaa8e01756a64ce7eec5514a653d3b4616 100644 --- a/Src/Utils/FQuickSort.hpp +++ b/Src/Utils/FQuickSort.hpp @@ -16,25 +16,22 @@ #include "FOmpBarrier.hpp" +/** This class is parallel quick sort + * It hold a mpi version + * + 2 openmp versions (one on tasks and the other like mpi) + * + a sequential version + * + * The task based algorithm is easy to undestand, + * for the mpi/openmp2nd please see + * Introduction to parallel computing (Grama Gupta Karypis Kumar) + */ + template <class SortType, class CompareType, class IndexType> class FQuickSort { //////////////////////////////////////////////////////////// // Miscialenous functions //////////////////////////////////////////////////////////// - - /** To get max between 2 values */ - template <class NumType> - static NumType Max(const NumType inV1, const NumType inV2){ - return (inV1 > inV2 ? inV1 : inV2); - } - - /** To get min between 2 values */ - template <class NumType> - static NumType Min(const NumType inV1, const NumType inV2){ - return (inV1 < inV2 ? inV1 : inV2); - } - /** swap to value */ template <class NumType> static inline void Swap(NumType& value, NumType& other){ @@ -43,92 +40,11 @@ class FQuickSort { other = temp; } - - //////////////////////////////////////////////////////////// - // Split information - //////////////////////////////////////////////////////////// - - static IndexType getLeft(const IndexType inSize, const int inIdProc, const int inNbProc) { - const double step = (double(inSize) / inNbProc); - return IndexType(ceil(step * inIdProc)); - } - - static IndexType getRight(const IndexType inSize, const int inIdProc, const int inNbProc) { - const double step = (double(inSize) / inNbProc); - const IndexType res = IndexType(ceil(step * (inIdProc+1))); - if(res > inSize) return inSize; - else return res; - } - - static int getProc(const IndexType position, const IndexType inSize, const int inNbProc) { - const double step = double(inSize) / double(inNbProc); - return int(double(position)/step); - } - - - //////////////////////////////////////////////////////////// - // MPI Function - //////////////////////////////////////////////////////////// - - /** generic mpi assert function */ - static void mpiassert(const int test, const unsigned line, const char* const message = 0){ - if(test != MPI_SUCCESS){ - printf("[ERROR-QS] Test failled at line %d, result is %d", line, test); - if(message) printf(", message: %s",message); - printf("\n"); - fflush(stdout); - MPI_Abort(MPI_COMM_WORLD, int(line) ); - } - } - - /** get current rank */ - static int MpiGetRank(MPI_Comm comm){ - int rank(0); - mpiassert( MPI_Comm_rank(comm, &rank), __LINE__ ); - return rank; - } - - /** get current nb procs */ - static int MpiGetNbProcs(MPI_Comm comm){ - int nb(0); - mpiassert( MPI_Comm_size(comm, &nb), __LINE__); - return nb; - } - - /** get the pivot from several procs in Comm */ - static CompareType MpiGetPivot(CompareType myData, MPI_Comm& comm, const int nbProcs){ - CompareType result[nbProcs]; - mpiassert( MPI_Allgather( &myData, sizeof(CompareType), MPI_BYTE, result, sizeof(CompareType), MPI_BYTE, comm), __LINE__ ); - // We do an average of the first element of each proc array - CompareType sum = 0; - for(int idxProc = 0 ; idxProc < nbProcs ;++idxProc){ - sum += result[idxProc] / nbProcs; - } - return sum ; - } - - /** change the group and communicator */ - static void MpiChangeGroup(MPI_Group& currentGroup, MPI_Comm& currentComm, const int from , const int to){ - int procsIdArray[to - from + 1]; - for(int idxProc = from ;idxProc <= to ; ++idxProc){ - procsIdArray[idxProc - from] = idxProc; - } - - MPI_Group previousGroup = currentGroup; - mpiassert( MPI_Group_incl(previousGroup, to - from + 1 , procsIdArray, ¤tGroup), __LINE__ ); - - MPI_Comm previousComm = currentComm; - mpiassert( MPI_Comm_create(previousComm, currentGroup, ¤tComm), __LINE__ ); - - MPI_Comm_free(&previousComm); - MPI_Group_free(&previousGroup); - } - //////////////////////////////////////////////////////////// // Quick sort //////////////////////////////////////////////////////////// - /* use in the sequential qs */ + /* Use in the sequential qs */ static IndexType QsPartition(SortType array[], IndexType left, IndexType right){ const IndexType part = right; Swap(array[part],array[(right + left ) / 2]); @@ -153,7 +69,7 @@ class FQuickSort { return left; } - /* a local iteration of qs */ + /* A local iteration of qs */ static void QsLocal(SortType array[], const CompareType& pivot, IndexType myLeft, IndexType myRight, IndexType& prefix, IndexType& sufix){ @@ -179,7 +95,7 @@ class FQuickSort { sufix = myRight - myLeft - prefix + 1; } - /* a sequential qs */ + /* The sequential qs */ static void QsSequentialStep(SortType array[], const IndexType left, const IndexType right){ if(left < right){ const IndexType part = QsPartition(array, left, right); @@ -206,7 +122,7 @@ class FQuickSort { } } - /* the openmp qs */ + /* The openmp qs */ static void QsOmpNoTask(SortType array[], const IndexType size){ FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Quicksort" , __FILE__ , __LINE__) ); struct Fix{ @@ -230,8 +146,8 @@ class FQuickSort { { const int myThreadId = omp_get_thread_num(); - IndexType myLeft = getLeft(size, myThreadId, omp_get_num_threads()); - IndexType myRight = getRight(size, myThreadId, omp_get_num_threads()) - 1; + IndexType myLeft = FMpi::GetLeft(size, myThreadId, omp_get_num_threads()); + IndexType myRight = FMpi::GetRight(size, myThreadId, omp_get_num_threads()) - 1; IndexType startIndex = 0; IndexType endIndex = size - 1; @@ -285,7 +201,7 @@ class FQuickSort { barriers[firstProc].wait(); // find my next QsLocal part - int splitProc = getProc(sufoffset - startIndex, nbElements, lastProc - firstProc + 1) + firstProc; + int splitProc = FMpi::GetProc(sufoffset - startIndex, nbElements, lastProc - firstProc + 1) + firstProc; if(splitProc == lastProc){ --splitProc; } @@ -299,8 +215,8 @@ class FQuickSort { firstProc = splitProc + 1; } - myLeft = getLeft(endIndex - startIndex + 1, myThreadId - firstProc, lastProc - firstProc + 1) + startIndex; - myRight = getRight(endIndex - startIndex + 1, myThreadId - firstProc, lastProc - firstProc + 1) + startIndex - 1; + myLeft = FMpi::GetLeft(endIndex - startIndex + 1, myThreadId - firstProc, lastProc - firstProc + 1) + startIndex; + myRight = FMpi::GetRight(endIndex - startIndex + 1, myThreadId - firstProc, lastProc - firstProc + 1) + startIndex - 1; } QsSequentialStep(array,myLeft,myRight); @@ -331,7 +247,7 @@ public: } /* the mpi qs */ - static void QsMpi(const SortType originalArray[], IndexType size, SortType* & outputArray, IndexType& outputSize, MPI_Comm originalComm = MPI_COMM_WORLD){ + static void QsMpi(const SortType originalArray[], IndexType size, SortType* & outputArray, IndexType& outputSize, const FMpi::FComm& originalComm){ FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Quicksort" , __FILE__ , __LINE__) ); // We need a structure see the algorithm detail to know more struct Fix{ @@ -345,27 +261,22 @@ public: outputSize = size; // alloc outputArray to store pre/sufixe, maximum needed is nb procs[comm world] + 1 - Fix fixes[MpiGetNbProcs(MPI_COMM_WORLD) + 1]; - Fix fixesSum[MpiGetNbProcs(MPI_COMM_WORLD) + 1]; - memset(fixes,0,sizeof(Fix) * MpiGetNbProcs(MPI_COMM_WORLD)); - memset(fixesSum,0,sizeof(Fix) * (MpiGetNbProcs(MPI_COMM_WORLD) + 1) ); + Fix fixes[originalComm.processCount() + 1]; + Fix fixesSum[originalComm.processCount() + 1]; + memset(fixes,0,sizeof(Fix) * originalComm.processCount()); + memset(fixesSum,0,sizeof(Fix) * (originalComm.processCount() + 1) ); // receiving buffer IndexType bufferSize = 0; SortType* buffer = 0; - // Create the first group - MPI_Group currentGroup; // Create the first com - MPI_Comm currentComm; - - mpiassert( MPI_Comm_dup(originalComm, ¤tComm), __LINE__ ); - mpiassert( MPI_Comm_group(currentComm, ¤tGroup), __LINE__ ); + FMpi::FComm currentComm(originalComm.getComm()); // While I am not working alone on my own data - while( MpiGetNbProcs(currentComm) != 1 ){ - const int currentRank = MpiGetRank(currentComm); - const int currentNbProcs = MpiGetNbProcs(currentComm); + while( currentComm.processCount() != 1 ){ + const int currentRank = currentComm.processId(); + const int currentNbProcs = currentComm.processCount(); MPI_Request requests[currentNbProcs * 2]; int iterRequest = 0; @@ -375,12 +286,12 @@ public: ///////////////////////////////////////////////// // sort QsLocal part of the outputArray - const CompareType pivot = MpiGetPivot(outputArray[size/2], currentComm, currentNbProcs); + const CompareType pivot = currentComm.reduceAverageAll( CompareType(outputArray[size/2]) ); Fix myFix; QsLocal(outputArray, pivot, 0, size - 1, myFix.pre, myFix.suf); // exchange fixes - mpiassert( MPI_Allgather( &myFix, sizeof(Fix), MPI_BYTE, fixes, sizeof(Fix), MPI_BYTE, currentComm), __LINE__ ); + FMpi::Assert( MPI_Allgather( &myFix, sizeof(Fix), MPI_BYTE, fixes, sizeof(Fix), MPI_BYTE, currentComm.getComm()), __LINE__ ); // each procs compute the summation fixesSum[0].pre = 0; @@ -391,7 +302,7 @@ public: } // then I need to know which procs will be in the middle - int splitProc = getProc(fixesSum[currentNbProcs].pre - 1, fixesSum[currentNbProcs].pre + fixesSum[currentNbProcs].suf, currentNbProcs); + int splitProc = FMpi::GetProc(fixesSum[currentNbProcs].pre - 1, fixesSum[currentNbProcs].pre + fixesSum[currentNbProcs].suf, currentNbProcs); if(splitProc == currentNbProcs - 1){ --splitProc; } @@ -406,19 +317,19 @@ public: const int firstProcInSuf = splitProc + 1; const IndexType elementsInSuf = fixesSum[currentNbProcs].suf; - const int firstProcToSend = getProc(fixesSum[currentRank].suf, elementsInSuf, procsInSuf) + firstProcInSuf; - const int lastProcToSend = getProc(fixesSum[currentRank + 1].suf - 1, elementsInSuf, procsInSuf) + firstProcInSuf; + const int firstProcToSend = FMpi::GetProc(fixesSum[currentRank].suf, elementsInSuf, procsInSuf) + firstProcInSuf; + const int lastProcToSend = FMpi::GetProc(fixesSum[currentRank + 1].suf - 1, elementsInSuf, procsInSuf) + firstProcInSuf; IndexType sent = 0; for(int idxProc = firstProcToSend ; idxProc <= lastProcToSend ; ++idxProc){ - const IndexType thisProcRight = getRight(elementsInSuf, idxProc - firstProcInSuf, procsInSuf); + const IndexType thisProcRight = FMpi::GetRight(elementsInSuf, idxProc - firstProcInSuf, procsInSuf); IndexType sendToProc = thisProcRight - fixesSum[currentRank].suf - sent; if(sendToProc + sent > fixes[currentRank].suf){ sendToProc = fixes[currentRank].suf - sent; } if( sendToProc ){ - mpiassert( MPI_Isend(&outputArray[sent + fixes[currentRank].pre], int(sendToProc * sizeof(SortType)), MPI_BYTE , idxProc, FMpi::TagQuickSort, currentComm, &requests[iterRequest++]), __LINE__ ); + FMpi::Assert( MPI_Isend(&outputArray[sent + fixes[currentRank].pre], int(sendToProc * sizeof(SortType)), MPI_BYTE , idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]), __LINE__ ); } sent += sendToProc; } @@ -429,19 +340,19 @@ public: const int procsInPre = splitProc + 1; const IndexType elementsInPre = fixesSum[currentNbProcs].pre; - const int firstProcToSend = getProc(fixesSum[currentRank].pre, elementsInPre, procsInPre); - const int lastProcToSend = getProc(fixesSum[currentRank + 1].pre - 1, elementsInPre, procsInPre); + const int firstProcToSend = FMpi::GetProc(fixesSum[currentRank].pre, elementsInPre, procsInPre); + const int lastProcToSend = FMpi::GetProc(fixesSum[currentRank + 1].pre - 1, elementsInPre, procsInPre); IndexType sent = 0; for(int idxProc = firstProcToSend ; idxProc <= lastProcToSend ; ++idxProc){ - const IndexType thisProcRight = getRight(elementsInPre, idxProc, procsInPre); + const IndexType thisProcRight = FMpi::GetRight(elementsInPre, idxProc, procsInPre); IndexType sendToProc = thisProcRight - fixesSum[currentRank].pre - sent; if(sendToProc + sent > fixes[currentRank].pre){ sendToProc = fixes[currentRank].pre - sent; } if(sendToProc){ - mpiassert( MPI_Isend(&outputArray[sent], int(sendToProc * sizeof(SortType)), MPI_BYTE , idxProc, FMpi::TagQuickSort, currentComm, &requests[iterRequest++]), __LINE__ ); + FMpi::Assert( MPI_Isend(&outputArray[sent], int(sendToProc * sizeof(SortType)), MPI_BYTE , idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]), __LINE__ ); } sent += sendToProc; } @@ -456,8 +367,8 @@ public: const int procsInPre = splitProc + 1; const IndexType elementsInPre = fixesSum[currentNbProcs].pre; - IndexType myLeft = getLeft(elementsInPre, currentRank, procsInPre); - IndexType myRightLimit = getRight(elementsInPre, currentRank, procsInPre); + IndexType myLeft = FMpi::GetLeft(elementsInPre, currentRank, procsInPre); + IndexType myRightLimit = FMpi::GetRight(elementsInPre, currentRank, procsInPre); size = myRightLimit - myLeft; if(bufferSize < size){ @@ -474,18 +385,18 @@ public: IndexType indexArray = 0; while( idxProc < currentNbProcs && indexArray < myRightLimit - myLeft){ - const IndexType firstIndex = Max(myLeft , fixesSum[idxProc].pre ); - const IndexType endIndex = Min(fixesSum[idxProc + 1].pre, myRightLimit); + const IndexType firstIndex = FMath::Max(myLeft , fixesSum[idxProc].pre ); + const IndexType endIndex = FMath::Min(fixesSum[idxProc + 1].pre, myRightLimit); if( (endIndex - firstIndex) ){ - mpiassert( MPI_Irecv(&buffer[indexArray], int((endIndex - firstIndex) * sizeof(SortType)), MPI_BYTE, idxProc, FMpi::TagQuickSort, currentComm, &requests[iterRequest++]), __LINE__ ); + FMpi::Assert( MPI_Irecv(&buffer[indexArray], int((endIndex - firstIndex) * sizeof(SortType)), MPI_BYTE, idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]), __LINE__ ); } indexArray += endIndex - firstIndex; ++idxProc; } // Proceed all send/receive - mpiassert( MPI_Waitall(iterRequest, requests, MPI_STATUSES_IGNORE), __LINE__ ); + FMpi::Assert( MPI_Waitall(iterRequest, requests, MPI_STATUSES_IGNORE), __LINE__ ); - MpiChangeGroup(currentGroup, currentComm, 0, splitProc); + currentComm.groupReduce( 0, splitProc); } else{ // I am in L-Part (larger than pivot) @@ -493,8 +404,8 @@ public: const IndexType elementsInSuf = fixesSum[currentNbProcs].suf; const int rankInL = currentRank - splitProc - 1; - IndexType myLeft = getLeft(elementsInSuf, rankInL, procsInSuf); - IndexType myRightLimit = getRight(elementsInSuf, rankInL, procsInSuf); + IndexType myLeft = FMpi::GetLeft(elementsInSuf, rankInL, procsInSuf); + IndexType myRightLimit = FMpi::GetRight(elementsInSuf, rankInL, procsInSuf); size = myRightLimit - myLeft; if(bufferSize < size){ @@ -511,18 +422,18 @@ public: IndexType indexArray = 0; while( idxProc < currentNbProcs && indexArray < myRightLimit - myLeft){ - const IndexType firstIndex = Max(myLeft , fixesSum[idxProc].suf ); - const IndexType endIndex = Min(fixesSum[idxProc + 1].suf, myRightLimit); + const IndexType firstIndex = FMath::Max(myLeft , fixesSum[idxProc].suf ); + const IndexType endIndex = FMath::Min(fixesSum[idxProc + 1].suf, myRightLimit); if( (endIndex - firstIndex) ){ - mpiassert( MPI_Irecv(&buffer[indexArray], int((endIndex - firstIndex) * sizeof(SortType)), MPI_BYTE, idxProc, FMpi::TagQuickSort, currentComm, &requests[iterRequest++]), __LINE__ ); + FMpi::Assert( MPI_Irecv(&buffer[indexArray], int((endIndex - firstIndex) * sizeof(SortType)), MPI_BYTE, idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]), __LINE__ ); } indexArray += endIndex - firstIndex; ++idxProc; } // Proceed all send/receive - mpiassert( MPI_Waitall(iterRequest, requests, MPI_STATUSES_IGNORE), __LINE__ ); + FMpi::Assert( MPI_Waitall(iterRequest, requests, MPI_STATUSES_IGNORE), __LINE__ ); - MpiChangeGroup(currentGroup, currentComm, splitProc + 1, currentNbProcs - 1); + currentComm.groupReduce( splitProc + 1, currentNbProcs - 1); } @@ -543,8 +454,6 @@ public: // Clean delete[] buffer; - MPI_Comm_free(¤tComm); - MPI_Group_free(¤tGroup); // Normal Quick sort QsOmp(outputArray, size); diff --git a/Tests/testFmbAlgorithmProc.cpp b/Tests/testFmbAlgorithmProc.cpp index 2c2d4ea4be0df3d8a9c9a4b1a58e2f145e7a078d..8f99af1a30db79ce3c9af2cc6a0757e0dd33906c 100644 --- a/Tests/testFmbAlgorithmProc.cpp +++ b/Tests/testFmbAlgorithmProc.cpp @@ -246,7 +246,7 @@ int main(int argc, char ** argv){ std::cout << "Opening : " << filename << "\n"; } - FMpiFmaLoader<ParticleClass> loader(filename, app); + FMpiFmaLoader<ParticleClass> loader(filename, app.global()); if(!loader.isOpen()){ std::cout << "Loader Error, " << filename << " is missing\n"; return 1; @@ -262,27 +262,14 @@ int main(int argc, char ** argv){ std::cout << "\tHeight : " << NbLevels << " \t sub-height : " << SizeSubLevels << std::endl; counter.tic(); - if( app.processCount() != 1){ + if( app.global().processCount() != 1){ ////////////////////////////////////////////////////////////////////////////////// - // We sort our particles + // Build tree from mpi loader ////////////////////////////////////////////////////////////////////////////////// - std::cout << "Create intervals ..." << std::endl; + std::cout << "Build Tree ..." << std::endl; counter.tic(); - FMpiTreeBuilder<ParticleClass> builder; - - builder.splitAndSortFile(loader, NbLevels); - - counter.tac(); - std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; - - ////////////////////////////////////////////////////////////////////////////////// - // Now we can build the real tree - ////////////////////////////////////////////////////////////////////////////////// - std::cout << "Create real tree ..." << std::endl; - counter.tic(); - - builder.intervalsToTree(tree); + FMpiTreeBuilder<ParticleClass>::LoaderToTree(app.global(), loader, tree); counter.tac(); std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; @@ -290,10 +277,9 @@ int main(int argc, char ** argv){ ////////////////////////////////////////////////////////////////////////////////// } else{ - FFmaBinLoader<ParticleClass> loaderSeq(filename); ParticleClass partToInsert; - for(FSize idxPart = 0 ; idxPart < loaderSeq.getNumberOfParticles() ; ++idxPart){ - loaderSeq.fillParticle(partToInsert); + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + loader.fillParticle(partToInsert); tree.insert(partToInsert); } } @@ -307,7 +293,7 @@ int main(int argc, char ** argv){ counter.tic(); KernelClass kernels(NbLevels,loader.getBoxWidth()); - FmmClass algo(app,&tree,&kernels); + FmmClass algo(app.global(),&tree,&kernels); algo.execute(); counter.tac(); @@ -333,13 +319,13 @@ int main(int argc, char ** argv){ std::cout << "My potential is " << potential << std::endl; - potential = app.reduceSum(potential); - forces.setX(app.reduceSum(forces.getX())); - forces.setY(app.reduceSum(forces.getY())); - forces.setZ(app.reduceSum(forces.getZ())); + potential = app.global().reduceSum(potential); + forces.setX(app.global().reduceSum(forces.getX())); + forces.setY(app.global().reduceSum(forces.getY())); + forces.setZ(app.global().reduceSum(forces.getZ())); - if(app.isMaster()){ + if(app.global().processId() == 0){ std::cout << "Foces Sum x = " << forces.getX() << " y = " << forces.getY() << " z = " << forces.getZ() << std::endl; std::cout << "Potential Sum = " << potential << std::endl; } diff --git a/Tests/testFmmAlgorithmProc.cpp b/Tests/testFmmAlgorithmProc.cpp index 23e0d1966a69689e5339c5680fb9a5422be08081..a35cc8bfb734954cbccdd0bbc58a83f5157d140c 100644 --- a/Tests/testFmmAlgorithmProc.cpp +++ b/Tests/testFmmAlgorithmProc.cpp @@ -48,7 +48,7 @@ // Check if tree is built correctly template<class OctreeClass> void ValidateTree(OctreeClass& realTree, - OctreeClass& treeValide, FMpi& app){ + OctreeClass& treeValide, const FMpi::FComm& comm){ FSize totalNbLeafs = 0; { @@ -59,10 +59,10 @@ void ValidateTree(OctreeClass& realTree, } while(octreeIterator.moveRight()); } - const FSize myLeftLeaf = app.getLeft(totalNbLeafs); - const FSize myRightLeaf = app.getRight(totalNbLeafs); + const FSize myLeftLeaf = comm.getLeft(totalNbLeafs); + const FSize myRightLeaf = comm.getRight(totalNbLeafs); - //printf("%d should go from %d to %d leaf (on %d total leafs)\n", app.processId(), myLeftLeaf, myRightLeaf, totalNbLeafs); + //printf("%d should go from %d to %d leaf (on %d total leafs)\n", comm.processId(), myLeftLeaf, myRightLeaf, totalNbLeafs); typename OctreeClass::Iterator octreeIteratorValide(&treeValide); octreeIteratorValide.gotoBottomLeft(); @@ -335,7 +335,7 @@ int main(int argc, char ** argv){ std::cout << "Opening : " << filename << "\n"; } - FMpiFmaLoader<ParticleClass> loader(filename,app); + FMpiFmaLoader<ParticleClass> loader(filename,app.global()); if(!loader.isOpen()){ std::cout << "Loader Error, " << filename << " is missing\n"; return 1; @@ -344,27 +344,14 @@ int main(int argc, char ** argv){ // The real tree to work on OctreeClass realTree(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox()); - if( app.processCount() != 1){ + if( app.global().processCount() != 1){ ////////////////////////////////////////////////////////////////////////////////// - // We sort our particles + // Build tree from mpi loader ////////////////////////////////////////////////////////////////////////////////// - std::cout << "Create intervals ..." << std::endl; + std::cout << "Build Tree ..." << std::endl; counter.tic(); - FMpiTreeBuilder<ParticleClass> builder; - - builder.splitAndSortFile(loader, NbLevels); - - counter.tac(); - std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; - - ////////////////////////////////////////////////////////////////////////////////// - // Now we can build the real tree - ////////////////////////////////////////////////////////////////////////////////// - std::cout << "Create real tree ..." << std::endl; - counter.tic(); - - builder.intervalsToTree(realTree); + FMpiTreeBuilder<ParticleClass>::LoaderToTree(app.global(), loader, realTree); counter.tac(); std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; @@ -372,10 +359,9 @@ int main(int argc, char ** argv){ ////////////////////////////////////////////////////////////////////////////////// } else{ - FFmaBinLoader<ParticleClass> loaderSeq(filename); ParticleClass partToInsert; - for(FSize idxPart = 0 ; idxPart < loaderSeq.getNumberOfParticles() ; ++idxPart){ - loaderSeq.fillParticle(partToInsert); + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + loader.fillParticle(partToInsert); realTree.insert(partToInsert); } } @@ -400,7 +386,7 @@ int main(int argc, char ** argv){ std::cout << "Validate tree ..." << std::endl; counter.tic(); - ValidateTree(realTree, treeValide, app); + ValidateTree(realTree, treeValide, app.global()); counter.tac(); std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; @@ -412,7 +398,7 @@ int main(int argc, char ** argv){ KernelClass kernels; - FmmClassProc algo(app,&realTree,&kernels); + FmmClassProc algo(app.global(),&realTree,&kernels); algo.execute(); counter.tac();