diff --git a/Src/Files/FMpiTreeBuilder.hpp b/Src/Files/FMpiTreeBuilder.hpp index 6b72423ba0232a14c485466aab86f7e78a4c70ac..31b4ab9564efd8ad96cd38ee0eff9aab66a158d9 100755 --- a/Src/Files/FMpiTreeBuilder.hpp +++ b/Src/Files/FMpiTreeBuilder.hpp @@ -34,278 +34,278 @@ template<class ParticleClass> class FMpiTreeBuilder{ public: - /** What sorting algorithm to use */ - enum SortingType{ - QuickSort, - BitonicSort, - }; + /** What sorting algorithm to use */ + enum SortingType{ + QuickSort, + BitonicSort, + }; - /** A particle may not have a MortonIndex Method + /** A particle may not have a MortonIndex Method * but they are sorted on their morton index * so this struct store a particle + its index */ - struct IndexedParticle{ - public: - MortonIndex index; - ParticleClass particle; + struct IndexedParticle{ + public: + MortonIndex index; + ParticleClass particle; + + operator MortonIndex(){ + return this->index; + } + }; - operator MortonIndex(){ - return this->index; - } - }; - private: - /** This method has been tacken from the octree + /** This method has been tacken from the octree * it computes a tree coordinate (x or y or z) from real position */ - static int getTreeCoordinate(const FReal inRelativePosition, const FReal boxWidthAtLeafLevel) { - const FReal indexFReal = inRelativePosition / boxWidthAtLeafLevel; - const int index = int(FMath::dfloor(indexFReal)); - if( index && FMath::LookEqual(inRelativePosition, boxWidthAtLeafLevel * FReal(index) ) ){ - return index - 1; + static int getTreeCoordinate(const FReal inRelativePosition, const FReal boxWidthAtLeafLevel) { + const FReal indexFReal = inRelativePosition / boxWidthAtLeafLevel; + const int index = int(FMath::dfloor(indexFReal)); + if( index && FMath::LookEqual(inRelativePosition, boxWidthAtLeafLevel * FReal(index) ) ){ + return index - 1; + } + return index; } - return index; - } - ////////////////////////////////////////////////////////////////////////// - // To sort tha particles we hold - ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + // To sort tha particles we hold + ////////////////////////////////////////////////////////////////////////// - template <class LoaderClass> - static void SortParticles( const FMpi::FComm& communicator, LoaderClass& loader, const SortingType type, - const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); + template <class LoaderClass> + static void SortParticles( const FMpi::FComm& communicator, LoaderClass& loader, const SortingType type, + const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); - // create particles - IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()]; - FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles()); - FPoint boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2)); - FTreeCoordinate host; + // create particles + IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()]; + FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles()); + FPoint boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2)); + FTreeCoordinate host; - const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (TreeHeight - 1) ); - for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ - loader.fillParticle(realParticlesIndexed[idxPart].particle); - host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); - host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); - host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); + const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (TreeHeight - 1) ); + for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + loader.fillParticle(realParticlesIndexed[idxPart].particle); + host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); + host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); + host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); - realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); - } + realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); + } - // sort particles - if(type == QuickSort){ - FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(), *outputArray, *outputSize,communicator); - delete [] (realParticlesIndexed); - } - else { - FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, loader.getNumberOfParticles(), communicator ); - *outputArray = realParticlesIndexed; - *outputSize = loader.getNumberOfParticles(); + // sort particles + if(type == QuickSort){ + FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(), *outputArray, *outputSize,communicator); + delete [] (realParticlesIndexed); + } + else { + FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, loader.getNumberOfParticles(), communicator ); + *outputArray = realParticlesIndexed; + *outputSize = loader.getNumberOfParticles(); + } } - } - static void SortParticlesFromArray( const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, const SortingType type, - const FPoint& centerOfBox, const FReal boxWidth, - const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); + static void SortParticlesFromArray( const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, const SortingType type, + const FPoint& centerOfBox, const FReal boxWidth, + const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); - // create particles - IndexedParticle*const realParticlesIndexed = new IndexedParticle[size]; - FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * size); + // create particles + IndexedParticle*const realParticlesIndexed = new IndexedParticle[size]; + FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * size); - FPoint boxCorner(centerOfBox - (boxWidth/2)); - FTreeCoordinate host; + FPoint boxCorner(centerOfBox - (boxWidth/2)); + FTreeCoordinate host; - const FReal boxWidthAtLeafLevel = boxWidth / FReal(1 << (TreeHeight - 1) ); + const FReal boxWidthAtLeafLevel = boxWidth / FReal(1 << (TreeHeight - 1) ); - for(int idxPart = 0 ; idxPart < size ; ++idxPart){ - realParticlesIndexed[idxPart].particle = array[idxPart]; - host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); - host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); - host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); + for(int idxPart = 0 ; idxPart < size ; ++idxPart){ + realParticlesIndexed[idxPart].particle = array[idxPart]; + host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); + host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); + host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); - realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); - } + realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); + } - // sort particles - if(type == QuickSort){ - FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, size, *outputArray, *outputSize,communicator); - delete [] (realParticlesIndexed); - } - else { - FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, size, communicator ); - *outputArray = realParticlesIndexed; - *outputSize = size; + // sort particles + if(type == QuickSort){ + FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, size, outputArray, outputSize,communicator); + delete [] (realParticlesIndexed); + } + else { + FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, size, communicator ); + *outputArray = realParticlesIndexed; + *outputSize = size; + } } - } - ////////////////////////////////////////////////////////////////////////// - // To merge the leaves - ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + // To merge the leaves + ////////////////////////////////////////////////////////////////////////// - static void MergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize* workingSize, - FSize ** leavesIndices, ParticleClass** leavesArray, FSize* const leavesSize){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); - const int rank = communicator.processId(); - const int nbProcs = communicator.processCount(); - if(nbProcs == 1){ - //Nothing to do there : there is no need to verify if leaves are split, if there is one process... - } - else{ - // be sure there is no splited leaves - // to do that we exchange the first index with the left proc - { - FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) ); - - MortonIndex otherFirstIndex = -1; - if((*workingSize) != 0 && rank != 0 && rank != nbProcs - 1){ - MPI_Sendrecv(&workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, - &otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, - communicator.getComm(), MPI_STATUS_IGNORE); - } - else if( rank == 0){ - MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE); - } - else if( rank == nbProcs - 1){ - MPI_Send( &workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm()); - } - else { - MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE); - MPI_Send(&otherFirstIndex, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm()); - } - - // at this point every one know the first index of his right neighbors - const bool needToRecvBeforeSend = (rank != 0 && (((*workingSize) && otherFirstIndex == workingArray[0].index ) || !(*workingSize))); - MPI_Request requestSendLeaf; - - IndexedParticle* sendBuffer = nullptr; - if(rank != nbProcs - 1 && needToRecvBeforeSend == false){ - FSize idxPart = (*workingSize) - 1 ; - while(idxPart >= 0 && workingArray[idxPart].index == otherFirstIndex){ - --idxPart; - } - const int particlesToSend = int((*workingSize) - 1 - idxPart); - if(particlesToSend){ - (*workingSize) -= particlesToSend; - sendBuffer = new IndexedParticle[particlesToSend]; - memcpy(sendBuffer, &workingArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle)); - - MPI_Isend( sendBuffer, particlesToSend * int(sizeof(IndexedParticle)), MPI_BYTE, - rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf); - } - else{ - MPI_Isend( nullptr, 0, MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf); - } - } - - if( rank != 0 ){ - int sendByOther = 0; - - MPI_Status probStatus; - MPI_Probe(rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), &probStatus); - MPI_Get_count( &probStatus, MPI_BYTE, &sendByOther); - - if(sendByOther){ - sendByOther /= int(sizeof(IndexedParticle)); - - const IndexedParticle* const reallocOutputArray = workingArray; - const FSize reallocOutputSize = (*workingSize); - - (*workingSize) += sendByOther; - workingArray = new IndexedParticle[(*workingSize)]; - FMemUtils::memcpy(&workingArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle)); - delete[] reallocOutputArray; - - MPI_Recv(workingArray, int(sizeof(IndexedParticle)) * sendByOther, MPI_BYTE, - rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE); - } - else{ - MPI_Recv( nullptr, 0, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE); - } - } - - if(rank != nbProcs - 1 && needToRecvBeforeSend == true){ - MPI_Send( workingArray, int((*workingSize) * sizeof(IndexedParticle)), MPI_BYTE, - rank + 1, FMpi::TagSplittedLeaf, communicator.getComm()); - delete[] workingArray; - workingArray = nullptr; - (*workingSize) = 0; - } - else if(rank != nbProcs - 1){ - MPI_Wait( &requestSendLeaf, MPI_STATUS_IGNORE); - delete[] sendBuffer; - sendBuffer = nullptr; - } - } - } - {//Filling the Array with leaves and parts //// COULD BE MOVED IN AN OTHER FUCTION - - (*leavesSize) = 0; //init ptr - (*leavesArray) = nullptr; //init ptr - (*leavesIndices) = nullptr; //init ptr - - if((*workingSize)){ - - //Array of particles - *leavesArray = new ParticleClass[(*workingSize)]; - - //Temporary array, we don't know yet how many leaves there will be - FSize * tempIndicesArray = new FSize[(*workingSize)]; - memset(tempIndicesArray,0,sizeof(FSize)*(*workingSize)); - - FSize idxInIndices = 0; - MortonIndex previousIndex = -1; - for(FSize idxPart = 0 ; idxPart < (*workingSize) ; ++idxPart){ - if(workingArray[idxPart].index != previousIndex){ - previousIndex = workingArray[idxPart]; - tempIndicesArray[idxInIndices] = idxPart; - idxInIndices++; - } - memcpy(&(*leavesArray)[idxPart],&workingArray[idxPart].particle,sizeof(ParticleClass)); - } - *leavesSize = idxInIndices; - - *leavesIndices = new FSize[idxInIndices]; - memcpy(*leavesIndices,tempIndicesArray,(*leavesSize)*sizeof(FSize)); - - } - - delete [] workingArray; - - workingArray = nullptr; - - } - } - - - ////////////////////////////////////////////////////////////////////////// - // To equalize (same number of leaves among the procs) - ////////////////////////////////////////////////////////////////////////// - - /** Put the interval into a tree */ - template <class ContainerClass> - static void EqualizeAndFillTree(const FMpi::FComm& communicator, ContainerClass* particlesSaver, - FSize * leavesIndices, ParticleClass* leavesArray, FSize& nbLeavesInIntervals, FSize nbParts, - FAbstractBalanceAlgorithm * balancer){ - - - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); - const int myRank = communicator.processId(); - const int nbProcs = communicator.processCount(); - const FSize myCurrentsParts = nbParts; - - if(nbProcs == 1){ //I'm the only one ! - //Just copy each part into the Particle Saver - for(FSize idxPart =0 ; idxPart < myCurrentsParts ; ++idxPart){ - particlesSaver->push(leavesArray[idxPart]); - } + static void MergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize* workingSize, + FSize ** leavesIndices, ParticleClass** leavesArray, FSize* const leavesSize){ + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); + const int rank = communicator.processId(); + const int nbProcs = communicator.processCount(); + if(nbProcs == 1){ + //Nothing to do there : there is no need to verify if leaves are split, if there is one process... + } + else{ + // be sure there is no splited leaves + // to do that we exchange the first index with the left proc + { + FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) ); + + MortonIndex otherFirstIndex = -1; + if((*workingSize) != 0 && rank != 0 && rank != nbProcs - 1){ + MPI_Sendrecv(&workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, + &otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, + communicator.getComm(), MPI_STATUS_IGNORE); + } + else if( rank == 0){ + MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE); + } + else if( rank == nbProcs - 1){ + MPI_Send( &workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm()); + } + else { + MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE); + MPI_Send(&otherFirstIndex, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm()); + } + + // at this point every one know the first index of his right neighbors + const bool needToRecvBeforeSend = (rank != 0 && (((*workingSize) && otherFirstIndex == workingArray[0].index ) || !(*workingSize))); + MPI_Request requestSendLeaf; + + IndexedParticle* sendBuffer = nullptr; + if(rank != nbProcs - 1 && needToRecvBeforeSend == false){ + FSize idxPart = (*workingSize) - 1 ; + while(idxPart >= 0 && workingArray[idxPart].index == otherFirstIndex){ + --idxPart; + } + const int particlesToSend = int((*workingSize) - 1 - idxPart); + if(particlesToSend){ + (*workingSize) -= particlesToSend; + sendBuffer = new IndexedParticle[particlesToSend]; + memcpy(sendBuffer, &workingArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle)); + + MPI_Isend( sendBuffer, particlesToSend * int(sizeof(IndexedParticle)), MPI_BYTE, + rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf); + } + else{ + MPI_Isend( nullptr, 0, MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf); + } + } + + if( rank != 0 ){ + int sendByOther = 0; + + MPI_Status probStatus; + MPI_Probe(rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), &probStatus); + MPI_Get_count( &probStatus, MPI_BYTE, &sendByOther); + + if(sendByOther){ + sendByOther /= int(sizeof(IndexedParticle)); + + const IndexedParticle* const reallocOutputArray = workingArray; + const FSize reallocOutputSize = (*workingSize); + + (*workingSize) += sendByOther; + workingArray = new IndexedParticle[(*workingSize)]; + FMemUtils::memcpy(&workingArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle)); + delete[] reallocOutputArray; + + MPI_Recv(workingArray, int(sizeof(IndexedParticle)) * sendByOther, MPI_BYTE, + rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE); + } + else{ + MPI_Recv( nullptr, 0, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE); + } + } + + if(rank != nbProcs - 1 && needToRecvBeforeSend == true){ + MPI_Send( workingArray, int((*workingSize) * sizeof(IndexedParticle)), MPI_BYTE, + rank + 1, FMpi::TagSplittedLeaf, communicator.getComm()); + delete[] workingArray; + workingArray = nullptr; + (*workingSize) = 0; + } + else if(rank != nbProcs - 1){ + MPI_Wait( &requestSendLeaf, MPI_STATUS_IGNORE); + delete[] sendBuffer; + sendBuffer = nullptr; + } + } + } + {//Filling the Array with leaves and parts //// COULD BE MOVED IN AN OTHER FUCTION + + (*leavesSize) = 0; //init ptr + (*leavesArray) = nullptr; //init ptr + (*leavesIndices) = nullptr; //init ptr + + if((*workingSize)){ + + //Array of particles + *leavesArray = new ParticleClass[(*workingSize)]; + + //Temporary array, we don't know yet how many leaves there will be + FSize * tempIndicesArray = new FSize[(*workingSize)]; + memset(tempIndicesArray,0,sizeof(FSize)*(*workingSize)); + + FSize idxInIndices = 0; + MortonIndex previousIndex = -1; + for(FSize idxPart = 0 ; idxPart < (*workingSize) ; ++idxPart){ + if(workingArray[idxPart].index != previousIndex){ + previousIndex = workingArray[idxPart]; + tempIndicesArray[idxInIndices] = idxPart; + idxInIndices++; + } + memcpy(&(*leavesArray)[idxPart],&workingArray[idxPart].particle,sizeof(ParticleClass)); + } + *leavesSize = idxInIndices; + + *leavesIndices = new FSize[idxInIndices]; + memcpy(*leavesIndices,tempIndicesArray,(*leavesSize)*sizeof(FSize)); + + } + + delete [] workingArray; + + workingArray = nullptr; + + } } - else{ + + + ////////////////////////////////////////////////////////////////////////// + // To equalize (same number of leaves among the procs) + ////////////////////////////////////////////////////////////////////////// + + /** Put the interval into a tree */ + template <class ContainerClass> + static void EqualizeAndFillTree(const FMpi::FComm& communicator, ContainerClass* particlesSaver, + FSize * leavesIndices, ParticleClass* leavesArray, FSize& nbLeavesInIntervals, FSize nbParts, + FAbstractBalanceAlgorithm * balancer){ + + + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); + const int myRank = communicator.processId(); + const int nbProcs = communicator.processCount(); + const FSize myCurrentsParts = nbParts; + + if(nbProcs == 1){ //I'm the only one ! + //Just copy each part into the Particle Saver + for(FSize idxPart =0 ; idxPart < myCurrentsParts ; ++idxPart){ + particlesSaver->push(leavesArray[idxPart]); + } + } + else{ // We have to know the number of leaves each procs holds FSize*const numberOfLeavesPerProc = new FSize[nbProcs]; memset(numberOfLeavesPerProc, 0, sizeof(FSize) * nbProcs); @@ -343,7 +343,7 @@ private: printf("%d] to %d from %llu to %llu\n", myRank, pack.idProc, pack.elementFrom, pack.elementTo); idxToSend[pack.idProc] = pack.elementFrom; toSend[pack.idProc] = (pack.elementTo ? leavesIndices[pack.elementTo-1] : 0) - - (pack.elementFrom ? leavesIndices[pack.elementFrom-1] : 0); + - (pack.elementFrom ? leavesIndices[pack.elementFrom-1] : 0); } //Then, we exchange the datas to send @@ -424,87 +424,87 @@ private: delete[] toSend; delete[] numberOfLeavesPerProc; } - delete[] leavesArray; - delete[] leavesIndices; - } + delete[] leavesArray; + delete[] leavesIndices; + } + - public: - - /** + + /** * Those three function get through pubilc/private member issues for testing purpose */ - static void testSortParticlesFromArray( const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, const SortingType type, - const FPoint& centerOfBox, const FReal boxWidth, - const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ - SortParticlesFromArray(communicator, array, size, type, centerOfBox, boxWidth, TreeHeight,&outputArray, &outputSize); - } - - - static void testMergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize* workingSize, - FSize ** leavesIndices, ParticleClass** leavesArray, FSize* const leavesSize){ - MergeLeaves(communicator,workingArray,workingSize,leavesIndices,leavesArray,leavesSize); - } - - template<class ContainerClass> - static void testEqualizeAndFillTree(const FMpi::FComm& communicator, ContainerClass* particlesSaver, - FSize * leavesIndices, ParticleClass* leavesArray, FSize& nbLeavesInIntervals, FSize nbParts, - FAbstractBalanceAlgorithm * balancer){ - EqualizeAndFillTree(communicator,particlesSaver,leavesIndices, leavesArray, nbLeavesInIntervals, nbParts, balancer); - } - - ////////////////////////////////////////////////////////////////////////// - // The builder function - ////////////////////////////////////////////////////////////////////////// - - template <class ContainerClass> - static void ArrayToTree(const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, - const FPoint& boxCenter, const FReal boxWidth, const int treeHeight, - ContainerClass* particleSaver, FAbstractBalanceAlgorithm* balancer,const SortingType type = QuickSort){ - - IndexedParticle* particlesArray = nullptr; - FSize particlesSize = 0; - SortParticlesFromArray(communicator, array, size, type, boxCenter, boxWidth, treeHeight, - &particlesArray, &particlesSize); - - ParticleClass* leavesArray = nullptr; - FSize leavesSize = 0; - FSize * leavesIndices = nullptr; - - MergeLeaves(communicator, particlesArray, &particlesSize, &leavesIndices, &leavesArray, &leavesSize); - - EqualizeAndFillTree(communicator, particleSaver, leavesIndices, leavesArray, leavesSize, particlesSize, balancer); - - /** To produce stats after the Equalize phase - */ - int* nbPartsPerProc; - int myParts = particleSaver->getSize(); - int nbProc = communicator.processCount(); - - if(communicator.processId()==0){ - nbPartsPerProc = new int[nbProc]; + static void testSortParticlesFromArray( const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, const SortingType type, + const FPoint& centerOfBox, const FReal boxWidth, + const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ + SortParticlesFromArray(communicator, array, size, type, centerOfBox, boxWidth, TreeHeight,&outputArray, &outputSize); + } + + + static void testMergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize* workingSize, + FSize ** leavesIndices, ParticleClass** leavesArray, FSize* const leavesSize){ + MergeLeaves(communicator,workingArray,workingSize,leavesIndices,leavesArray,leavesSize); + } + + template<class ContainerClass> + static void testEqualizeAndFillTree(const FMpi::FComm& communicator, ContainerClass* particlesSaver, + FSize * leavesIndices, ParticleClass* leavesArray, FSize& nbLeavesInIntervals, FSize nbParts, + FAbstractBalanceAlgorithm * balancer){ + EqualizeAndFillTree(communicator,particlesSaver,leavesIndices, leavesArray, nbLeavesInIntervals, nbParts, balancer); } - - MPI_Gather(&myParts,1,MPI_INT,nbPartsPerProc,1,MPI_INT,0,communicator.getComm()); - - if(communicator.processId()==0){ - float av=0; - int min=myParts, max=myParts; - for(int id=0; id<nbProc ; id++){ - if(nbPartsPerProc[id] > max){ - max=nbPartsPerProc[id]; - } - if(nbPartsPerProc[id] < min){ - min=nbPartsPerProc[id]; - } - av += float(nbPartsPerProc[id]); - } - av /= float(nbProc); - printf("End of Equalize Phase : \n \t Min number of parts : %d \n \t Max number of parts : %d \n \t Average number of parts : %e \n", - min,max,av); - delete[] nbPartsPerProc; + + ////////////////////////////////////////////////////////////////////////// + // The builder function + ////////////////////////////////////////////////////////////////////////// + + template <class ContainerClass> + static void ArrayToTree(const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, + const FPoint& boxCenter, const FReal boxWidth, const int treeHeight, + ContainerClass* particleSaver, FAbstractBalanceAlgorithm* balancer,const SortingType type = QuickSort){ + + IndexedParticle* particlesArray = nullptr; + FSize particlesSize = 0; + SortParticlesFromArray(communicator, array, size, type, boxCenter, boxWidth, treeHeight, + &particlesArray, &particlesSize); + + ParticleClass* leavesArray = nullptr; + FSize leavesSize = 0; + FSize * leavesIndices = nullptr; + + MergeLeaves(communicator, particlesArray, &particlesSize, &leavesIndices, &leavesArray, &leavesSize); + + EqualizeAndFillTree(communicator, particleSaver, leavesIndices, leavesArray, leavesSize, particlesSize, balancer); + + /** To produce stats after the Equalize phase + */ + int* nbPartsPerProc; + int myParts = particleSaver->getSize(); + int nbProc = communicator.processCount(); + + if(communicator.processId()==0){ + nbPartsPerProc = new int[nbProc]; + } + + MPI_Gather(&myParts,1,MPI_INT,nbPartsPerProc,1,MPI_INT,0,communicator.getComm()); + + if(communicator.processId()==0){ + float av=0; + int min=myParts, max=myParts; + for(int id=0; id<nbProc ; id++){ + if(nbPartsPerProc[id] > max){ + max=nbPartsPerProc[id]; + } + if(nbPartsPerProc[id] < min){ + min=nbPartsPerProc[id]; + } + av += float(nbPartsPerProc[id]); + } + av /= float(nbProc); + printf("End of Equalize Phase : \n \t Min number of parts : %d \n \t Max number of parts : %d \n \t Average number of parts : %e \n", + min,max,av); + delete[] nbPartsPerProc; + } } - } }; diff --git a/Src/Utils/FMpi.hpp b/Src/Utils/FMpi.hpp index 190c41de54b12c878ff75e44810843a3d280e7ce..b0ef166dccaff80ba4b157458371f353bfc82610 100755 --- a/Src/Utils/FMpi.hpp +++ b/Src/Utils/FMpi.hpp @@ -53,298 +53,298 @@ class FMpi { public: - //////////////////////////////////////////////////////// - // MPI Flag - //////////////////////////////////////////////////////// - enum FMpiTag { - // FMpiTreeBuilder - TagExchangeIndexs, - TagSplittedLeaf, - TagExchangeNbLeafs, - TagSandSettling, - - // FQuickSort - TagQuickSort, - - // FMM - TagFmmM2M, - TagFmmL2L, - TagFmmP2P, - - // Bitonic, - TagBitonicMin, - TagBitonicMax, - TagBitonicMinMess, - TagBitonicMaxMess, - - // Last defined tag - TagLast, - }; - - //////////////////////////////////////////////////////// - // FComm to factorize MPI_Comm work - //////////////////////////////////////////////////////// - - /** This class is used to put all the usual method + //////////////////////////////////////////////////////// + // MPI Flag + //////////////////////////////////////////////////////// + enum FMpiTag { + // FMpiTreeBuilder + TagExchangeIndexs, + TagSplittedLeaf, + TagExchangeNbLeafs, + TagSandSettling, + + // FQuickSort + TagQuickSort, + + // FMM + TagFmmM2M, + TagFmmL2L, + TagFmmP2P, + + // Bitonic, + TagBitonicMin, + TagBitonicMax, + TagBitonicMinMess, + TagBitonicMaxMess, + + // Last defined tag + TagLast, + }; + + //////////////////////////////////////////////////////// + // FComm to factorize MPI_Comm work + //////////////////////////////////////////////////////// + + /** This class is used to put all the usual method * related mpi comm */ - class FComm : public FNoCopyable { - int rank; //< rank related to the comm - int nbProc; //< nb proc in this group - - MPI_Comm communicator; //< current mpi communicator - MPI_Group group; //< current mpi group + class FComm : public FNoCopyable { + int rank; //< rank related to the comm + int nbProc; //< nb proc in this group + + MPI_Comm communicator; //< current mpi communicator + MPI_Group group; //< current mpi group + + + // reset : get rank and nb proc from mpi + void reset(){ + FMpi::Assert( MPI_Comm_rank(communicator,&rank), __LINE__ ); + FMpi::Assert( MPI_Comm_size(communicator,&nbProc), __LINE__ ); + } + + public: + /** Constructor : dup the comm given in parameter */ + explicit FComm(MPI_Comm inCommunicator ) { + FMpi::Assert( MPI_Comm_dup(inCommunicator, &communicator), __LINE__ , "comm dup"); + FMpi::Assert( MPI_Comm_group(communicator, &group), __LINE__ , "comm group"); + + reset(); + } + + /** Free communicator and group */ + virtual ~FComm(){ + FMpi::Assert( MPI_Comm_free(&communicator), __LINE__ ); + FMpi::Assert( MPI_Group_free(&group), __LINE__ ); + } + + /** To get the mpi comm needed for communication */ + MPI_Comm getComm() const { + return communicator; + } + + /** The current rank */ + int processId() const { + return rank; + } + + /** The current number of procs in the group */ + int processCount() const { + return nbProc; + } + + //////////////////////////////////////////////////////////// + // Split/Chunk functions + //////////////////////////////////////////////////////////// + + /** Get a left index related to a size */ + template< class T > + T getLeft(const T inSize) const { + const double step = (double(inSize) / double(processCount())); + return T(FMath::Ceil(step * double(processId()))); + } + + /** Get a right index related to a size */ + template< class T > + T getRight(const T inSize) const { + const double step = (double(inSize) / double(processCount())); + const T res = T(FMath::Ceil(step * double(processId()+1))); + if(res > inSize) return inSize; + else return res; + } + + /** Get a right index related to a size and another id */ + template< class T > + T getOtherRight(const T inSize, const int other) const { + const double step = (double(inSize) / double(processCount())); + const T res = T(FMath::Ceil(step * double(other+1))); + if(res > inSize) return inSize; + else return res; + } + + /** Get a left index related to a size and another id */ + template< class T > + T getOtherLeft(const T inSize, const int other) const { + const double step = (double(inSize) / double(processCount())); + return T(FMath::Ceil(step * double(other))); + } + + /** Get a proc id from and index */ + template< class T > + int getProc(const int position, const T inSize) const { + const double step = (double(inSize) / processCount()); + return int(position/step); + } + + //////////////////////////////////////////////////////////// + // Mpi interface functions + //////////////////////////////////////////////////////////// + + + /** Reduce a value for proc == 0 */ + template< class T > + T reduceSum(T data) const { + T result(0); + FMpi::Assert( MPI_Reduce( &data, &result, 1, FMpi::GetType(data), MPI_SUM, 0, communicator ), __LINE__); + return result; + } + + /** Reduce an average */ + template< class T > + T reduceAverageAll(T data) const { + T result[processCount()]; + FMpi::Assert( MPI_Allgather( &data, 1, FMpi::GetType(data), result, 1, FMpi::GetType(data), getComm()), __LINE__ ); + + T average = 0; + for(int idxProc = 0 ; idxProc < processCount() ;++idxProc){ + average += result[idxProc] / processCount(); + } + return average; + } + + /** Change the group size */ + void groupReduce(const int from , const int to){ + int * procsIdArray = new int [to - from + 1]; + for(int idxProc = from ;idxProc <= to ; ++idxProc){ + procsIdArray[idxProc - from] = idxProc; + } + + MPI_Group previousGroup = group; + FMpi::Assert( MPI_Group_incl(previousGroup, to - from + 1 , procsIdArray, &group), __LINE__ ); + + MPI_Comm previousComm = communicator; + FMpi::Assert( MPI_Comm_create(previousComm, group, &communicator), __LINE__ ); + + MPI_Comm_free(&previousComm); + MPI_Group_free(&previousGroup); + + reset(); + delete[] procsIdArray ; + } + }; + + //////////////////////////////////////////////////////// + // FMpi methods + //////////////////////////////////////////////////////// + + /* + We use init with thread because of an openmpi error: + [fourmi062:15896] [[13237,0],1]-[[13237,1],1] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) + [fourmi056:04597] [[13237,0],3]-[[13237,1],3] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) + [fourmi053:08571] [[13237,0],5]-[[13237,1],5] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) - // reset : get rank and nb proc from mpi - void reset(){ - FMpi::Assert( MPI_Comm_rank(communicator,&rank), __LINE__ ); - FMpi::Assert( MPI_Comm_size(communicator,&nbProc), __LINE__ ); + Erreur pour le proc1 + [[13237,1],1][btl_openib_component.c:3227:handle_wc] from fourmi062 to: fourmi056 error polling LP CQ with status LOCAL LENGTH ERROR status number 1 for wr_id 7134664 opcode 0 vendor error 105 qp_idx 3 + Tous on la meme erreur le 2e 1 est remplace par le rang. + */ + FMpi() : communicator(nullptr) { + int provided = 0; + FMpi::Assert( MPI_Init_thread(NULL,NULL, MPI_THREAD_SERIALIZED, &provided), __LINE__); + communicator = new FComm(MPI_COMM_WORLD); } - - public: - /** Constructor : dup the comm given in parameter */ - explicit FComm(MPI_Comm inCommunicator ) { - FMpi::Assert( MPI_Comm_dup(inCommunicator, &communicator), __LINE__ , "comm dup"); - FMpi::Assert( MPI_Comm_group(communicator, &group), __LINE__ , "comm group"); - - reset(); + FMpi(int inArgc, char ** inArgv ) : communicator(nullptr) { + int provided = 0; + FMpi::Assert( MPI_Init_thread(&inArgc,&inArgv, MPI_THREAD_SERIALIZED, &provided), __LINE__); + communicator = new FComm(MPI_COMM_WORLD); } - /** Free communicator and group */ - virtual ~FComm(){ - FMpi::Assert( MPI_Comm_free(&communicator), __LINE__ ); - FMpi::Assert( MPI_Group_free(&group), __LINE__ ); + /** Delete the communicator and call mpi finalize */ + ~FMpi(){ + delete communicator; + MPI_Finalize(); } - /** To get the mpi comm needed for communication */ - MPI_Comm getComm() const { - return communicator; + /** Get the global communicator */ + const FComm& global() { + return (*communicator); } - /** The current rank */ - int processId() const { - return rank; - } + //////////////////////////////////////////////////////////// + // Mpi Types meta function + //////////////////////////////////////////////////////////// - /** The current number of procs in the group */ - int processCount() const { - return nbProc; + static const MPI_Datatype GetType(const long long&){ + return MPI_LONG_LONG; } - //////////////////////////////////////////////////////////// - // Split/Chunk functions - //////////////////////////////////////////////////////////// + static const MPI_Datatype GetType(const long int&){ + return MPI_LONG; + } - /** Get a left index related to a size */ - template< class T > - T getLeft(const T inSize) const { - const double step = (double(inSize) / double(processCount())); - return T(FMath::Ceil(step * double(processId()))); + static const MPI_Datatype GetType(const double&){ + return MPI_DOUBLE; } - /** Get a right index related to a size */ - template< class T > - T getRight(const T inSize) const { - const double step = (double(inSize) / double(processCount())); - const T res = T(FMath::Ceil(step * double(processId()+1))); - if(res > inSize) return inSize; - else return res; + static const MPI_Datatype GetType(const float&){ + return MPI_FLOAT; } - /** Get a right index related to a size and another id */ - template< class T > - T getOtherRight(const T inSize, const int other) const { - const double step = (double(inSize) / double(processCount())); - const T res = T(FMath::Ceil(step * double(other+1))); - if(res > inSize) return inSize; - else return res; + static const MPI_Datatype GetType(const int&){ + return MPI_INT; } - /** Get a left index related to a size and another id */ - template< class T > - T getOtherLeft(const T inSize, const int other) const { - const double step = (double(inSize) / double(processCount())); - return T(FMath::Ceil(step * double(other))); + static const MPI_Datatype GetType(const char&){ + return MPI_CHAR; } - /** Get a proc id from and index */ - template< class T > - int getProc(const int position, const T inSize) const { - const double step = (double(inSize) / processCount()); - return int(position/step); + static const MPI_Datatype GetType(const FComplexe& a){ + MPI_Datatype FMpiComplexe; + MPI_Type_contiguous(2, GetType(a.getReal()) , &FMpiComplexe); + return FMpiComplexe; } //////////////////////////////////////////////////////////// // Mpi interface functions //////////////////////////////////////////////////////////// - - /** Reduce a value for proc == 0 */ - template< class T > - T reduceSum(T data) const { - T result(0); - FMpi::Assert( MPI_Reduce( &data, &result, 1, FMpi::GetType(data), MPI_SUM, 0, communicator ), __LINE__); - return result; + /** generic mpi assert function */ + static void Assert(const int test, const unsigned line, const char* const message = nullptr){ + if(test != MPI_SUCCESS){ + printf("[ERROR-QS] Test failled at line %d, result is %d", line, test); + if(message) printf(", message: %s",message); + printf("\n"); + fflush(stdout); + MPI_Abort(MPI_COMM_WORLD, int(line) ); + } } - /** Reduce an average */ - template< class T > - T reduceAverageAll(T data) const { - T result[processCount()]; - FMpi::Assert( MPI_Allgather( &data, 1, FMpi::GetType(data), result, 1, FMpi::GetType(data), getComm()), __LINE__ ); - - T average = 0; - for(int idxProc = 0 ; idxProc < processCount() ;++idxProc){ - average += result[idxProc] / processCount(); - } - return average; + /** Compute a left index from data */ + template <class T> + static T GetLeft(const T inSize, const int inIdProc, const int inNbProc) { + const double step = (double(inSize) / inNbProc); + return T(ceil(step * inIdProc)); } - /** Change the group size */ - void groupReduce(const int from , const int to){ - int * procsIdArray = new int [to - from + 1]; - for(int idxProc = from ;idxProc <= to ; ++idxProc){ - procsIdArray[idxProc - from] = idxProc; - } - - MPI_Group previousGroup = group; - FMpi::Assert( MPI_Group_incl(previousGroup, to - from + 1 , procsIdArray, &group), __LINE__ ); - - MPI_Comm previousComm = communicator; - FMpi::Assert( MPI_Comm_create(previousComm, group, &communicator), __LINE__ ); - - MPI_Comm_free(&previousComm); - MPI_Group_free(&previousGroup); - - reset(); - delete procsIdArray ; + /** Compute a right index from data */ + template <class T> + static T GetRight(const T inSize, const int inIdProc, const int inNbProc) { + const double step = (double(inSize) / inNbProc); + const T res = T(ceil(step * (inIdProc+1))); + if(res > inSize) return inSize; + else return res; } - }; - //////////////////////////////////////////////////////// - // FMpi methods - //////////////////////////////////////////////////////// - - /* - We use init with thread because of an openmpi error: - - [fourmi062:15896] [[13237,0],1]-[[13237,1],1] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) - [fourmi056:04597] [[13237,0],3]-[[13237,1],3] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) - [fourmi053:08571] [[13237,0],5]-[[13237,1],5] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) - - Erreur pour le proc1 - [[13237,1],1][btl_openib_component.c:3227:handle_wc] from fourmi062 to: fourmi056 error polling LP CQ with status LOCAL LENGTH ERROR status number 1 for wr_id 7134664 opcode 0 vendor error 105 qp_idx 3 - Tous on la meme erreur le 2e 1 est remplace par le rang. - */ - FMpi() : communicator(nullptr) { - int provided = 0; - FMpi::Assert( MPI_Init_thread(NULL,NULL, MPI_THREAD_SERIALIZED, &provided), __LINE__); - communicator = new FComm(MPI_COMM_WORLD); - } - FMpi(int inArgc, char ** inArgv ) : communicator(nullptr) { - int provided = 0; - FMpi::Assert( MPI_Init_thread(&inArgc,&inArgv, MPI_THREAD_SERIALIZED, &provided), __LINE__); - communicator = new FComm(MPI_COMM_WORLD); - } - - /** Delete the communicator and call mpi finalize */ - ~FMpi(){ - delete communicator; - MPI_Finalize(); - } - - /** Get the global communicator */ - const FComm& global() { - return (*communicator); - } - - //////////////////////////////////////////////////////////// - // Mpi Types meta function - //////////////////////////////////////////////////////////// - - static const MPI_Datatype GetType(const long long&){ - return MPI_LONG_LONG; - } - - static const MPI_Datatype GetType(const long int&){ - return MPI_LONG; - } - - static const MPI_Datatype GetType(const double&){ - return MPI_DOUBLE; - } - - static const MPI_Datatype GetType(const float&){ - return MPI_FLOAT; - } - - static const MPI_Datatype GetType(const int&){ - return MPI_INT; - } - - static const MPI_Datatype GetType(const char&){ - return MPI_CHAR; - } - - static const MPI_Datatype GetType(const FComplexe& a){ - MPI_Datatype FMpiComplexe; - MPI_Type_contiguous(2, GetType(a.getReal()) , &FMpiComplexe); - return FMpiComplexe; - } - - //////////////////////////////////////////////////////////// - // Mpi interface functions - //////////////////////////////////////////////////////////// - - /** generic mpi assert function */ - static void Assert(const int test, const unsigned line, const char* const message = nullptr){ - if(test != MPI_SUCCESS){ - printf("[ERROR-QS] Test failled at line %d, result is %d", line, test); - if(message) printf(", message: %s",message); - printf("\n"); - fflush(stdout); - MPI_Abort(MPI_COMM_WORLD, int(line) ); + /** Compute a proc id from index & data */ + template <class T> + static int GetProc(const T position, const T inSize, const int inNbProc) { + const double step = double(inSize) / double(inNbProc); + return int(double(position)/step); } - } - - /** Compute a left index from data */ - template <class T> - static T GetLeft(const T inSize, const int inIdProc, const int inNbProc) { - const double step = (double(inSize) / inNbProc); - return T(ceil(step * inIdProc)); - } - - /** Compute a right index from data */ - template <class T> - static T GetRight(const T inSize, const int inIdProc, const int inNbProc) { - const double step = (double(inSize) / inNbProc); - const T res = T(ceil(step * (inIdProc+1))); - if(res > inSize) return inSize; - else return res; - } - - /** Compute a proc id from index & data */ - template <class T> - static int GetProc(const T position, const T inSize, const int inNbProc) { - const double step = double(inSize) / double(inNbProc); - return int(double(position)/step); - } - - /** assert if mpi error */ - static void MpiAssert(const int test, const unsigned line, const char* const message = nullptr){ - if(test != MPI_SUCCESS){ - printf("[ERROR] Test failed at line %d, result is %d", line, test); - if(message) printf(", message: %s",message); - printf("\n"); - fflush(stdout); - MPI_Abort(MPI_COMM_WORLD, int(line) ); + + /** assert if mpi error */ + static void MpiAssert(const int test, const unsigned line, const char* const message = nullptr){ + if(test != MPI_SUCCESS){ + printf("[ERROR] Test failed at line %d, result is %d", line, test); + if(message) printf(", message: %s",message); + printf("\n"); + fflush(stdout); + MPI_Abort(MPI_COMM_WORLD, int(line) ); + } } - } private: - /** The original communicator */ - FComm* communicator; + /** The original communicator */ + FComm* communicator; }; diff --git a/Src/Utils/FQuickSort.hpp b/Src/Utils/FQuickSort.hpp index 7084d7f9d4eafe5f33b5d4895bfa823b53c6bbb7..b4d10b1ddec9da965d1e342dca157df527c8e2c1 100755 --- a/Src/Utils/FQuickSort.hpp +++ b/Src/Utils/FQuickSort.hpp @@ -76,31 +76,6 @@ protected: return left; } - /* A local iteration of qs */ - static void QsLocal(SortType array[], const CompareType& pivot, - IndexType myLeft, IndexType myRight, - IndexType& prefix, IndexType& sufix){ - - IndexType leftIter = myLeft; - IndexType rightIter = myRight; - - while(true){ - while(CompareType(array[leftIter]) <= pivot && leftIter < rightIter){ - ++leftIter; - } - while(leftIter <= rightIter && pivot < CompareType(array[rightIter])){ - --rightIter; - } - if(rightIter < leftIter) break; - - Swap(array[leftIter],array[rightIter]); - ++leftIter; - --rightIter; - } - - prefix = leftIter - myLeft; - sufix = myRight - myLeft - prefix + 1; - } /* The sequential qs */ static void QsSequentialStep(SortType array[], const IndexType left, const IndexType right){ diff --git a/Src/Utils/FQuickSortMpi.hpp b/Src/Utils/FQuickSortMpi.hpp index 642938d799b1b49e846342bf861e7c5fbbcde487..91b849fdb4cb5151f1a933d9440cc68b80b0874a 100755 --- a/Src/Utils/FQuickSortMpi.hpp +++ b/Src/Utils/FQuickSortMpi.hpp @@ -18,225 +18,367 @@ #include "FQuickSort.hpp" #include "FMpi.hpp" +#include "FLog.hpp" +#include <memory> template <class SortType, class CompareType, class IndexType> class FQuickSortMpi : public FQuickSort< SortType, CompareType, IndexType> { -public: - /* the mpi qs */ - static void QsMpi(const SortType originalArray[], IndexType size, SortType* & outputArray, IndexType& outputSize, const FMpi::FComm& originalComm){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Quicksort" , __FILE__ , __LINE__) ); - // We need a structure see the algorithm detail to know more - struct Fix{ - IndexType pre; - IndexType suf; - }; - - // first we copy data into our working buffer : outputArray - outputArray = new SortType[size]; - FMemUtils::memcpy(outputArray, originalArray, sizeof(SortType) * size); - outputSize = size; - - // alloc outputArray to store pre/sufixe, maximum needed is nb procs[comm world] + 1 - Fix fixes[originalComm.processCount() + 1]; - Fix fixesSum[originalComm.processCount() + 1]; - memset(fixes,0,sizeof(Fix) * originalComm.processCount()); - memset(fixesSum,0,sizeof(Fix) * (originalComm.processCount() + 1) ); - - // receiving buffer - IndexType bufferSize = 0; - SortType* buffer = nullptr; - - // Create the first com - FMpi::FComm currentComm(originalComm.getComm()); + // We need a structure see the algorithm detail to know more + struct Partition{ + IndexType lowerPart; + IndexType greaterPart; + }; + + struct PackData { + int idProc; + IndexType fromElement; + IndexType toElement; + }; + + + static void Swap(SortType& value, SortType& other){ + SortType temp = value; + value = other; + other = temp; + } - // While I am not working alone on my own data - while( currentComm.processCount() != 1 ){ - const int currentRank = currentComm.processId(); - const int currentNbProcs = currentComm.processCount(); + /* A local iteration of qs */ + static IndexType QsPartition(SortType array[], IndexType left, IndexType right, const CompareType& pivot){ + IndexType idx = left; + while( idx <= right && CompareType(array[idx]) <= pivot){ + idx += 1; + } + left = idx; - MPI_Request requests[currentNbProcs * 2]; - int iterRequest = 0; + for( ; idx <= right ; ++idx){ + if( CompareType(array[idx]) <= pivot ){ + Swap(array[idx],array[left]); + left += 1; + } + } - ///////////////////////////////////////////////// - // Local sort - ///////////////////////////////////////////////// + return left; + } - // sort QsLocal part of the outputArray - const CompareType pivot = currentComm.reduceAverageAll( CompareType(outputArray[size/2]) ); - Fix myFix; - FQuickSort< SortType, CompareType, IndexType>::QsLocal(outputArray, pivot, 0, size - 1, myFix.pre, myFix.suf); + static std::vector<PackData> Distribute(const int currentRank, const int currentNbProcs , + const Partition globalElementBalance[], const Partition globalElementBalanceSum[], + const int procInTheMiddle, const bool inFromRightToLeft){ + // First agree on who send and who recv + const int firstProcToSend = (inFromRightToLeft ? procInTheMiddle+1 : 0); + const int lastProcToSend = (inFromRightToLeft ? currentNbProcs : procInTheMiddle+1); + const int firstProcToRecv = (inFromRightToLeft ? 0 : procInTheMiddle+1); + const int lastProcToRecv = (inFromRightToLeft ? procInTheMiddle+1 : currentNbProcs); + // Get the number of element depending on the lower or greater send/recv + const IndexType totalElementToProceed = (inFromRightToLeft ? + globalElementBalanceSum[lastProcToSend].lowerPart - globalElementBalanceSum[firstProcToSend].lowerPart : + globalElementBalanceSum[lastProcToSend].greaterPart - globalElementBalanceSum[firstProcToSend].greaterPart); + const IndexType totalElementAlreadyOwned = (inFromRightToLeft ? + globalElementBalanceSum[lastProcToRecv].lowerPart - globalElementBalanceSum[firstProcToRecv].lowerPart : + globalElementBalanceSum[lastProcToRecv].greaterPart - globalElementBalanceSum[firstProcToRecv].greaterPart); + + const int nbProcsToRecv = (lastProcToRecv-firstProcToRecv); + const int nbProcsToSend = (lastProcToSend-firstProcToSend); + const IndexType totalElements = (totalElementToProceed+totalElementAlreadyOwned); + + std::vector<IndexType> nbElementsToRecvPerProc; + nbElementsToRecvPerProc.resize(nbProcsToRecv); + { + // Get the number of elements each proc should recv + IndexType totalRemainingElements = totalElements; + + for(int idxProc = firstProcToRecv; idxProc < lastProcToRecv ; ++idxProc){ + const IndexType nbElementsAlreadyOwned = (inFromRightToLeft ? globalElementBalance[idxProc].lowerPart : globalElementBalance[idxProc].greaterPart); + const IndexType averageNbElementForRemainingProc = (totalRemainingElements)/(lastProcToRecv-idxProc); + totalRemainingElements -= nbElementsAlreadyOwned; + if(nbElementsAlreadyOwned < averageNbElementForRemainingProc){ + nbElementsToRecvPerProc[idxProc - firstProcToRecv] = (averageNbElementForRemainingProc - nbElementsAlreadyOwned); + totalRemainingElements -= nbElementsToRecvPerProc[idxProc - firstProcToRecv]; + } + else{ + nbElementsToRecvPerProc[idxProc - firstProcToRecv] = 0; + } + ////FLOG( FLog::Controller << currentRank << "] nbElementsToRecvPerProc[" << idxProc << "] = " << nbElementsToRecvPerProc[idxProc - firstProcToRecv] << "\n"; ) + } + } - // exchange fixes - FMpi::Assert( MPI_Allgather( &myFix, sizeof(Fix), MPI_BYTE, fixes, sizeof(Fix), MPI_BYTE, currentComm.getComm()), __LINE__ ); + // Store in an array the number of element to send + std::vector<IndexType> nbElementsToSendPerProc; + nbElementsToSendPerProc.resize(nbProcsToSend); + for(int idxProc = firstProcToSend; idxProc < lastProcToSend ; ++idxProc){ + const IndexType nbElementsAlreadyOwned = (inFromRightToLeft ? globalElementBalance[idxProc].lowerPart : globalElementBalance[idxProc].greaterPart); + nbElementsToSendPerProc[idxProc-firstProcToSend] = nbElementsAlreadyOwned; + ////FLOG( FLog::Controller << currentRank << "] nbElementsToSendPerProc[" << idxProc << "] = " << nbElementsToSendPerProc[idxProc-firstProcToSend] << "\n"; ) + } - // each procs compute the summation - fixesSum[0].pre = 0; - fixesSum[0].suf = 0; - for(int idxProc = 0 ; idxProc < currentNbProcs ; ++idxProc){ - fixesSum[idxProc + 1].pre = fixesSum[idxProc].pre + fixes[idxProc].pre; - fixesSum[idxProc + 1].suf = fixesSum[idxProc].suf + fixes[idxProc].suf; + // Compute all the send recv but keep only the ones related to currentRank + std::vector<PackData> packs; + int idxProcSend = 0; + IndexType positionElementsSend = 0; + int idxProcRecv = 0; + IndexType positionElementsRecv = 0; + while(idxProcSend != nbProcsToSend && idxProcRecv != nbProcsToRecv){ + if(nbElementsToSendPerProc[idxProcSend] == 0){ + idxProcSend += 1; + positionElementsSend = 0; } - - // then I need to know which procs will be in the middle - int splitProc = FMpi::GetProc(fixesSum[currentNbProcs].pre - 1, fixesSum[currentNbProcs].pre + fixesSum[currentNbProcs].suf, currentNbProcs); - if(splitProc == currentNbProcs - 1){ - --splitProc; + else if(nbElementsToRecvPerProc[idxProcRecv] == 0){ + idxProcRecv += 1; + positionElementsRecv = 0; } - - ///////////////////////////////////////////////// - // Send my data - ///////////////////////////////////////////////// - - // above pivot (right part) - if( fixes[currentRank].suf ){ - const int procsInSuf = currentNbProcs - 1 - splitProc; - const int firstProcInSuf = splitProc + 1; - const IndexType elementsInSuf = fixesSum[currentNbProcs].suf; - - const int firstProcToSend = FMpi::GetProc(fixesSum[currentRank].suf, elementsInSuf, procsInSuf) + firstProcInSuf; - const int lastProcToSend = FMpi::GetProc(fixesSum[currentRank + 1].suf - 1, elementsInSuf, procsInSuf) + firstProcInSuf; - - IndexType sent = 0; - for(int idxProc = firstProcToSend ; idxProc <= lastProcToSend ; ++idxProc){ - const IndexType thisProcRight = FMpi::GetRight(elementsInSuf, idxProc - firstProcInSuf, procsInSuf); - IndexType sendToProc = thisProcRight - fixesSum[currentRank].suf - sent; - - if(sendToProc + sent > fixes[currentRank].suf){ - sendToProc = fixes[currentRank].suf - sent; - } - if( sendToProc ){ - FMpi::Assert( MPI_Isend(&outputArray[sent + fixes[currentRank].pre], int(sendToProc * sizeof(SortType)), MPI_BYTE , idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]), __LINE__ ); - } - sent += sendToProc; + else { + const IndexType nbElementsInPack = FMath::Min(nbElementsToSendPerProc[idxProcSend], nbElementsToRecvPerProc[idxProcRecv]); + if(idxProcSend + firstProcToSend == currentRank){ + PackData pack; + pack.idProc = idxProcRecv + firstProcToRecv; + pack.fromElement = positionElementsSend; + pack.toElement = pack.fromElement + nbElementsInPack; + packs.push_back(pack); } - } - - // under pivot (left part) - if( fixes[currentRank].pre ){ - const int procsInPre = splitProc + 1; - const IndexType elementsInPre = fixesSum[currentNbProcs].pre; - - const int firstProcToSend = FMpi::GetProc(fixesSum[currentRank].pre, elementsInPre, procsInPre); - const int lastProcToSend = FMpi::GetProc(fixesSum[currentRank + 1].pre - 1, elementsInPre, procsInPre); - - IndexType sent = 0; - for(int idxProc = firstProcToSend ; idxProc <= lastProcToSend ; ++idxProc){ - const IndexType thisProcRight = FMpi::GetRight(elementsInPre, idxProc, procsInPre); - IndexType sendToProc = thisProcRight - fixesSum[currentRank].pre - sent; - - if(sendToProc + sent > fixes[currentRank].pre){ - sendToProc = fixes[currentRank].pre - sent; - } - if(sendToProc){ - FMpi::Assert( MPI_Isend(&outputArray[sent], int(sendToProc * sizeof(SortType)), MPI_BYTE , idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]), __LINE__ ); - } - sent += sendToProc; + else if(idxProcRecv + firstProcToRecv == currentRank){ + PackData pack; + pack.idProc = idxProcSend + firstProcToSend; + pack.fromElement = positionElementsRecv; + pack.toElement = pack.fromElement + nbElementsInPack; + packs.push_back(pack); } + nbElementsToSendPerProc[idxProcSend] -= nbElementsInPack; + nbElementsToRecvPerProc[idxProcRecv] -= nbElementsInPack; + positionElementsSend += nbElementsInPack; + positionElementsRecv += nbElementsInPack; } + } + + return packs; + } + + static void RecvDistribution(SortType ** inPartRecv, IndexType* inNbElementsRecv, + const Partition globalElementBalance[], const Partition globalElementBalanceSum[], + const int procInTheMiddle, const FMpi::FComm& currentComm, const bool inFromRightToLeft){ + // Compute to know what to recv + const std::vector<PackData> whatToRecvFromWho = Distribute(currentComm.processId(), currentComm.processCount(), + globalElementBalance, globalElementBalanceSum, + procInTheMiddle, inFromRightToLeft); + // Count the total number of elements to recv + IndexType totalToRecv = 0; + for(const PackData& pack : whatToRecvFromWho){ + totalToRecv += pack.toElement - pack.fromElement; + } + // Alloc buffer + SortType* recvBuffer = new SortType[totalToRecv]; + + // Recv all data + MPI_Request requests[whatToRecvFromWho.size()]; + for(int idxPack = 0 ; idxPack < int(whatToRecvFromWho.size()) ; ++idxPack){ + const PackData& pack = whatToRecvFromWho[idxPack]; + ////FLOG( FLog::Controller << currentComm.processId() << "] Recv from " << pack.idProc << " from " << pack.fromElement << " to " << pack.toElement << "\n"; ) + FMpi::Assert( MPI_Irecv((SortType*)&recvBuffer[pack.fromElement], int((pack.toElement - pack.fromElement) * sizeof(SortType)), MPI_BYTE, pack.idProc, + FMpi::TagQuickSort, currentComm.getComm(), &requests[idxPack]) , __LINE__); + } + // Wait to complete + FMpi::Assert( MPI_Waitall(whatToRecvFromWho.size(), requests, MPI_STATUSES_IGNORE), __LINE__ ); + // Copy to ouput variables + (*inPartRecv) = recvBuffer; + (*inNbElementsRecv) = totalToRecv; + } - ///////////////////////////////////////////////// - // Receive data that belong to me - ///////////////////////////////////////////////// + static void SendDistribution(const SortType * inPartToSend, const IndexType inNbElementsToSend, + const Partition globalElementBalance[], const Partition globalElementBalanceSum[], + const int procInTheMiddle, const FMpi::FComm& currentComm, const bool inFromRightToLeft){ + // Compute to know what to send + const std::vector<PackData> whatToSendToWho = Distribute(currentComm.processId(), currentComm.processCount(), + globalElementBalance, globalElementBalanceSum, + procInTheMiddle, inFromRightToLeft); + + // Post send messages + MPI_Request requests[whatToSendToWho.size()]; + for(int idxPack = 0 ; idxPack < int(whatToSendToWho.size()) ; ++idxPack){ + const PackData& pack = whatToSendToWho[idxPack]; + ////FLOG( FLog::Controller << currentComm.processId() << "] Send to " << pack.idProc << " from " << pack.fromElement << " to " << pack.toElement << "\n"; ) + FMpi::Assert( MPI_Isend((SortType*)&inPartToSend[pack.fromElement], int((pack.toElement - pack.fromElement) * sizeof(SortType)), MPI_BYTE , pack.idProc, + FMpi::TagQuickSort, currentComm.getComm(), &requests[idxPack]) , __LINE__); + } + // Wait to complete + FMpi::Assert( MPI_Waitall(whatToSendToWho.size(), requests, MPI_STATUSES_IGNORE), __LINE__ ); + } - if( currentRank <= splitProc ){ - // I am in S-Part (smaller than pivot) - const int procsInPre = splitProc + 1; - const IndexType elementsInPre = fixesSum[currentNbProcs].pre; + static CompareType SelectPivot(const SortType workingArray[], const IndexType currentSize, const FMpi::FComm& currentComm, bool* shouldStop){ + enum ValuesState{ + ALL_THE_SAME, + NO_VALUES, + AVERAGE_2 + }; + // Check if all the same + bool allTheSame = true; + for(int idx = 1 ; idx < currentSize && allTheSame; ++idx){ + if(workingArray[0] != workingArray[idx]){ + allTheSame = false; + } + } + // Check if empty + const bool noValues = (currentSize == 0); + // Get the local pivot if not empty + CompareType localPivot = CompareType(0); + if(!noValues){ + localPivot = (CompareType(workingArray[currentSize/3])+CompareType(workingArray[(2*currentSize)/3]))/2; + } - IndexType myLeft = FMpi::GetLeft(elementsInPre, currentRank, procsInPre); - IndexType myRightLimit = FMpi::GetRight(elementsInPre, currentRank, procsInPre); + ////FLOG( FLog::Controller << currentComm.processId() << "] localPivot = " << localPivot << "\n" ); + + const int myRank = currentComm.processId(); + const int nbProcs = currentComm.processCount(); + // Exchange the pivos and the state + std::unique_ptr<int[]> allProcStates(new int[nbProcs]); + std::unique_ptr<CompareType[]> allProcPivots(new CompareType[nbProcs]); + { + int myState = (noValues?NO_VALUES:(allTheSame?ALL_THE_SAME:AVERAGE_2)); + FMpi::Assert( MPI_Allgather( &myState, 1, MPI_INT, allProcStates.get(), + 1, MPI_INT, currentComm.getComm()), __LINE__ ); + FMpi::Assert( MPI_Allgather( &localPivot, sizeof(CompareType), MPI_BYTE, allProcPivots.get(), + sizeof(CompareType), MPI_BYTE, currentComm.getComm()), __LINE__ ); + } + // Test if all the procs have ALL_THE_SAME and the same value + bool allProcsAreSame = true; + for(int idxProc = 0 ; idxProc < nbProcs && allProcsAreSame; ++idxProc){ + if(allProcStates[idxProc] != NO_VALUES && (allProcStates[idxProc] != ALL_THE_SAME || allProcPivots[0] != allProcPivots[idxProc])){ + allProcsAreSame = false; + } + } - size = myRightLimit - myLeft; - if(bufferSize < size){ - bufferSize = size; - delete[] buffer; - buffer = new SortType[bufferSize]; + if(allProcsAreSame){ + // All the procs are the same so we need to stop + (*shouldStop) = true; + return CompareType(0); + } + else{ + CompareType globalPivot = 0; + CompareType counterValuesInPivot = 0; + // Compute the pivos + for(int idxProc = 0 ; idxProc < nbProcs; ++idxProc){ + if(allProcStates[idxProc] != NO_VALUES){ + globalPivot += allProcPivots[idxProc]; + counterValuesInPivot += 1; } + } + (*shouldStop) = false; + return globalPivot/counterValuesInPivot; + } + } - int idxProc = 0; - while( idxProc < currentNbProcs && fixesSum[idxProc + 1].pre <= myLeft ){ - ++idxProc; - } +public: - IndexType indexArray = 0; + static void QsMpi(const SortType originalArray[], const IndexType originalSize, + SortType** outputArray, IndexType* outputSize, const FMpi::FComm& originalComm){ + // We do not work in place, so create a new array + IndexType currentSize = originalSize; + SortType* workingArray = new SortType[currentSize]; + FMemUtils::memcpy(workingArray, originalArray, sizeof(SortType) * currentSize); - while( idxProc < currentNbProcs && indexArray < myRightLimit - myLeft){ - const IndexType firstIndex = FMath::Max(myLeft , fixesSum[idxProc].pre ); - const IndexType endIndex = FMath::Min(fixesSum[idxProc + 1].pre, myRightLimit); - if( (endIndex - firstIndex) ){ - FMpi::Assert( MPI_Irecv(&buffer[indexArray], int((endIndex - firstIndex) * sizeof(SortType)), MPI_BYTE, idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]), __LINE__ ); - } - indexArray += endIndex - firstIndex; - ++idxProc; - } - // Proceed all send/receive - FMpi::Assert( MPI_Waitall(iterRequest, requests, MPI_STATUSES_IGNORE), __LINE__ ); + // Clone the MPI group because we will reduce it after each partition + FMpi::FComm currentComm(originalComm.getComm()); - currentComm.groupReduce( 0, splitProc); + // Parallel sharing until I am alone on the data + while( currentComm.processCount() != 1 ){ + // Agree on the Pivot + bool shouldStop; + const CompareType globalPivot = SelectPivot(workingArray, currentSize, currentComm, &shouldStop); + if(shouldStop){ + ////FLOG( FLog::Controller << currentComm.processId() << "] shouldStop = " << shouldStop << "\n" ); + break; } - else{ - // I am in L-Part (larger than pivot) - const int procsInSuf = currentNbProcs - 1 - splitProc; - const IndexType elementsInSuf = fixesSum[currentNbProcs].suf; - - const int rankInL = currentRank - splitProc - 1; - IndexType myLeft = FMpi::GetLeft(elementsInSuf, rankInL, procsInSuf); - IndexType myRightLimit = FMpi::GetRight(elementsInSuf, rankInL, procsInSuf); - - size = myRightLimit - myLeft; - if(bufferSize < size){ - bufferSize = size; - delete[] buffer; - buffer = new SortType[bufferSize]; - } - int idxProc = 0; - while( idxProc < currentNbProcs && fixesSum[idxProc + 1].suf <= myLeft ){ - ++idxProc; - } + ////FLOG( FLog::Controller << currentComm.processId() << "] globalPivot = " << globalPivot << "\n" ); - IndexType indexArray = 0; + // Split the array in two parts lower equal to pivot and greater than pivot + const IndexType nbLowerElements = QsPartition(workingArray, 0, currentSize-1, globalPivot); + const IndexType nbGreaterElements = currentSize - nbLowerElements; - while( idxProc < currentNbProcs && indexArray < myRightLimit - myLeft){ - const IndexType firstIndex = FMath::Max(myLeft , fixesSum[idxProc].suf ); - const IndexType endIndex = FMath::Min(fixesSum[idxProc + 1].suf, myRightLimit); - if( (endIndex - firstIndex) ){ - FMpi::Assert( MPI_Irecv(&buffer[indexArray], int((endIndex - firstIndex) * sizeof(SortType)), MPI_BYTE, idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]), __LINE__ ); - } - indexArray += endIndex - firstIndex; - ++idxProc; - } - // Proceed all send/receive - FMpi::Assert( MPI_Waitall(iterRequest, requests, MPI_STATUSES_IGNORE), __LINE__ ); + ////FLOG( FLog::Controller << currentComm.processId() << "] After Partition: lower = " << nbLowerElements << " greater = " << nbGreaterElements << "\n"; ) - currentComm.groupReduce( splitProc + 1, currentNbProcs - 1); - } + const int currentRank = currentComm.processId(); + const int currentNbProcs = currentComm.processCount(); + // We need to know what each process is holding + Partition currentElementsBalance = { nbLowerElements, nbGreaterElements}; + Partition globalElementBalance[currentNbProcs]; + // Every one in the group need to know + FMpi::Assert( MPI_Allgather( ¤tElementsBalance, sizeof(Partition), MPI_BYTE, globalElementBalance, + sizeof(Partition), MPI_BYTE, currentComm.getComm()), __LINE__ ); - // Copy res into outputArray - if(outputSize < size){ - delete[] outputArray; - outputArray = new SortType[size]; - outputSize = size; + // Find the number of elements lower or greater + IndexType globalNumberOfElementsGreater = 0; + IndexType globalNumberOfElementsLower = 0; + Partition globalElementBalanceSum[currentNbProcs + 1]; + globalElementBalanceSum[0].lowerPart = 0; + globalElementBalanceSum[0].greaterPart = 0; + for(int idxProc = 0 ; idxProc < currentNbProcs ; ++idxProc){ + globalElementBalanceSum[idxProc + 1].lowerPart = globalElementBalanceSum[idxProc].lowerPart + globalElementBalance[idxProc].lowerPart; + globalElementBalanceSum[idxProc + 1].greaterPart = globalElementBalanceSum[idxProc].greaterPart + globalElementBalance[idxProc].greaterPart; + globalNumberOfElementsGreater += globalElementBalance[idxProc].greaterPart; + globalNumberOfElementsLower += globalElementBalance[idxProc].lowerPart; } - FMemUtils::memcpy(outputArray, buffer, sizeof(SortType) * size); + ////FLOG( FLog::Controller << currentComm.processId() << "] globalNumberOfElementsGreater = " << globalNumberOfElementsGreater << "\n"; ) + ////FLOG( FLog::Controller << currentComm.processId() << "] globalNumberOfElementsLower = " << globalNumberOfElementsLower << "\n"; ) + + // The proc rank in the middle from the percentage + int procInTheMiddle; + if(globalNumberOfElementsLower == 0) procInTheMiddle = -1; + else if(globalNumberOfElementsGreater == 0) procInTheMiddle = currentNbProcs-1; + else procInTheMiddle = FMath::Min(currentNbProcs-2, (currentNbProcs*globalNumberOfElementsLower) + /(globalNumberOfElementsGreater + globalNumberOfElementsLower)); + + ////FLOG( FLog::Controller << currentComm.processId() << "] procInTheMiddle = " << procInTheMiddle << "\n"; ) + + // Send or receive depending on the state + if(currentRank <= procInTheMiddle){ + // I am in the group of the lower elements + SendDistribution(workingArray + nbLowerElements, nbGreaterElements, + globalElementBalance, globalElementBalanceSum, procInTheMiddle, currentComm, false); + SortType* lowerPartRecv = nullptr; + IndexType nbLowerElementsRecv = 0; + RecvDistribution(&lowerPartRecv, &nbLowerElementsRecv, + globalElementBalance, globalElementBalanceSum, procInTheMiddle, currentComm, true); + // Merge previous part and just received elements + const IndexType fullNbLowerElementsRecv = nbLowerElementsRecv + nbLowerElements; + SortType* fullLowerPart = new SortType[fullNbLowerElementsRecv]; + memcpy(fullLowerPart, workingArray, sizeof(SortType)* nbLowerElements); + memcpy(fullLowerPart + nbLowerElements, lowerPartRecv, sizeof(SortType)* nbLowerElementsRecv); + delete[] workingArray; + delete[] lowerPartRecv; + workingArray = fullLowerPart; + currentSize = fullNbLowerElementsRecv; + // Reduce working group + currentComm.groupReduce( 0, procInTheMiddle); + } + else { + // I am in the group of the greater elements + SortType* greaterPartRecv = nullptr; + IndexType nbGreaterElementsRecv = 0; + RecvDistribution(&greaterPartRecv, &nbGreaterElementsRecv, + globalElementBalance, globalElementBalanceSum, procInTheMiddle, currentComm, false); + SendDistribution(workingArray, nbLowerElements, + globalElementBalance, globalElementBalanceSum, procInTheMiddle, currentComm, true); + // Merge previous part and just received elements + const IndexType fullNbGreaterElementsRecv = nbGreaterElementsRecv + nbGreaterElements; + SortType* fullGreaterPart = new SortType[fullNbGreaterElementsRecv]; + memcpy(fullGreaterPart, workingArray + nbLowerElements, sizeof(SortType)* nbGreaterElements); + memcpy(fullGreaterPart + nbGreaterElements, greaterPartRecv, sizeof(SortType)* nbGreaterElementsRecv); + delete[] workingArray; + delete[] greaterPartRecv; + workingArray = fullGreaterPart; + currentSize = fullNbGreaterElementsRecv; + // Reduce working group + currentComm.groupReduce( procInTheMiddle + 1, currentNbProcs - 1); + } } - ///////////////////////////////////////////////// - // End QsMpi sort - ///////////////////////////////////////////////// - - // Clean - delete[] buffer; - - // Normal Quick sort - FQuickSort< SortType, CompareType, IndexType>::QsOmp(outputArray, size); - outputSize = size; + // Finish by a local sort + FQuickSort< SortType, CompareType, IndexType>::QsOmp(workingArray, currentSize); + (*outputSize) = currentSize; + (*outputArray) = workingArray; } - }; #endif // FQUICKSORTMPI_HPP diff --git a/UTests/FUTester.hpp b/UTests/FUTester.hpp index 6a80cba4f7e030b3fd74e5786b89b448ecfc7a56..e3d5c1f4c7c0de1713a7ad506f573c4ea518ad86 100755 --- a/UTests/FUTester.hpp +++ b/UTests/FUTester.hpp @@ -16,6 +16,7 @@ #ifndef UTESTER_HPP #define UTESTER_HPP +#include "ScalFmmConfig.h" #include <iostream> #include <list> @@ -24,10 +25,10 @@ #define TestClass(X)\ -int main(void){\ - X Controller;\ - return Controller.Run();\ -}\ + int main(void){\ + X Controller;\ + return Controller.Run();\ + }\ /** @@ -44,164 +45,164 @@ int main(void){\ */ template <class TestClass> class FUTester{ - // Test function pointer - typedef void (TestClass::*TestFunc)(void); - - /** Test descriptor */ - struct TestFuncDescriptor{ - TestFunc func; //< Test adress - std::string name; //< Test name - }; - - - std::list<TestFuncDescriptor> tests; //< all tests - - int totalTests; //< number of tests - - int currentTest; //< current processing test in the run - int currentStep; //< current processing step in the run - - int failedSteps; //< number of failed step in the current test - int failedTests; //< number of failed tests - + // Test function pointer + typedef void (TestClass::*TestFunc)(void); + + /** Test descriptor */ + struct TestFuncDescriptor{ + TestFunc func; //< Test adress + std::string name; //< Test name + }; + + + std::list<TestFuncDescriptor> tests; //< all tests + + int totalTests; //< number of tests + + int currentTest; //< current processing test in the run + int currentStep; //< current processing step in the run + + int failedSteps; //< number of failed step in the current test + int failedTests; //< number of failed tests + protected: - /** Constructor */ - FUTester(){ - totalTests = 0; - } - - /** Callback before processing test */ - virtual void Before(){} - - /** Callback after processing test */ - virtual void After(){} - - /** Callback before each unit test */ - virtual void PreTest(){} - - /** Callback after each unit test */ - virtual void PostTest(){} - - /** - * This function has to add tests + /** Constructor */ + FUTester(){ + totalTests = 0; + } + + /** Callback before processing test */ + virtual void Before(){} + + /** Callback after processing test */ + virtual void After(){} + + /** Callback before each unit test */ + virtual void PreTest(){} + + /** Callback after each unit test */ + virtual void PostTest(){} + + /** + * This function has to add tests * <code> AddTest(&MyTest::TestOne); </code> - */ - virtual void SetTests() = 0; - - /** - * Add a test without giving a name - * @param inFunc test function address - */ - void AddTest(TestFunc inFunc){ - char buff[256]; - sprintf(buff,"Unnamed Test number %d",totalTests+1); - AddTest(inFunc,buff); - } - - /** - * Add a test with a name - * @param inFunc test function address - * @param inFuncName function name - */ - void AddTest(TestFunc inFunc, const std::string& inFuncName){ - ++totalTests; - TestFuncDescriptor desc; - desc.func = inFunc; - desc.name = inFuncName; - tests.push_back(desc); - } - - /** - * To print a message manually in the test - * @param value a object that ostream can work on - */ - template <class Output> - void Print(const Output& value){ - std::cout<< "--- Output from program : " << value << "\n"; - } - - /** - * To test - * @param result the test result - * if result is false test failed - */ - void uassert(const bool result){ - ++currentStep; - if(!result){ - std::cout << ">> Step " << currentStep << " Failed\n"; - ++failedSteps; - } - } - - /** - * To test equality - * @param v1 value one - * @param v2 value 2 - * if v1 is not equal v2 test failed - */ - template <class T> - void equal(const T& v1, const T& v2){ - uassert(v1 == v2); - } - - /** - * To test equality - * @param v1 value one - * @param v2 value 2 - * if v1 is equal v2 test failed - */ - template <class T> - void different(const T& v1, const T& v2){ - uassert(v1 != v2); - } - + */ + virtual void SetTests() = 0; + + /** + * Add a test without giving a name + * @param inFunc test function address + */ + void AddTest(TestFunc inFunc){ + char buff[256]; + sprintf(buff,"Unnamed Test number %d",totalTests+1); + AddTest(inFunc,buff); + } + + /** + * Add a test with a name + * @param inFunc test function address + * @param inFuncName function name + */ + void AddTest(TestFunc inFunc, const std::string& inFuncName){ + ++totalTests; + TestFuncDescriptor desc; + desc.func = inFunc; + desc.name = inFuncName; + tests.push_back(desc); + } + + /** + * To print a message manually in the test + * @param value a object that ostream can work on + */ + template <class Output> + void Print(const Output& value){ + std::cout<< "--- Output from program : " << value << "\n"; + } + + /** + * To test + * @param result the test result + * if result is false test failed + */ + void uassert(const bool result){ + ++currentStep; + if(!result){ + std::cout << ">> Step " << currentStep << " Failed\n"; + ++failedSteps; + } + } + + /** + * To test equality + * @param v1 value one + * @param v2 value 2 + * if v1 is not equal v2 test failed + */ + template <class T> + void equal(const T& v1, const T& v2){ + uassert(v1 == v2); + } + + /** + * To test equality + * @param v1 value one + * @param v2 value 2 + * if v1 is equal v2 test failed + */ + template <class T> + void different(const T& v1, const T& v2){ + uassert(v1 != v2); + } + public : - /** - * Processing the test + /** + * Processing the test * return application exit code (= nb of errors) - */ - int Run(){ - tests.clear(); - // register tests - SetTests(); - - TestClass* const toTest = static_cast<TestClass*>(this); - currentTest = 0; - failedTests = 0; - - Before(); - - // for each tests - const typename std::list<TestFuncDescriptor>::const_iterator end = tests.end(); - for(typename std::list<TestFuncDescriptor>::iterator iter = tests.begin() ; iter != end ; ++iter){ - currentStep = 0; - failedSteps = 0; - - std::cout << "[Start] " << (*iter).name << "\n"; - - PreTest(); - TestFunc ff = (*iter).func; - (toTest->*ff)(); - PostTest(); - - if(failedSteps){ - std::cout << "[Finished] FAILED (" << failedSteps << "/" << currentStep<< " steps failed)\n"; - ++failedTests; - } - else{ - std::cout << "[Finished] PASSED (" << currentStep << " steps)\n"; - } - - ++currentTest; - } - - - After(); - - std::cout <<"Test is over, " << (totalTests-failedTests) << " Passed, " << failedTests << " Failed\n"; - - return failedTests; - } + */ + int Run(){ + tests.clear(); + // register tests + SetTests(); + + TestClass* const toTest = static_cast<TestClass*>(this); + currentTest = 0; + failedTests = 0; + + Before(); + + // for each tests + const typename std::list<TestFuncDescriptor>::const_iterator end = tests.end(); + for(typename std::list<TestFuncDescriptor>::iterator iter = tests.begin() ; iter != end ; ++iter){ + currentStep = 0; + failedSteps = 0; + + std::cout << "[Start] " << (*iter).name << "\n"; + + PreTest(); + TestFunc ff = (*iter).func; + (toTest->*ff)(); + PostTest(); + + if(failedSteps){ + std::cout << "[Finished] FAILED (" << failedSteps << "/" << currentStep<< " steps failed)\n"; + ++failedTests; + } + else{ + std::cout << "[Finished] PASSED (" << currentStep << " steps)\n"; + } + + ++currentTest; + } + + + After(); + + std::cout <<"Test is over, " << (totalTests-failedTests) << " Passed, " << failedTests << " Failed\n"; + + return failedTests; + } }; @@ -210,33 +211,33 @@ public : #include "../Src/Utils/FMpi.hpp" #define TestClassMpi(X) \ - int main(int argc, char** argv){ \ + int main(int argc, char** argv){ \ X Controller(argc,argv); \ return Controller.Run(); \ - } \ - + } \ + template <class TestClass> class FUTesterMpi : public FUTester<TestClass>{ protected: - FMpi app; - - //Constructor with params to initialize FMpi - FUTesterMpi(int argc, char ** argv) : app(argc,argv){ - } + FMpi app; + + //Constructor with params to initialize FMpi + FUTesterMpi(int argc, char ** argv) : app(argc,argv){ + } - /** + /** * To print a message manually in the test * @param value a object that ostream can work on */ - template <class Output> - void Print(const Output& value){ - if(app.global().processId()==0){ - std::cout<< "--- Output from program : " << value << "\n"; + template <class Output> + void Print(const Output& value){ + if(app.global().processId()==0){ + std::cout<< "--- Output from program : " << value << "\n"; + } } - } - + }; - + #endif #endif diff --git a/UTests/utestMpiQs.cpp b/UTests/utestMpiQs.cpp index aed9241dd51e7f594ea534d46908063e3d8dd05d..bd66483e9d133de4404bf873ef14ba72ba77c50a 100644 --- a/UTests/utestMpiQs.cpp +++ b/UTests/utestMpiQs.cpp @@ -1,3 +1,18 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, BĂ©renger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== #ifndef UTESTMPIQS_HPP #define UTESTMPIQS_HPP