diff --git a/Src/BalanceTree/FAbstractBalanceAlgorithm.hpp b/Src/BalanceTree/FAbstractBalanceAlgorithm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..685642bf57cc5abde755a867dcdb832e1b2605d4 --- /dev/null +++ b/Src/BalanceTree/FAbstractBalanceAlgorithm.hpp @@ -0,0 +1,61 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== + +#ifndef FABSTRACTBALANCEALGORITHM_H +#define FABSTRACTBALANCEALGORITHM_H + + +/** + * @author Cyrille Piacibello + * @class FAbstractBalanceAlgorithm + * + * @brief This class provide the methods that are used to balance a + * tree FMpiTreeBuilder::EqualizeAndFillTree + */ +class FAbstractBalanceAlgorithm{ +public: + virtual ~FAbstractBalanceAlgorithm(){ + } + + /** + * @brief Give the right leaves (ie the min) of the interval that + * will be handle by idxOfProc + * @param numberOfLeaves Total number of leaves that exist. + * @param numberOfPartPerLeaf Array of lenght numberOfLeaves containing the number of particles in each leaf + * @param numberOfPart Number of particles in the whole field + * @param idxOfLeaves Array of lenght numberOfLeaves containing the Morton Index of each Leaf + * @param numberOfProc Number of MPI processus that will handle the Octree + * @param idxOfProc Idx of the proc calling. + */ + virtual FSize getRight(const FSize numberOfLeaves, const int*numberOfPartPerLeaf, const FSize numberOfPart, const MortonIndex* idxOfLeaves, + const int numberOfProc, const int idxOfProc) = 0; + + /** + * @brief Give the Leaft leaves (ie the max) of the interval that + * will be handle by idxOfProc + * @param numberOfLeaves Total number of leaves that exist. + * @param numberOfPartPerLeaf Array of lenght numberOfLeaves containing the number of particles in each leaf + * @param numberOfPart Number of particles in the whole field + * @param idxOfLeaves Array of lenght numberOfLeaves containing the Morton Index of each Leaf + * @param numberOfProc Number of MPI processus that will handle the Octree + * @param idxOfProc Idx of the proc calling. + */ + virtual FSize getLeft(const FSize numberOfLeaves, const int*numberOfPartPerLeaf, const FSize numberOfPart, const MortonIndex* idxOfLeaves, + const int numberOfProc, const int idxOfProc) = 0; + +}; + +#endif //FABSTRACTBALANCEALGORITHM_H diff --git a/Src/BalanceTree/FLeafBalance.hpp b/Src/BalanceTree/FLeafBalance.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1a4c09a78983bba571e2efaa3ad0b08e6aec0d7a --- /dev/null +++ b/Src/BalanceTree/FLeafBalance.hpp @@ -0,0 +1,58 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== + +#ifndef FLEAFBALANCE_H +#define FLEAFBALANCE_H + +#include "./FAbstractBalanceAlgorithm.hpp" +#include "../Utils/FMath.hpp" + +/** + * @author Cyrille Piacibello + * @class FLeafBalance + * + * @brief This class inherits from FAbstractBalanceAlgorithm. It + * provides balancing methods based on leaf numbers only. + */ +class FLeafBalance : public FAbstractBalanceAlgorithm{ + +public: + /** + * Does not need the number of particles. Just divide the leaves + * over processus + */ + FSize getRight(const FSize numberOfLeaves, const int*numberOfPartPerLeaf, const FSize numberOfPart, const MortonIndex* idxOfLeaves, + const int numberOfProc, const int idxOfProc){ + const double step = (double(numberOfLeaves) / double(numberOfProc)); + const FSize res = FSize(FMath::Ceil(step * double(idxOfProc+1))); + if(res > numberOfLeaves) return numberOfLeaves; + else return res; + } + + /** + * Does not need the number of particles. Just divide the leaves + * over processus + */ + FSize getLeft(const FSize numberOfLeaves, const int*numberOfPartPerLeaf, const FSize numberOfPart, const MortonIndex* idxOfLeaves, + const int numberOfProc, const int idxOfProc){ + const double step = (double(numberOfLeaves) / double(numberOfProc)); + return FSize(FMath::Ceil(step * double(idxOfProc))); + } + +}; + + +#endif // FLEAFBALANCE_H diff --git a/Src/BalanceTree/FParticlesBalance.hpp b/Src/BalanceTree/FParticlesBalance.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9794134d635215f6ff7b4d1cb964b134908317ee --- /dev/null +++ b/Src/BalanceTree/FParticlesBalance.hpp @@ -0,0 +1,73 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== + +#ifndef FPARTICLESBALANCE_H +#define FPARTICLESBALANCE_H + +#include "./FAbstractBalanceAlgorithm.hpp" +#include "../Utils/FMath.hpp" + +/** + * @author Cyrille Piacibello + * @class FLeafBalance + * + * @brief This class inherits from FAbstractBalanceAlgorithm. It + * provides balancing methods based on particles distribution. + */ +class FParticlesBalance : public FAbstractBalanceAlgorithm{ + +public: + /** + * getRight interval based on particles distribution + */ + FSize getRight(const FSize numberOfLeaves, const int*numberOfPartPerLeaf, const FSize numberOfPart, const MortonIndex* idxOfLeaves, + const int numberOfProc, const int idxOfProc){ + int acc = 0; + FSize i = 0; + const double step = (double(numberOfPart) / double(numberOfProc)); + FSize aimRight = FSize(FMath::Ceil(step * double(idxOfProc+1))); + if(aimRight > numberOfPart) aimRight = numberOfPart; + while(acc < aimRight){ + acc+=numberOfPartPerLeaf[i]; + ++i; + } + if(FMath::Abs(aimRight-acc) < FMath::Abs(aimRight-(acc-numberOfPartPerLeaf[i]))) return i; + else + return i-1; + } + + /** + * get left interval based on particles distribution + */ + FSize getLeft(const FSize numberOfLeaves, const int*numberOfPartPerLeaf, const FSize numberOfPart, const MortonIndex* idxOfLeaves, + const int numberOfProc, const int idxOfProc){ + int acc = 0; + FSize i = 0; + const double step = (double(numberOfPart) / double(numberOfProc)); + const FSize aimLeft = FSize(FMath::Ceil(step * double(idxOfProc))); + while (acc < aimLeft){ + acc+=numberOfPartPerLeaf[i]; + ++i; + } + if(FMath::Abs(aimLeft-acc) < FMath::Abs(aimLeft-(acc-numberOfPartPerLeaf[i]))) return i; + else + return i-1; + } + +}; + + +#endif // FPARTICLESBALANCE_H diff --git a/Src/Files/FMpiTreeBuilder.hpp b/Src/Files/FMpiTreeBuilder.hpp index e4d4514f316f32d0ac327659182c4aa495c68886..008942bb4f32ea4b616e1f8fe5d2ae61d12b8aee 100755 --- a/Src/Files/FMpiTreeBuilder.hpp +++ b/Src/Files/FMpiTreeBuilder.hpp @@ -23,500 +23,558 @@ #include "../Utils/FMemUtils.hpp" #include "../Utils/FTrace.hpp" +#include "../BalanceTree/FLeafBalance.hpp" + /** This class manage the loading of particles for the mpi version. - * It use a BinLoader and then sort the data with a parallel quick sort - * or bitonic sort. - * After it needs to merge the leaves to finally equalize the data. - */ + * It use a BinLoader and then sort the data with a parallel quick sort + * or bitonic sort. + * After it needs to merge the leaves to finally equalize the data. + */ template class FMpiTreeBuilder{ public: - /** What sorting algorithm to use */ - enum SortingType{ - QuickSort, - BitonicSort, - }; + /** What sorting algorithm to use */ + enum SortingType{ + QuickSort, + BitonicSort, + }; private: - /** This method has been tacken from the octree - * it computes a tree coordinate (x or y or z) from real position - */ - static int getTreeCoordinate(const FReal inRelativePosition, const FReal boxWidthAtLeafLevel) { - const FReal indexFReal = inRelativePosition / boxWidthAtLeafLevel; - const int index = int(FMath::dfloor(indexFReal)); - if( index && FMath::LookEqual(inRelativePosition, boxWidthAtLeafLevel * FReal(index) ) ){ - return index - 1; - } - return index; + /** This method has been tacken from the octree + * it computes a tree coordinate (x or y or z) from real position + */ + static int getTreeCoordinate(const FReal inRelativePosition, const FReal boxWidthAtLeafLevel) { + const FReal indexFReal = inRelativePosition / boxWidthAtLeafLevel; + const int index = int(FMath::dfloor(indexFReal)); + if( index && FMath::LookEqual(inRelativePosition, boxWidthAtLeafLevel * FReal(index) ) ){ + return index - 1; } + return index; + } + + /** A particle may not have a MortonIndex Method + * but they are sorted on their morton index + * so this struct store a particle + its index + */ + struct IndexedParticle{ + MortonIndex index; + ParticleClass particle; - /** A particle may not have a MortonIndex Method - * but they are sorted on their morton index - * so this struct store a particle + its index - */ - struct IndexedParticle{ - MortonIndex index; - ParticleClass particle; - - operator MortonIndex(){ - return this->index; - } - }; - - ////////////////////////////////////////////////////////////////////////// - // To sort tha particles we hold - ////////////////////////////////////////////////////////////////////////// - - - template - static void SortParticles( const FMpi::FComm& communicator, LoaderClass& loader, const SortingType type, - const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); - - // create particles - IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()]; - FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles()); - FPoint boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2)); - FTreeCoordinate host; - - const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (TreeHeight - 1) ); - for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ - loader.fillParticle(realParticlesIndexed[idxPart].particle); - host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); - host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); - host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); - - realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); - } - - // sort particles - if(type == QuickSort){ - FQuickSortMpi::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(), *outputArray, *outputSize,communicator); - delete [] (realParticlesIndexed); - } - else { - FBitonicSort::Sort( realParticlesIndexed, loader.getNumberOfParticles(), communicator ); - *outputArray = realParticlesIndexed; - *outputSize = loader.getNumberOfParticles(); - } + operator MortonIndex(){ + return this->index; } + }; + + ////////////////////////////////////////////////////////////////////////// + // To sort tha particles we hold + ////////////////////////////////////////////////////////////////////////// + + + template + static void SortParticles( const FMpi::FComm& communicator, LoaderClass& loader, const SortingType type, + const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); + + // create particles + IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()]; + FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles()); + FPoint boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2)); + FTreeCoordinate host; + + const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (TreeHeight - 1) ); + for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + loader.fillParticle(realParticlesIndexed[idxPart].particle); + host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); + host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); + host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); - static void SortParticlesFromArray( const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, const SortingType type, - const FPoint& centerOfBox, const FReal boxWidth, - const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); - - // create particles - IndexedParticle*const realParticlesIndexed = new IndexedParticle[size]; - FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * size); - - FPoint boxCorner(centerOfBox - (boxWidth/2)); - FTreeCoordinate host; - - const FReal boxWidthAtLeafLevel = boxWidth / FReal(1 << (TreeHeight - 1) ); - - for(int idxPart = 0 ; idxPart < size ; ++idxPart){ - realParticlesIndexed[idxPart].particle = array[idxPart]; - host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); - host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); - host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); - - realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); - } - - // sort particles - if(type == QuickSort){ - FQuickSortMpi::QsMpi(realParticlesIndexed, size, *outputArray, *outputSize,communicator); - delete [] (realParticlesIndexed); - } - else { - FBitonicSort::Sort( realParticlesIndexed, size, communicator ); - *outputArray = realParticlesIndexed; - *outputSize = size; - } + realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); } + // sort particles + if(type == QuickSort){ + FQuickSortMpi::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(), *outputArray, *outputSize,communicator); + delete [] (realParticlesIndexed); + } + else { + FBitonicSort::Sort( realParticlesIndexed, loader.getNumberOfParticles(), communicator ); + *outputArray = realParticlesIndexed; + *outputSize = loader.getNumberOfParticles(); + } + } + + static void SortParticlesFromArray( const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, const SortingType type, + const FPoint& centerOfBox, const FReal boxWidth, + const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); + + // create particles + IndexedParticle*const realParticlesIndexed = new IndexedParticle[size]; + FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * size); + + FPoint boxCorner(centerOfBox - (boxWidth/2)); + FTreeCoordinate host; + + const FReal boxWidthAtLeafLevel = boxWidth / FReal(1 << (TreeHeight - 1) ); - ////////////////////////////////////////////////////////////////////////// - // To merge the leaves - ////////////////////////////////////////////////////////////////////////// - - static void MergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize& workingSize, - char** leavesArray, FSize* const leavesSize){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); - const int rank = communicator.processId(); - const int nbProcs = communicator.processCount(); - - // be sure there is no splited leaves - // to do that we exchange the first index with the left proc - { - FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) ); - - MortonIndex otherFirstIndex = -1; - if(workingSize != 0 && rank != 0 && rank != nbProcs - 1){ - MPI_Sendrecv(&workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, - &otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, - communicator.getComm(), MPI_STATUS_IGNORE); - } - else if( rank == 0){ - MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE); - } - else if( rank == nbProcs - 1){ - MPI_Send( &workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm()); - } - else { - MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE); - MPI_Send(&otherFirstIndex, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm()); - } - - // at this point every one know the first index of his right neighbors - const bool needToRecvBeforeSend = (rank != 0 && ((workingSize && otherFirstIndex == workingArray[0].index ) || !workingSize)); - MPI_Request requestSendLeaf; - - IndexedParticle* sendBuffer = 0; - if(rank != nbProcs - 1 && needToRecvBeforeSend == false){ - FSize idxPart = workingSize - 1 ; - while(idxPart >= 0 && workingArray[idxPart].index == otherFirstIndex){ - --idxPart; - } - const int particlesToSend = int(workingSize - 1 - idxPart); - if(particlesToSend){ - workingSize -= particlesToSend; - sendBuffer = new IndexedParticle[particlesToSend]; - memcpy(sendBuffer, &workingArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle)); - - MPI_Isend( sendBuffer, particlesToSend * int(sizeof(IndexedParticle)), MPI_BYTE, - rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf); - } - else{ - MPI_Isend( 0, 0, MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf); - } - } - - if( rank != 0 ){ - int sendByOther = 0; - - MPI_Status probStatus; - MPI_Probe(rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), &probStatus); - MPI_Get_count( &probStatus, MPI_BYTE, &sendByOther); - - if(sendByOther){ - sendByOther /= int(sizeof(IndexedParticle)); - - const IndexedParticle* const reallocOutputArray = workingArray; - const FSize reallocOutputSize = workingSize; - - workingSize += sendByOther; - workingArray = new IndexedParticle[workingSize]; - FMemUtils::memcpy(&workingArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle)); - delete[] reallocOutputArray; - - MPI_Recv(workingArray, int(sizeof(IndexedParticle)) * sendByOther, MPI_BYTE, - rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE); - } - else{ - MPI_Recv( 0, 0, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE); - } - } - - if(rank != nbProcs - 1 && needToRecvBeforeSend == true){ - MPI_Send( workingArray, int(workingSize * sizeof(IndexedParticle)), MPI_BYTE, - rank + 1, FMpi::TagSplittedLeaf, communicator.getComm()); - delete[] workingArray; - workingArray = 0; - workingSize = 0; - } - else if(rank != nbProcs - 1){ - MPI_Wait( &requestSendLeaf, MPI_STATUS_IGNORE); - delete[] sendBuffer; - sendBuffer = 0; - } - } - - { - FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) ); - // We now copy the data from a sorted type into real particles array + counter - - (*leavesSize) = 0; - (*leavesArray) = 0; - - if(workingSize){ - (*leavesArray) = new char[workingSize * (sizeof(ParticleClass) + sizeof(int))]; - - MortonIndex previousIndex = -1; - char* writeIndex = (*leavesArray); - int* writeCounter = 0; - - for( FSize idxPart = 0; idxPart < workingSize ; ++idxPart){ - if( workingArray[idxPart].index != previousIndex ){ - previousIndex = workingArray[idxPart].index; - ++(*leavesSize); - - writeCounter = reinterpret_cast( writeIndex ); - writeIndex += sizeof(int); - - (*writeCounter) = 0; - } - - memcpy(writeIndex, &workingArray[idxPart].particle, sizeof(ParticleClass)); - - writeIndex += sizeof(ParticleClass); - ++(*writeCounter); - } - } - delete [] workingArray; - - workingArray = 0; - workingSize = 0; - } + for(int idxPart = 0 ; idxPart < size ; ++idxPart){ + realParticlesIndexed[idxPart].particle = array[idxPart]; + host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); + host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); + host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); + + realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); } - ////////////////////////////////////////////////////////////////////////// - // To equalize (same number of leaves among the procs) - ////////////////////////////////////////////////////////////////////////// - - /** Put the interval into a tree */ - template - static void EqualizeAndFillTree(const FMpi::FComm& communicator, ContainerClass* particlesSaver, - char*& leavesArray, FSize& nbLeavesInIntervals ){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); - const int rank = communicator.processId(); - const int nbProcs = communicator.processCount(); - const FSize currentNbLeafs = nbLeavesInIntervals; - - // We have to know the number of leaves each procs holds - FSize leavesPerProcs[nbProcs]; - memset(leavesPerProcs, 0, sizeof(int) * nbProcs); - MPI_Allgather(&nbLeavesInIntervals, 1, MPI_LONG_LONG, leavesPerProcs, 1, MPI_LONG_LONG, communicator.getComm()); - - // Count the number of leaves on each side - FSize currentLeafsOnMyLeft = 0; - FSize currentLeafsOnMyRight = 0; - for(int idxProc = 0 ; idxProc < nbProcs ; ++idxProc ){ - if(idxProc < rank) currentLeafsOnMyLeft += leavesPerProcs[idxProc]; - if(rank < idxProc) currentLeafsOnMyRight += leavesPerProcs[idxProc]; - } - - // So we can know the number of total leafs and - // the number of leaves each procs must have at the end - const FSize totalNbLeaves = (currentLeafsOnMyLeft + currentNbLeafs + currentLeafsOnMyRight); - const FSize correctLeftLeavesNumber = communicator.getLeft(totalNbLeaves); - const FSize correctRightLeavesIndex = communicator.getRight(totalNbLeaves); - - // This will be used for the all to all - int leavesToSend[nbProcs]; - memset(leavesToSend, 0, sizeof(int) * nbProcs); - int bytesToSend[nbProcs]; - memset(bytesToSend, 0, sizeof(int) * nbProcs); - int bytesOffset[nbProcs]; - memset(bytesToSend, 0, sizeof(int) * nbProcs); - - // Buffer Position - FSize currentIntervalPosition = 0; - - // I need to send left if I hold leaves that belong to procs on my left - const bool iNeedToSendToLeft = currentLeafsOnMyLeft < correctLeftLeavesNumber; - // I need to send right if I hold leaves that belong to procs on my right - const bool iNeedToSendToRight = correctRightLeavesIndex < currentLeafsOnMyLeft + currentNbLeafs; - - if(iNeedToSendToLeft){ - FTRACE( FTrace::FRegion regionTrace("Calcul SendToLeft", __FUNCTION__ , __FILE__ , __LINE__) ); - // Find the first proc that need my data - int idxProc = rank - 1; - while( idxProc > 0 ){ - const FSize thisProcRight = communicator.getOtherRight(totalNbLeaves, idxProc - 1); - // Go to left until proc-1 has a right index lower than my current left - if( thisProcRight < currentLeafsOnMyLeft){ - break; - } - --idxProc; - } - - // Count data for this proc - int ICanGive = int(currentNbLeafs); - leavesToSend[idxProc] = int(FMath::Min(communicator.getOtherRight(totalNbLeaves, idxProc), totalNbLeaves - currentLeafsOnMyRight) - - FMath::Max( currentLeafsOnMyLeft , communicator.getOtherLeft(totalNbLeaves, idxProc))); - { - bytesOffset[idxProc] = 0; - for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ - currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); - } - bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); - } - ICanGive -= leavesToSend[idxProc]; - ++idxProc; - - // count data to other proc - while(idxProc < rank && ICanGive){ - leavesToSend[idxProc] = int(FMath::Min( communicator.getOtherRight(totalNbLeaves, idxProc) - communicator.getOtherLeft(totalNbLeaves, idxProc), FSize(ICanGive))); - - bytesOffset[idxProc] = int(currentIntervalPosition); - for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ - currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); - } - bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); - - ICanGive -= leavesToSend[idxProc]; - ++idxProc; - } - } - - // Store the index of my data but we do not insert the now - const FSize myParticlesPosition = currentIntervalPosition; - { - FTRACE( FTrace::FRegion regionTrace("Jump My particles", __FUNCTION__ , __FILE__ , __LINE__) ); - const FSize iNeedToSendLeftCount = correctLeftLeavesNumber - currentLeafsOnMyLeft; - FSize endForMe = currentNbLeafs; - if(iNeedToSendToRight){ - const FSize iNeedToSendRightCount = currentLeafsOnMyLeft + currentNbLeafs - correctRightLeavesIndex; - endForMe -= iNeedToSendRightCount; - } - - // We have to jump the correct number of leaves - for(FSize idxLeaf = FMath::Max(iNeedToSendLeftCount,FSize(0)) ; idxLeaf < endForMe ; ++idxLeaf){ - const int nbPartInLeaf = (*(int*)&leavesArray[currentIntervalPosition]); - currentIntervalPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int); - } - } - - // Proceed same on the right - if(iNeedToSendToRight){ - FTRACE( FTrace::FRegion regionTrace("Calcul SendToRight", __FUNCTION__ , __FILE__ , __LINE__) ); - // Find the last proc on the right that need my data - int idxProc = rank + 1; - while( idxProc < nbProcs ){ - const FSize thisProcLeft = communicator.getOtherLeft(totalNbLeaves, idxProc); - const FSize thisProcRight = communicator.getOtherRight(totalNbLeaves, idxProc); - // Progress until the proc+1 has its left index upper to my current right - if( thisProcLeft < currentLeafsOnMyLeft || (totalNbLeaves - currentLeafsOnMyRight) < thisProcRight){ - break; - } - ++idxProc; - } - - // Count the data - int ICanGive = int(currentLeafsOnMyLeft + currentNbLeafs - correctRightLeavesIndex); - leavesToSend[idxProc] = int(FMath::Min(communicator.getOtherRight(totalNbLeaves, idxProc) , (totalNbLeaves - currentLeafsOnMyRight)) - - FMath::Max(communicator.getOtherLeft(totalNbLeaves, idxProc), currentLeafsOnMyLeft) ); - - { - bytesOffset[idxProc] = int(currentIntervalPosition); - for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ - currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); - } - bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); - } - ICanGive -= leavesToSend[idxProc]; - ++idxProc; - - // Now Count the data to other - while(idxProc < nbProcs && ICanGive){ - leavesToSend[idxProc] = int(FMath::Min( communicator.getOtherRight(totalNbLeaves, idxProc) - communicator.getOtherLeft(totalNbLeaves, idxProc), FSize(ICanGive))); - - bytesOffset[idxProc] = int(currentIntervalPosition); - for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ - currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); - } - bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); - - ICanGive -= leavesToSend[idxProc]; - ++idxProc; - } - } - - // Inform other about who will send/receive what - int bytesToSendRecv[nbProcs * nbProcs]; - memset(bytesToSendRecv, 0, sizeof(int) * nbProcs * nbProcs); - MPI_Allgather(bytesToSend, nbProcs, MPI_INT, bytesToSendRecv, nbProcs, MPI_INT, communicator.getComm()); - - int bytesToRecv[nbProcs]; - memset(bytesToRecv, 0, sizeof(int) * nbProcs); - int bytesOffsetToRecv[nbProcs]; - memset(bytesOffsetToRecv, 0, sizeof(int) * nbProcs); - - // Prepare needed buffer - FSize sumBytesToRecv = 0; - for(int idxProc = 0 ; idxProc < nbProcs ; ++idxProc){ - if( bytesToSendRecv[idxProc * nbProcs + rank] ){ - bytesOffsetToRecv[idxProc] = int(sumBytesToRecv); - sumBytesToRecv += FSize(bytesToSendRecv[idxProc * nbProcs + rank]); - bytesToRecv[idxProc] = bytesToSendRecv[idxProc * nbProcs + rank]; - } - } - - // Send alll to all - char* const recvbuf = new char[sumBytesToRecv]; - MPI_Alltoallv(leavesArray, bytesToSend, bytesOffset, MPI_BYTE, - recvbuf, bytesToRecv, bytesOffsetToRecv, MPI_BYTE, - communicator.getComm()); - - { // Insert received data - FTRACE( FTrace::FRegion regionTrace("Insert Received data", __FUNCTION__ , __FILE__ , __LINE__) ); - FSize recvBufferPosition = 0; - while( recvBufferPosition < sumBytesToRecv){ - const int nbPartInLeaf = (*reinterpret_cast(&recvbuf[recvBufferPosition])); - ParticleClass* const particles = reinterpret_cast(&recvbuf[recvBufferPosition] + sizeof(int)); - - for(int idxPart = 0 ; idxPart < nbPartInLeaf ; ++idxPart){ - particlesSaver->push(particles[idxPart]); - } - recvBufferPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int); - - } - } - delete[] recvbuf; - - - { // Insert my data - FTRACE( FTrace::FRegion regionTrace("Insert My particles", __FUNCTION__ , __FILE__ , __LINE__) ); - currentIntervalPosition = myParticlesPosition; - const FSize iNeedToSendLeftCount = correctLeftLeavesNumber - currentLeafsOnMyLeft; - FSize endForMe = currentNbLeafs; - if(iNeedToSendToRight){ - const FSize iNeedToSendRightCount = currentLeafsOnMyLeft + currentNbLeafs - correctRightLeavesIndex; - endForMe -= iNeedToSendRightCount; - } - - for(FSize idxLeaf = FMath::Max(iNeedToSendLeftCount,FSize(0)) ; idxLeaf < endForMe ; ++idxLeaf){ - const int nbPartInLeaf = (*(int*)&leavesArray[currentIntervalPosition]); - ParticleClass* const particles = reinterpret_cast(&leavesArray[currentIntervalPosition] + sizeof(int)); - - for(int idxPart = 0 ; idxPart < nbPartInLeaf ; ++idxPart){ - particlesSaver->push(particles[idxPart]); - } - currentIntervalPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int); - - } - } - - delete[] leavesArray; - leavesArray = 0; - nbLeavesInIntervals = 0; + // sort particles + if(type == QuickSort){ + FQuickSortMpi::QsMpi(realParticlesIndexed, size, *outputArray, *outputSize,communicator); + delete [] (realParticlesIndexed); + } + else { + FBitonicSort::Sort( realParticlesIndexed, size, communicator ); + *outputArray = realParticlesIndexed; + *outputSize = size; + } + } + + + ////////////////////////////////////////////////////////////////////////// + // To merge the leaves + ////////////////////////////////////////////////////////////////////////// + + static void MergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize& workingSize, + char** leavesArray, FSize* const leavesSize){ + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); + const int rank = communicator.processId(); + const int nbProcs = communicator.processCount(); + + // be sure there is no splited leaves + // to do that we exchange the first index with the left proc + { + FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) ); + + MortonIndex otherFirstIndex = -1; + if(workingSize != 0 && rank != 0 && rank != nbProcs - 1){ + MPI_Sendrecv(&workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, + &otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, + communicator.getComm(), MPI_STATUS_IGNORE); + } + else if( rank == 0){ + MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE); + } + else if( rank == nbProcs - 1){ + MPI_Send( &workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm()); + } + else { + MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE); + MPI_Send(&otherFirstIndex, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm()); + } + + // at this point every one know the first index of his right neighbors + const bool needToRecvBeforeSend = (rank != 0 && ((workingSize && otherFirstIndex == workingArray[0].index ) || !workingSize)); + MPI_Request requestSendLeaf; + + IndexedParticle* sendBuffer = 0; + if(rank != nbProcs - 1 && needToRecvBeforeSend == false){ + FSize idxPart = workingSize - 1 ; + while(idxPart >= 0 && workingArray[idxPart].index == otherFirstIndex){ + --idxPart; + } + const int particlesToSend = int(workingSize - 1 - idxPart); + if(particlesToSend){ + workingSize -= particlesToSend; + sendBuffer = new IndexedParticle[particlesToSend]; + memcpy(sendBuffer, &workingArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle)); + + MPI_Isend( sendBuffer, particlesToSend * int(sizeof(IndexedParticle)), MPI_BYTE, + rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf); + } + else{ + MPI_Isend( 0, 0, MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf); + } + } + + if( rank != 0 ){ + int sendByOther = 0; + + MPI_Status probStatus; + MPI_Probe(rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), &probStatus); + MPI_Get_count( &probStatus, MPI_BYTE, &sendByOther); + + if(sendByOther){ + sendByOther /= int(sizeof(IndexedParticle)); + + const IndexedParticle* const reallocOutputArray = workingArray; + const FSize reallocOutputSize = workingSize; + + workingSize += sendByOther; + workingArray = new IndexedParticle[workingSize]; + FMemUtils::memcpy(&workingArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle)); + delete[] reallocOutputArray; + + MPI_Recv(workingArray, int(sizeof(IndexedParticle)) * sendByOther, MPI_BYTE, + rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE); + } + else{ + MPI_Recv( 0, 0, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE); + } + } + + if(rank != nbProcs - 1 && needToRecvBeforeSend == true){ + MPI_Send( workingArray, int(workingSize * sizeof(IndexedParticle)), MPI_BYTE, + rank + 1, FMpi::TagSplittedLeaf, communicator.getComm()); + delete[] workingArray; + workingArray = 0; + workingSize = 0; + } + else if(rank != nbProcs - 1){ + MPI_Wait( &requestSendLeaf, MPI_STATUS_IGNORE); + delete[] sendBuffer; + sendBuffer = 0; + } } -public: + { + FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) ); + // We now copy the data from a sorted type into real particles array + counter + + (*leavesSize) = 0; + (*leavesArray) = 0; + + if(workingSize){ + (*leavesArray) = new char[workingSize * (sizeof(ParticleClass) + sizeof(int))]; - ////////////////////////////////////////////////////////////////////////// - // The builder function - ////////////////////////////////////////////////////////////////////////// + MortonIndex previousIndex = -1; + char* writeIndex = (*leavesArray); + int* writeCounter = 0; - template - static void ArrayToTree(const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, - const FPoint& boxCenter, const FReal boxWidth, const int treeHeight, - ContainerClass* particleSaver, const SortingType type = QuickSort){ + for( FSize idxPart = 0; idxPart < workingSize ; ++idxPart){ + if( workingArray[idxPart].index != previousIndex ){ + previousIndex = workingArray[idxPart].index; + ++(*leavesSize); - IndexedParticle* particlesArray = 0; - FSize particlesSize = 0; - SortParticlesFromArray(communicator, array, size, type, boxCenter, boxWidth, treeHeight, - &particlesArray, &particlesSize); + writeCounter = reinterpret_cast( writeIndex ); + writeIndex += sizeof(int); - char* leavesArray = 0; - FSize leavesSize = 0; - MergeLeaves(communicator, particlesArray, particlesSize, &leavesArray, &leavesSize); + (*writeCounter) = 0; + } - EqualizeAndFillTree(communicator, particleSaver, leavesArray, leavesSize); + memcpy(writeIndex, &workingArray[idxPart].particle, sizeof(ParticleClass)); + + writeIndex += sizeof(ParticleClass); + ++(*writeCounter); + } + } + + delete [] workingArray; + + workingArray = 0; + workingSize = 0; + } + } + + ////////////////////////////////////////////////////////////////////////// + // To equalize (same number of leaves among the procs) + ////////////////////////////////////////////////////////////////////////// + + /** Put the interval into a tree */ + template + static void EqualizeAndFillTree(const FMpi::FComm& communicator, ContainerClass* particlesSaver, + char*& leavesArray, FSize& nbLeavesInIntervals, FAbstractBalanceAlgorithm * balancer){ + + + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); + const int rank = communicator.processId(); + const int nbProcs = communicator.processCount(); + const FSize currentNbLeafs = nbLeavesInIntervals; + + // We have to know the number of leaves each procs holds + FSize leavesPerProcs[nbProcs]; + memset(leavesPerProcs, 0, sizeof(int) * nbProcs); + MPI_Allgather(&nbLeavesInIntervals, 1, MPI_LONG_LONG, leavesPerProcs, 1, MPI_LONG_LONG, communicator.getComm()); + printf("Proc : %d : Currently have %lld leaves \n",rank,currentNbLeafs); + + + //Start working THERE + + //We need the max number of leafs over th procs + FSize maxNbOfLeaf=0; + for(int i = 0 ; i< nbProcs ; ++i){ + if(maxNbOfLeaf < leavesPerProcs[i]) maxNbOfLeaf=leavesPerProcs[i]; } + //Creation of an array to store how many parts are in each leaf + int * arrayToLeafs = new int[nbProcs*maxNbOfLeaf]; + memset(arrayToLeafs,0,sizeof(int)*maxNbOfLeaf*nbProcs); + + //Just indirection for simplifying + int * myParts = &arrayToLeafs[rank*maxNbOfLeaf]; + + //Loop over leafArray to fill myParts + FSize remainingLeafsToVisit = nbLeavesInIntervals; + long unsigned int firstIdx = 0; + while(remainingLeafsToVisit > 0){ + int* intTab = reinterpret_cast(&leavesArray[firstIdx]); + //ParticleClass* tab = reinterpret_cast(&leavesArray[firstIdx+sizeof(int)]); + //printf("Leaf %lld contains %d :: \n",nbLeavesInIntervals-remainingLeafsToVisit,intTab[0]); + for(int k =0; kgetLeft( totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,rank); + const FSize correctRightLeavesIndex = balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,rank); + + printf("Proc [%d] :: will work from leaf %lld \t to leaf %lld \n",rank,correctLeftLeavesNumber,correctRightLeavesIndex); + + // This will be used for the all to all + int leavesToSend[nbProcs]; + memset(leavesToSend, 0, sizeof(int) * nbProcs); + int bytesToSend[nbProcs]; + memset(bytesToSend, 0, sizeof(int) * nbProcs); + int bytesOffset[nbProcs]; + memset(bytesToSend, 0, sizeof(int) * nbProcs); + + // Buffer Position + FSize currentIntervalPosition = 0; + + // I need to send left if I hold leaves that belong to procs on my left + const bool iNeedToSendToLeft = currentLeafsOnMyLeft < correctLeftLeavesNumber; + // I need to send right if I hold leaves that belong to procs on my right + const bool iNeedToSendToRight = correctRightLeavesIndex < currentLeafsOnMyLeft + currentNbLeafs; + + if(iNeedToSendToLeft){ + FTRACE( FTrace::FRegion regionTrace("Calcul SendToLeft", __FUNCTION__ , __FILE__ , __LINE__) ); + // Find the first proc that need my data + int idxProc = rank - 1; + while( idxProc > 0 ){ + const FSize thisProcRight = balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc - 1); + // Go to left until proc-1 has a right index lower than my current left + if( thisProcRight < currentLeafsOnMyLeft){ + break; + } + --idxProc; + } + + // Count data for this proc + int ICanGive = int(currentNbLeafs); + leavesToSend[idxProc] = int(FMath::Min(balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc),totalNbLeaves - currentLeafsOnMyRight) - FMath::Max( currentLeafsOnMyLeft , balancer->getLeft(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc))); + { + bytesOffset[idxProc] = 0; + for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ + currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); + } + bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); + } + ICanGive -= leavesToSend[idxProc]; + ++idxProc; + + // count data to other proc + while(idxProc < rank && ICanGive){ + leavesToSend[idxProc] = int(FMath::Min( balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc) - balancer->getLeft(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc), FSize(ICanGive))); + + bytesOffset[idxProc] = int(currentIntervalPosition); + for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ + currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); + } + bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); + + ICanGive -= leavesToSend[idxProc]; + ++idxProc; + } + } + + // Store the index of my data but we do not insert the now + const FSize myParticlesPosition = currentIntervalPosition; + { + FTRACE( FTrace::FRegion regionTrace("Jump My particles", __FUNCTION__ , __FILE__ , __LINE__) ); + const FSize iNeedToSendLeftCount = correctLeftLeavesNumber - currentLeafsOnMyLeft; + FSize endForMe = currentNbLeafs; + if(iNeedToSendToRight){ + const FSize iNeedToSendRightCount = currentLeafsOnMyLeft + currentNbLeafs - correctRightLeavesIndex; + endForMe -= iNeedToSendRightCount; + } + + // We have to jump the correct number of leaves + for(FSize idxLeaf = FMath::Max(iNeedToSendLeftCount,FSize(0)) ; idxLeaf < endForMe ; ++idxLeaf){ + const int nbPartInLeaf = (*(int*)&leavesArray[currentIntervalPosition]); + currentIntervalPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int); + } + } + + // Proceed same on the right + if(iNeedToSendToRight){ + FTRACE( FTrace::FRegion regionTrace("Calcul SendToRight", __FUNCTION__ , __FILE__ , __LINE__) ); + // Find the last proc on the right that need my data + int idxProc = rank + 1; + while( idxProc < nbProcs ){ + const FSize thisProcLeft = balancer->getLeft(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc); + const FSize thisProcRight = balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc); + // Progress until the proc+1 has its left index upper to my current right + if( thisProcLeft < currentLeafsOnMyLeft || (totalNbLeaves - currentLeafsOnMyRight) < thisProcRight){ + break; + } + ++idxProc; + } + + // Count the data + int ICanGive = int(currentLeafsOnMyLeft + currentNbLeafs - correctRightLeavesIndex); + leavesToSend[idxProc] = int(FMath::Min(balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc) , (totalNbLeaves - currentLeafsOnMyRight)) - FMath::Max(balancer->getLeft(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc), currentLeafsOnMyLeft) ); + + { + bytesOffset[idxProc] = int(currentIntervalPosition); + for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ + currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); + } + bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); + } + ICanGive -= leavesToSend[idxProc]; + ++idxProc; + + // Now Count the data to other + while(idxProc < nbProcs && ICanGive){ + leavesToSend[idxProc] = int(FMath::Min( balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs, idxProc) - balancer->getLeft(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc), FSize(ICanGive))); + + bytesOffset[idxProc] = int(currentIntervalPosition); + for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ + currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); + } + bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); + + ICanGive -= leavesToSend[idxProc]; + ++idxProc; + } + } + + // Inform other about who will send/receive what + int bytesToSendRecv[nbProcs * nbProcs]; + memset(bytesToSendRecv, 0, sizeof(int) * nbProcs * nbProcs); + MPI_Allgather(bytesToSend, nbProcs, MPI_INT, bytesToSendRecv, nbProcs, MPI_INT, communicator.getComm()); + + int bytesToRecv[nbProcs]; + memset(bytesToRecv, 0, sizeof(int) * nbProcs); + int bytesOffsetToRecv[nbProcs]; + memset(bytesOffsetToRecv, 0, sizeof(int) * nbProcs); + + // Prepare needed buffer + FSize sumBytesToRecv = 0; + for(int idxProc = 0 ; idxProc < nbProcs ; ++idxProc){ + if( bytesToSendRecv[idxProc * nbProcs + rank] ){ + bytesOffsetToRecv[idxProc] = int(sumBytesToRecv); + sumBytesToRecv += FSize(bytesToSendRecv[idxProc * nbProcs + rank]); + bytesToRecv[idxProc] = bytesToSendRecv[idxProc * nbProcs + rank]; + } + } + + // Send alll to all + char* const recvbuf = new char[sumBytesToRecv]; + MPI_Alltoallv(leavesArray, bytesToSend, bytesOffset, MPI_BYTE, + recvbuf, bytesToRecv, bytesOffsetToRecv, MPI_BYTE, + communicator.getComm()); + + { // Insert received data + FTRACE( FTrace::FRegion regionTrace("Insert Received data", __FUNCTION__ , __FILE__ , __LINE__) ); + FSize recvBufferPosition = 0; + while( recvBufferPosition < sumBytesToRecv){ + const int nbPartInLeaf = (*reinterpret_cast(&recvbuf[recvBufferPosition])); + ParticleClass* const particles = reinterpret_cast(&recvbuf[recvBufferPosition] + sizeof(int)); + + for(int idxPart = 0 ; idxPart < nbPartInLeaf ; ++idxPart){ + particlesSaver->push(particles[idxPart]); + } + recvBufferPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int); + + } + } + delete[] recvbuf; + + + { // Insert my data + FTRACE( FTrace::FRegion regionTrace("Insert My particles", __FUNCTION__ , __FILE__ , __LINE__) ); + currentIntervalPosition = myParticlesPosition; + const FSize iNeedToSendLeftCount = correctLeftLeavesNumber - currentLeafsOnMyLeft; + FSize endForMe = currentNbLeafs; + if(iNeedToSendToRight){ + const FSize iNeedToSendRightCount = currentLeafsOnMyLeft + currentNbLeafs - correctRightLeavesIndex; + endForMe -= iNeedToSendRightCount; + } + + for(FSize idxLeaf = FMath::Max(iNeedToSendLeftCount,FSize(0)) ; idxLeaf < endForMe ; ++idxLeaf){ + const int nbPartInLeaf = (*(int*)&leavesArray[currentIntervalPosition]); + ParticleClass* const particles = reinterpret_cast(&leavesArray[currentIntervalPosition] + sizeof(int)); + + for(int idxPart = 0 ; idxPart < nbPartInLeaf ; ++idxPart){ + particlesSaver->push(particles[idxPart]); + } + currentIntervalPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int); + + } + } + + delete[] leavesArray; + leavesArray = 0; + nbLeavesInIntervals = 0; + } + +public: + + ////////////////////////////////////////////////////////////////////////// + // The builder function + ////////////////////////////////////////////////////////////////////////// + + template + static void ArrayToTree(const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, + const FPoint& boxCenter, const FReal boxWidth, const int treeHeight, + ContainerClass* particleSaver, FAbstractBalanceAlgorithm* balancer,const SortingType type = QuickSort){ + + IndexedParticle* particlesArray = 0; + FSize particlesSize = 0; + SortParticlesFromArray(communicator, array, size, type, boxCenter, boxWidth, treeHeight, + &particlesArray, &particlesSize); + + char* leavesArray = 0; + FSize leavesSize = 0; + MergeLeaves(communicator, particlesArray, particlesSize, &leavesArray, &leavesSize); + + EqualizeAndFillTree(communicator, particleSaver, leavesArray, leavesSize, balancer); + } };