diff --git a/Src/Files/FMpiTreeBuilder.hpp b/Src/Files/FMpiTreeBuilder.hpp index 008942bb4f32ea4b616e1f8fe5d2ae61d12b8aee..e7de52d3429501f433e832bb5f50ca8f4cbc275d 100755 --- a/Src/Files/FMpiTreeBuilder.hpp +++ b/Src/Files/FMpiTreeBuilder.hpp @@ -4,13 +4,13 @@ // This software is a computer program whose purpose is to compute the FMM. // // This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// +// abiding by the rules of distribution of free software. +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". +// "http://www.cecill.info". // "http://www.gnu.org/licenses". // =================================================================================== #ifndef FMPITREEBUILDER_H @@ -33,548 +33,512 @@ template<class ParticleClass> class FMpiTreeBuilder{ public: - /** What sorting algorithm to use */ - enum SortingType{ - QuickSort, - BitonicSort, - }; + /** What sorting algorithm to use */ + enum SortingType{ + QuickSort, + BitonicSort, + }; private: - /** This method has been tacken from the octree + /** This method has been tacken from the octree * it computes a tree coordinate (x or y or z) from real position */ - static int getTreeCoordinate(const FReal inRelativePosition, const FReal boxWidthAtLeafLevel) { - const FReal indexFReal = inRelativePosition / boxWidthAtLeafLevel; - const int index = int(FMath::dfloor(indexFReal)); - if( index && FMath::LookEqual(inRelativePosition, boxWidthAtLeafLevel * FReal(index) ) ){ - return index - 1; + static int getTreeCoordinate(const FReal inRelativePosition, const FReal boxWidthAtLeafLevel) { + const FReal indexFReal = inRelativePosition / boxWidthAtLeafLevel; + const int index = int(FMath::dfloor(indexFReal)); + if( index && FMath::LookEqual(inRelativePosition, boxWidthAtLeafLevel * FReal(index) ) ){ + return index - 1; + } + return index; } - return index; - } - /** A particle may not have a MortonIndex Method + /** A particle may not have a MortonIndex Method * but they are sorted on their morton index * so this struct store a particle + its index */ - struct IndexedParticle{ - MortonIndex index; - ParticleClass particle; - - operator MortonIndex(){ - return this->index; + struct IndexedParticle{ + MortonIndex index; + ParticleClass particle; + + operator MortonIndex(){ + return this->index; + } + }; + + ////////////////////////////////////////////////////////////////////////// + // To sort tha particles we hold + ////////////////////////////////////////////////////////////////////////// + + + template <class LoaderClass> + static void SortParticles( const FMpi::FComm& communicator, LoaderClass& loader, const SortingType type, + const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); + + // create particles + IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()]; + FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles()); + FPoint boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2)); + FTreeCoordinate host; + + const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (TreeHeight - 1) ); + for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + loader.fillParticle(realParticlesIndexed[idxPart].particle); + host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); + host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); + host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); + + realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); + } + + // sort particles + if(type == QuickSort){ + FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(), *outputArray, *outputSize,communicator); + delete [] (realParticlesIndexed); + } + else { + FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, loader.getNumberOfParticles(), communicator ); + *outputArray = realParticlesIndexed; + *outputSize = loader.getNumberOfParticles(); + } } - }; - - ////////////////////////////////////////////////////////////////////////// - // To sort tha particles we hold - ////////////////////////////////////////////////////////////////////////// - - - template <class LoaderClass> - static void SortParticles( const FMpi::FComm& communicator, LoaderClass& loader, const SortingType type, - const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); - - // create particles - IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()]; - FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles()); - FPoint boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2)); - FTreeCoordinate host; - - const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (TreeHeight - 1) ); - for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ - loader.fillParticle(realParticlesIndexed[idxPart].particle); - host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); - host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); - host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); - realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); + static void SortParticlesFromArray( const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, const SortingType type, + const FPoint& centerOfBox, const FReal boxWidth, + const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); + + // create particles + IndexedParticle*const realParticlesIndexed = new IndexedParticle[size]; + FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * size); + + FPoint boxCorner(centerOfBox - (boxWidth/2)); + FTreeCoordinate host; + + const FReal boxWidthAtLeafLevel = boxWidth / FReal(1 << (TreeHeight - 1) ); + + for(int idxPart = 0 ; idxPart < size ; ++idxPart){ + realParticlesIndexed[idxPart].particle = array[idxPart]; + host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); + host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); + host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); + + realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); + } + + // sort particles + if(type == QuickSort){ + FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, size, *outputArray, *outputSize,communicator); + delete [] (realParticlesIndexed); + } + else { + FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, size, communicator ); + *outputArray = realParticlesIndexed; + *outputSize = size; + } } - // sort particles - if(type == QuickSort){ - FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(), *outputArray, *outputSize,communicator); - delete [] (realParticlesIndexed); - } - else { - FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, loader.getNumberOfParticles(), communicator ); - *outputArray = realParticlesIndexed; - *outputSize = loader.getNumberOfParticles(); - } - } - - static void SortParticlesFromArray( const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, const SortingType type, - const FPoint& centerOfBox, const FReal boxWidth, - const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) ); - - // create particles - IndexedParticle*const realParticlesIndexed = new IndexedParticle[size]; - FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * size); - - FPoint boxCorner(centerOfBox - (boxWidth/2)); - FTreeCoordinate host; - - const FReal boxWidthAtLeafLevel = boxWidth / FReal(1 << (TreeHeight - 1) ); - for(int idxPart = 0 ; idxPart < size ; ++idxPart){ - realParticlesIndexed[idxPart].particle = array[idxPart]; - host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel )); - host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel )); - host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel )); - - realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1); + ////////////////////////////////////////////////////////////////////////// + // To merge the leaves + ////////////////////////////////////////////////////////////////////////// + + static void MergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize& workingSize, + char** leavesArray, FSize* const leavesSize){ + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); + const int rank = communicator.processId(); + const int nbProcs = communicator.processCount(); + + // be sure there is no splited leaves + // to do that we exchange the first index with the left proc + { + FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) ); + + MortonIndex otherFirstIndex = -1; + if(workingSize != 0 && rank != 0 && rank != nbProcs - 1){ + MPI_Sendrecv(&workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, + &otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, + communicator.getComm(), MPI_STATUS_IGNORE); + } + else if( rank == 0){ + MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE); + } + else if( rank == nbProcs - 1){ + MPI_Send( &workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm()); + } + else { + MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE); + MPI_Send(&otherFirstIndex, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm()); + } + + // at this point every one know the first index of his right neighbors + const bool needToRecvBeforeSend = (rank != 0 && ((workingSize && otherFirstIndex == workingArray[0].index ) || !workingSize)); + MPI_Request requestSendLeaf; + + IndexedParticle* sendBuffer = 0; + if(rank != nbProcs - 1 && needToRecvBeforeSend == false){ + FSize idxPart = workingSize - 1 ; + while(idxPart >= 0 && workingArray[idxPart].index == otherFirstIndex){ + --idxPart; + } + const int particlesToSend = int(workingSize - 1 - idxPart); + if(particlesToSend){ + workingSize -= particlesToSend; + sendBuffer = new IndexedParticle[particlesToSend]; + memcpy(sendBuffer, &workingArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle)); + + MPI_Isend( sendBuffer, particlesToSend * int(sizeof(IndexedParticle)), MPI_BYTE, + rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf); + } + else{ + MPI_Isend( 0, 0, MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf); + } + } + + if( rank != 0 ){ + int sendByOther = 0; + + MPI_Status probStatus; + MPI_Probe(rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), &probStatus); + MPI_Get_count( &probStatus, MPI_BYTE, &sendByOther); + + if(sendByOther){ + sendByOther /= int(sizeof(IndexedParticle)); + + const IndexedParticle* const reallocOutputArray = workingArray; + const FSize reallocOutputSize = workingSize; + + workingSize += sendByOther; + workingArray = new IndexedParticle[workingSize]; + FMemUtils::memcpy(&workingArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle)); + delete[] reallocOutputArray; + + MPI_Recv(workingArray, int(sizeof(IndexedParticle)) * sendByOther, MPI_BYTE, + rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE); + } + else{ + MPI_Recv( 0, 0, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE); + } + } + + if(rank != nbProcs - 1 && needToRecvBeforeSend == true){ + MPI_Send( workingArray, int(workingSize * sizeof(IndexedParticle)), MPI_BYTE, + rank + 1, FMpi::TagSplittedLeaf, communicator.getComm()); + delete[] workingArray; + workingArray = 0; + workingSize = 0; + } + else if(rank != nbProcs - 1){ + MPI_Wait( &requestSendLeaf, MPI_STATUS_IGNORE); + delete[] sendBuffer; + sendBuffer = 0; + } + } + + { + FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) ); + // We now copy the data from a sorted type into real particles array + counter + + (*leavesSize) = 0; + (*leavesArray) = 0; + + if(workingSize){ + (*leavesArray) = new char[workingSize * (sizeof(ParticleClass) + sizeof(int))]; + + MortonIndex previousIndex = -1; + char* writeIndex = (*leavesArray); + int* writeCounter = 0; + + for( FSize idxPart = 0; idxPart < workingSize ; ++idxPart){ + if( workingArray[idxPart].index != previousIndex ){ + previousIndex = workingArray[idxPart].index; + ++(*leavesSize); + + writeCounter = reinterpret_cast<int*>( writeIndex ); + writeIndex += sizeof(int); + + (*writeCounter) = 0; + } + + memcpy(writeIndex, &workingArray[idxPart].particle, sizeof(ParticleClass)); + + writeIndex += sizeof(ParticleClass); + ++(*writeCounter); + } + } + + delete [] workingArray; + + workingArray = 0; + workingSize = 0; + } } - // sort particles - if(type == QuickSort){ - FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, size, *outputArray, *outputSize,communicator); - delete [] (realParticlesIndexed); - } - else { - FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, size, communicator ); - *outputArray = realParticlesIndexed; - *outputSize = size; - } - } - - - ////////////////////////////////////////////////////////////////////////// - // To merge the leaves - ////////////////////////////////////////////////////////////////////////// - - static void MergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize& workingSize, - char** leavesArray, FSize* const leavesSize){ - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); - const int rank = communicator.processId(); - const int nbProcs = communicator.processCount(); - - // be sure there is no splited leaves - // to do that we exchange the first index with the left proc - { - FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) ); - - MortonIndex otherFirstIndex = -1; - if(workingSize != 0 && rank != 0 && rank != nbProcs - 1){ - MPI_Sendrecv(&workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, - &otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, - communicator.getComm(), MPI_STATUS_IGNORE); - } - else if( rank == 0){ - MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE); - } - else if( rank == nbProcs - 1){ - MPI_Send( &workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm()); - } - else { - MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE); - MPI_Send(&otherFirstIndex, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm()); - } - - // at this point every one know the first index of his right neighbors - const bool needToRecvBeforeSend = (rank != 0 && ((workingSize && otherFirstIndex == workingArray[0].index ) || !workingSize)); - MPI_Request requestSendLeaf; - - IndexedParticle* sendBuffer = 0; - if(rank != nbProcs - 1 && needToRecvBeforeSend == false){ - FSize idxPart = workingSize - 1 ; - while(idxPart >= 0 && workingArray[idxPart].index == otherFirstIndex){ - --idxPart; - } - const int particlesToSend = int(workingSize - 1 - idxPart); - if(particlesToSend){ - workingSize -= particlesToSend; - sendBuffer = new IndexedParticle[particlesToSend]; - memcpy(sendBuffer, &workingArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle)); - - MPI_Isend( sendBuffer, particlesToSend * int(sizeof(IndexedParticle)), MPI_BYTE, - rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf); - } - else{ - MPI_Isend( 0, 0, MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf); - } - } - - if( rank != 0 ){ - int sendByOther = 0; - - MPI_Status probStatus; - MPI_Probe(rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), &probStatus); - MPI_Get_count( &probStatus, MPI_BYTE, &sendByOther); - - if(sendByOther){ - sendByOther /= int(sizeof(IndexedParticle)); - - const IndexedParticle* const reallocOutputArray = workingArray; - const FSize reallocOutputSize = workingSize; - - workingSize += sendByOther; - workingArray = new IndexedParticle[workingSize]; - FMemUtils::memcpy(&workingArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle)); - delete[] reallocOutputArray; - - MPI_Recv(workingArray, int(sizeof(IndexedParticle)) * sendByOther, MPI_BYTE, - rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE); - } - else{ - MPI_Recv( 0, 0, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE); - } - } - - if(rank != nbProcs - 1 && needToRecvBeforeSend == true){ - MPI_Send( workingArray, int(workingSize * sizeof(IndexedParticle)), MPI_BYTE, - rank + 1, FMpi::TagSplittedLeaf, communicator.getComm()); - delete[] workingArray; - workingArray = 0; - workingSize = 0; - } - else if(rank != nbProcs - 1){ - MPI_Wait( &requestSendLeaf, MPI_STATUS_IGNORE); - delete[] sendBuffer; - sendBuffer = 0; - } + ////////////////////////////////////////////////////////////////////////// + // To equalize (same number of leaves among the procs) + ////////////////////////////////////////////////////////////////////////// + + /** Put the interval into a tree */ + template <class ContainerClass> + static void EqualizeAndFillTree(const FMpi::FComm& communicator, ContainerClass* particlesSaver, + char*& leavesArray, FSize& nbLeavesInIntervals, FAbstractBalanceAlgorithm * balancer){ + + + FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); + const int myRank = communicator.processId(); + const int nbProcs = communicator.processCount(); + const FSize myNumberOfLeaves = nbLeavesInIntervals; + + // We have to know the number of leaves each procs holds + int*const numberOfLeavesPerProc = new int[nbProcs]; + memset(numberOfLeavesPerProc, 0, sizeof(int) * nbProcs); + int intNbLeavesInIntervals = int(nbLeavesInIntervals); + MPI_Allgather(&intNbLeavesInIntervals, 1, MPI_INT, numberOfLeavesPerProc, 1, MPI_INT, communicator.getComm()); + printf("Proc : %d : Currently have %lld leaves \n", myRank, myNumberOfLeaves); + + //Start working THERE + + //We need the max number of leafs over th procs + int*const leavesOffsetPerProc = new int[nbProcs + 1]; + FSize totalNumberOfLeaves = 0; + + leavesOffsetPerProc[0] = 0; + totalNumberOfLeaves += numberOfLeavesPerProc[0]; + for(int idxProc = 1 ; idxProc < nbProcs ; ++idxProc ){ + leavesOffsetPerProc[idxProc] = leavesOffsetPerProc[idxProc-1] + numberOfLeavesPerProc[idxProc-1]; + totalNumberOfLeaves += numberOfLeavesPerProc[idxProc]; + } + leavesOffsetPerProc[nbProcs] = int(totalNumberOfLeaves); + + const FSize currentLeafsOnMyLeft = leavesOffsetPerProc[myRank]; + const FSize currentRightLeafIdx = leavesOffsetPerProc[myRank+1]; + + //Creation of an array to store how many parts are in each leaf + int*const numberOfParticlesPerLeaf = new int[totalNumberOfLeaves]; + int*const myParticlesCounterArray = &numberOfParticlesPerLeaf[leavesOffsetPerProc[myRank]]; + + memset(numberOfParticlesPerLeaf, 0, sizeof(int)*totalNumberOfLeaves); + + //Loop over leafArray to fill myParts + size_t idxOfParticlesNumber = 0; + for(int idxLeaf = 0 ; idxLeaf < nbLeavesInIntervals ; ++idxLeaf){ + const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&leavesArray[idxOfParticlesNumber])); + myParticlesCounterArray[idxLeaf] += numberOfParticlesInThisLeaf; + idxOfParticlesNumber += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int)); + } + + MPI_Allgatherv(myParticlesCounterArray,numberOfLeavesPerProc[myRank], MPI_INT, + numberOfParticlesPerLeaf, numberOfLeavesPerProc, leavesOffsetPerProc, MPI_INT, communicator.getComm()); + + FSize totalNumberOfParticles = 0; + for(int idxLeaf = 0 ; idxLeaf < totalNumberOfLeaves ; ++idxLeaf){ + totalNumberOfParticles += numberOfParticlesPerLeaf[idxLeaf]; + } + + const FSize correctLeftLeavesNumber = balancer->getLeft( totalNumberOfLeaves,numberOfParticlesPerLeaf,totalNumberOfParticles, + NULL,nbProcs,myRank); + const FSize correctRightLeavesIndex = balancer->getRight(totalNumberOfLeaves,numberOfParticlesPerLeaf,totalNumberOfParticles, + NULL,nbProcs,myRank); + + //// TODO REMOVE WHEN DEBUG printf("Proc [%d] :: will work from leaf %lld \t to leaf %lld \n",myRank,correctLeftLeavesNumber,correctRightLeavesIndex); + + MPI_Request* requests = new MPI_Request[nbProcs * 2]; + int counterRequest = 0; + + if(currentLeafsOnMyLeft < correctLeftLeavesNumber || correctRightLeavesIndex < currentRightLeafIdx){ + size_t offsetLeafToSend = 0; + int counterLeafToSend = 0; + int idxProcToProceed = 0; + + while( idxProcToProceed < nbProcs && (balancer->getLeft(totalNumberOfLeaves,numberOfParticlesPerLeaf,totalNumberOfParticles,NULL,nbProcs,idxProcToProceed) < currentRightLeafIdx)){ + const FSize procToProceedRightIdx = balancer->getRight(totalNumberOfLeaves,numberOfParticlesPerLeaf,totalNumberOfParticles,NULL,nbProcs,idxProcToProceed); + const FSize procToProceedLeftIdx = balancer->getLeft(totalNumberOfLeaves,numberOfParticlesPerLeaf,totalNumberOfParticles,NULL,nbProcs,idxProcToProceed); + const bool procToProceedHasLeftInMyInterval = (currentLeafsOnMyLeft <= procToProceedLeftIdx && procToProceedLeftIdx < currentRightLeafIdx); + const bool procToProceedHasRightInMyInterval = (currentLeafsOnMyLeft <= procToProceedRightIdx && procToProceedRightIdx < currentRightLeafIdx); + const bool procIncludeMyInterval = (procToProceedLeftIdx <= currentLeafsOnMyLeft && currentRightLeafIdx <= procToProceedRightIdx); + //// TODO REMOVE WHEN DEBUG printf("%d] idxProcToProceed %d procToProceedRightIdx %llu procToProceedLeftIdx %llu procToProceedHasLeftInMyInterval %d procToProceedHasRightInMyInterval %d\n", + //// TODO REMOVE WHEN DEBUG myRank, idxProcToProceed, procToProceedRightIdx, procToProceedLeftIdx, procToProceedHasLeftInMyInterval, procToProceedHasRightInMyInterval); + + if(idxProcToProceed != myRank && (procToProceedHasLeftInMyInterval || procToProceedHasRightInMyInterval || procIncludeMyInterval) ){ + const int firstLeafToSend = FMath::Max(int(procToProceedLeftIdx - currentLeafsOnMyLeft), 0); + const int lastLeafToSend = int(FMath::Min(procToProceedRightIdx - currentLeafsOnMyLeft, myNumberOfLeaves )); + + //// TODO REMOVE WHEN DEBUG printf("Proc :: %d (from leaf %d to %d)\n", myRank, firstLeafToSend, lastLeafToSend); + + while(counterLeafToSend != firstLeafToSend){ + const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&leavesArray[offsetLeafToSend])); + offsetLeafToSend += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int)); + counterLeafToSend += 1; + } + const size_t offetSetToSend = offsetLeafToSend; + while(counterLeafToSend != lastLeafToSend){ + const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&leavesArray[offsetLeafToSend])); + offsetLeafToSend += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int)); + counterLeafToSend += 1; + } + + //// TODO REMOVE WHEN DEBUG printf("Proc :: %d send %d bytes to %d (from leaf %d to %d)\n", + //// TODO REMOVE WHEN DEBUG myRank, int(offsetLeafToSend - offetSetToSend), idxProcToProceed, firstLeafToSend, lastLeafToSend); + MPI_Isend(&leavesArray[offetSetToSend], int(offsetLeafToSend - offetSetToSend), MPI_BYTE, + idxProcToProceed, firstLeafToSend + currentLeafsOnMyLeft, communicator.getComm(), &requests[counterRequest++]); + } + idxProcToProceed += 1; + } + } + + struct RecvBlockInfo{ + char* buffer; + int nbLeaves; + }; + RecvBlockInfo* recvBlockInfo = new RecvBlockInfo[nbProcs]; + int nbBlocksToRecv = 0; + + if(correctLeftLeavesNumber < currentLeafsOnMyLeft || currentRightLeafIdx < correctRightLeavesIndex){ + FSize iterCorrectLeafIdx = correctLeftLeavesNumber; + int idxProcToProceed = 0; + while(iterCorrectLeafIdx < correctRightLeavesIndex){ + if(currentLeafsOnMyLeft <= iterCorrectLeafIdx && iterCorrectLeafIdx < currentRightLeafIdx){ + //// TODO REMOVE WHEN DEBUG printf("%d] currentLeafsOnMyLeft %llu iterCorrectLeafIdx %llu iterCorrectLeafIdx %llu currentRightLeafIdx %llu\n", + //// TODO REMOVE WHEN DEBUG myRank, currentLeafsOnMyLeft, iterCorrectLeafIdx, iterCorrectLeafIdx, currentRightLeafIdx); + iterCorrectLeafIdx = currentRightLeafIdx; + idxProcToProceed = myRank + 1; + } + else{ + //// TODO REMOVE WHEN DEBUG printf("%d] currentLeafsOnMyLeft %llu iterCorrectLeafIdx %llu iterCorrectLeafIdx %llu currentRightLeafIdx %llu correctRightLeavesIndex %llu\n", + //// TODO REMOVE WHEN DEBUG myRank, currentLeafsOnMyLeft, iterCorrectLeafIdx, iterCorrectLeafIdx, currentRightLeafIdx, correctRightLeavesIndex); + while(leavesOffsetPerProc[idxProcToProceed+1] <= iterCorrectLeafIdx){ + //// TODO REMOVE WHEN DEBUG printf("%d] leavesOffsetPerProc[%d+1] %llu iterCorrectLeafIdx %lld\n", + //// TODO REMOVE WHEN DEBUG myRank, idxProcToProceed, leavesOffsetPerProc[idxProcToProceed+1], iterCorrectLeafIdx); + idxProcToProceed += 1; + } + const int nbLeafToReceive = FMath::Min(leavesOffsetPerProc[idxProcToProceed+1], int(correctRightLeavesIndex)) - int(iterCorrectLeafIdx); + FSize nbParticlesToReceive = 0; + for(int idxLeaf = 0 ; idxLeaf < nbLeafToReceive ; ++idxLeaf){ + nbParticlesToReceive += numberOfParticlesPerLeaf[idxLeaf + iterCorrectLeafIdx]; + } + + FSize bytesToRecv = (sizeof(ParticleClass)*nbParticlesToReceive) + sizeof(int)*nbLeafToReceive; + char* bufferToReceive = new char[bytesToRecv]; + + //// TODO REMOVE WHEN DEBUG printf("Proc :: %d recv %d bytes to %d (from leaf %d to %d)\n", + //// TODO REMOVE WHEN DEBUG myRank, bytesToRecv, idxProcToProceed, iterCorrectLeafIdx, iterCorrectLeafIdx + nbLeafToReceive); + + MPI_Irecv(bufferToReceive, int(bytesToRecv), MPI_BYTE, idxProcToProceed, int(iterCorrectLeafIdx), + communicator.getComm(), &requests[counterRequest++]); + + recvBlockInfo[nbBlocksToRecv].buffer = bufferToReceive; + recvBlockInfo[nbBlocksToRecv].nbLeaves = nbLeafToReceive; + nbBlocksToRecv += 1; + + iterCorrectLeafIdx += nbLeafToReceive; + } + } + } + + //// TODO REMOVE WHEN DEBUG printf("%d Wait!\n", myRank); + MPI_Waitall(counterRequest, requests, MPI_STATUSES_IGNORE); + //// TODO REMOVE WHEN DEBUG printf("%d Done!\n", myRank); + + int idxBlockRecvInLeft = 0; + if(correctLeftLeavesNumber < currentLeafsOnMyLeft){ + const int nbLeavesRecv = int(FMath::Min(currentLeafsOnMyLeft, correctRightLeavesIndex) - correctLeftLeavesNumber); + //// TODO REMOVE WHEN DEBUG printf("%d] has receive %d from left\n", myRank, nbLeavesRecv); + int idxLeaf = 0; + while(idxLeaf < nbLeavesRecv){ + //// TODO REMOVE WHEN DEBUG printf("%d] block %d has %d leaves\n", myRank, idxBlockRecvInLeft, recvBlockInfo[idxBlockRecvInLeft].nbLeaves); + size_t offsetBuffer = 0; + for(int idxLeafInBlock = 0 ; idxLeafInBlock < recvBlockInfo[idxBlockRecvInLeft].nbLeaves ; ++idxLeafInBlock){ + const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&recvBlockInfo[idxBlockRecvInLeft].buffer[offsetBuffer])); + const ParticleClass*const particles = reinterpret_cast<ParticleClass*>(&recvBlockInfo[idxBlockRecvInLeft].buffer[offsetBuffer] + sizeof(int)); + //// TODO REMOVE WHEN DEBUG printf("%d] block %d leaf %d has %d part\n", myRank, idxBlockRecvInLeft, idxLeafInBlock, numberOfParticlesInThisLeaf); + for(int idxParticle = 0 ; idxParticle < numberOfParticlesInThisLeaf ; ++idxParticle){ + particlesSaver->push(particles[idxParticle]); + } + offsetBuffer += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int)); + } + idxLeaf += recvBlockInfo[idxBlockRecvInLeft].nbLeaves; + delete[] recvBlockInfo[idxBlockRecvInLeft].buffer; + idxBlockRecvInLeft += 1; + } + } + //// TODO REMOVE WHEN DEBUG printf("currentLeafsOnMyLeft %lld correctLeftLeavesNumber %lld currentRightLeafIdx %lld correctRightLeavesIndex %lld \n", + //// TODO REMOVE WHEN DEBUG currentLeafsOnMyLeft, correctLeftLeavesNumber, currentRightLeafIdx, correctRightLeavesIndex); + if((currentLeafsOnMyLeft <= correctLeftLeavesNumber && correctLeftLeavesNumber < currentRightLeafIdx) + || (currentLeafsOnMyLeft < correctRightLeavesIndex && correctRightLeavesIndex <= currentRightLeafIdx)){ + const int nbLeavesToSkip = correctLeftLeavesNumber-currentLeafsOnMyLeft; + size_t offsetBuffer = 0; + //// TODO REMOVE WHEN DEBUG printf("%d] skip %d leaves\n", myRank, nbLeavesToSkip); + for(int idxToSkip = 0 ; idxToSkip < nbLeavesToSkip ; ++idxToSkip){ + const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&leavesArray[offsetBuffer])); + offsetBuffer += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int)); + //// TODO REMOVE WHEN DEBUG printf("%d] leaf %d had %d part\n", myRank, idxToSkip, numberOfParticlesInThisLeaf); + } + const int nbLeafToCopy = int(FMath::Min(currentRightLeafIdx, correctRightLeavesIndex) - FMath::Max(currentLeafsOnMyLeft, correctLeftLeavesNumber)); + //// TODO REMOVE WHEN DEBUG printf("%d] Need to copy %d leaves\n", myRank, nbLeafToCopy); + for(int idxToProcess = 0 ; idxToProcess < nbLeafToCopy ; ++idxToProcess){ + const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&leavesArray[offsetBuffer])); + //// TODO REMOVE WHEN DEBUG printf("%d] leaf %d had %d part\n", myRank, idxToProcess, numberOfParticlesInThisLeaf); + const ParticleClass*const particles = reinterpret_cast<ParticleClass*>(&leavesArray[offsetBuffer] + sizeof(int)); + for(int idxParticle = 0 ; idxParticle < numberOfParticlesInThisLeaf ; ++idxParticle){ + particlesSaver->push(particles[idxParticle]); + } + + offsetBuffer += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int)); + } + } + if(currentRightLeafIdx < correctRightLeavesIndex){ + const int nbLeavesRecv = int(correctRightLeavesIndex - FMath::Max(currentRightLeafIdx, correctLeftLeavesNumber)); + //// TODO REMOVE WHEN DEBUG printf("%d] has receive %d from right\n", myRank, nbLeavesRecv); + int idxLeaf = 0; + while(idxLeaf < nbLeavesRecv){ + //// TODO REMOVE WHEN DEBUG printf("%d] block %d has %d leaves\n", myRank, idxBlockRecvInLeft, recvBlockInfo[idxBlockRecvInLeft].nbLeaves); + size_t offsetBuffer = 0; + for(int idxLeafInBlock = 0 ; idxLeafInBlock < recvBlockInfo[idxBlockRecvInLeft].nbLeaves ; ++idxLeafInBlock){ + const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&recvBlockInfo[idxBlockRecvInLeft].buffer[offsetBuffer])); + const ParticleClass*const particles = reinterpret_cast<ParticleClass*>(&recvBlockInfo[idxBlockRecvInLeft].buffer[offsetBuffer] + sizeof(int)); + //// TODO REMOVE WHEN DEBUG printf("%d] block %d leaf %d has %d part\n", myRank, idxBlockRecvInLeft, idxLeafInBlock, numberOfParticlesInThisLeaf); + for(int idxParticle = 0 ; idxParticle < numberOfParticlesInThisLeaf ; ++idxParticle){ + particlesSaver->push(particles[idxParticle]); + } + offsetBuffer += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int)); + } + idxLeaf += recvBlockInfo[idxBlockRecvInLeft].nbLeaves; + delete[] recvBlockInfo[idxBlockRecvInLeft].buffer; + idxBlockRecvInLeft += 1; + } + } + + delete[] leavesArray; + delete[] numberOfLeavesPerProc; + delete[] leavesOffsetPerProc; + delete[] numberOfParticlesPerLeaf; + delete[] requests; + delete[] recvBlockInfo; } - { - FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) ); - // We now copy the data from a sorted type into real particles array + counter - - (*leavesSize) = 0; - (*leavesArray) = 0; - - if(workingSize){ - (*leavesArray) = new char[workingSize * (sizeof(ParticleClass) + sizeof(int))]; - - MortonIndex previousIndex = -1; - char* writeIndex = (*leavesArray); - int* writeCounter = 0; - - for( FSize idxPart = 0; idxPart < workingSize ; ++idxPart){ - if( workingArray[idxPart].index != previousIndex ){ - previousIndex = workingArray[idxPart].index; - ++(*leavesSize); - - writeCounter = reinterpret_cast<int*>( writeIndex ); - writeIndex += sizeof(int); - - (*writeCounter) = 0; - } - - memcpy(writeIndex, &workingArray[idxPart].particle, sizeof(ParticleClass)); - - writeIndex += sizeof(ParticleClass); - ++(*writeCounter); - } - } - - delete [] workingArray; - - workingArray = 0; - workingSize = 0; - } - } - - ////////////////////////////////////////////////////////////////////////// - // To equalize (same number of leaves among the procs) - ////////////////////////////////////////////////////////////////////////// - - /** Put the interval into a tree */ - template <class ContainerClass> - static void EqualizeAndFillTree(const FMpi::FComm& communicator, ContainerClass* particlesSaver, - char*& leavesArray, FSize& nbLeavesInIntervals, FAbstractBalanceAlgorithm * balancer){ - - - FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) ); - const int rank = communicator.processId(); - const int nbProcs = communicator.processCount(); - const FSize currentNbLeafs = nbLeavesInIntervals; - - // We have to know the number of leaves each procs holds - FSize leavesPerProcs[nbProcs]; - memset(leavesPerProcs, 0, sizeof(int) * nbProcs); - MPI_Allgather(&nbLeavesInIntervals, 1, MPI_LONG_LONG, leavesPerProcs, 1, MPI_LONG_LONG, communicator.getComm()); - printf("Proc : %d : Currently have %lld leaves \n",rank,currentNbLeafs); - - - //Start working THERE - - //We need the max number of leafs over th procs - FSize maxNbOfLeaf=0; - for(int i = 0 ; i< nbProcs ; ++i){ - if(maxNbOfLeaf < leavesPerProcs[i]) maxNbOfLeaf=leavesPerProcs[i]; - } - //Creation of an array to store how many parts are in each leaf - int * arrayToLeafs = new int[nbProcs*maxNbOfLeaf]; - memset(arrayToLeafs,0,sizeof(int)*maxNbOfLeaf*nbProcs); - - //Just indirection for simplifying - int * myParts = &arrayToLeafs[rank*maxNbOfLeaf]; - - //Loop over leafArray to fill myParts - FSize remainingLeafsToVisit = nbLeavesInIntervals; - long unsigned int firstIdx = 0; - while(remainingLeafsToVisit > 0){ - int* intTab = reinterpret_cast<int*>(&leavesArray[firstIdx]); - //ParticleClass* tab = reinterpret_cast<ParticleClass*>(&leavesArray[firstIdx+sizeof(int)]); - //printf("Leaf %lld contains %d :: \n",nbLeavesInIntervals-remainingLeafsToVisit,intTab[0]); - for(int k =0; k<intTab[0] ; ++k){ - myParts[nbLeavesInIntervals-remainingLeafsToVisit]++; - } - firstIdx += sizeof(ParticleClass)*intTab[0]+sizeof(int); - remainingLeafsToVisit--; - } - // {//TODO delete, because of uselessness - // if(rank==0){ - // for(int k=0 ; k<nbLeavesInIntervals ; ++k){ - // printf("%d \t",myParts[k]); - // } - // printf("\n"); - // } - // } - MPI_Allgather(myParts,(int)maxNbOfLeaf,MPI_INT,arrayToLeafs,(int)maxNbOfLeaf,MPI_INT,communicator.getComm()); - - // Count the number of leaves on each side - FSize currentLeafsOnMyLeft = 0; - FSize currentLeafsOnMyRight = 0; - for(int idxProc = 0 ; idxProc < nbProcs ; ++idxProc ){ - if(idxProc < rank) currentLeafsOnMyLeft += leavesPerProcs[idxProc]; - if(rank < idxProc) currentLeafsOnMyRight += leavesPerProcs[idxProc]; - } - - // So we can know the number of total leafs and - // the number of leaves each procs must have at the end - const FSize totalNbLeaves = (currentLeafsOnMyLeft + currentNbLeafs + currentLeafsOnMyRight); - int * leavesContents = new int[totalNbLeaves]; - int idx = 0; - FSize totParts = 0; - for(int k=0 ; k<maxNbOfLeaf*nbProcs ; ++k){ - if(arrayToLeafs[k]){ - leavesContents[idx] = arrayToLeafs[k]; - totParts += FSize(arrayToLeafs[k]); - idx++; - } - } - delete[] arrayToLeafs; - - MortonIndex * notUsed = 0; - - const FSize correctLeftLeavesNumber = balancer->getLeft( totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,rank); - const FSize correctRightLeavesIndex = balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,rank); - - printf("Proc [%d] :: will work from leaf %lld \t to leaf %lld \n",rank,correctLeftLeavesNumber,correctRightLeavesIndex); - - // This will be used for the all to all - int leavesToSend[nbProcs]; - memset(leavesToSend, 0, sizeof(int) * nbProcs); - int bytesToSend[nbProcs]; - memset(bytesToSend, 0, sizeof(int) * nbProcs); - int bytesOffset[nbProcs]; - memset(bytesToSend, 0, sizeof(int) * nbProcs); - - // Buffer Position - FSize currentIntervalPosition = 0; - - // I need to send left if I hold leaves that belong to procs on my left - const bool iNeedToSendToLeft = currentLeafsOnMyLeft < correctLeftLeavesNumber; - // I need to send right if I hold leaves that belong to procs on my right - const bool iNeedToSendToRight = correctRightLeavesIndex < currentLeafsOnMyLeft + currentNbLeafs; - - if(iNeedToSendToLeft){ - FTRACE( FTrace::FRegion regionTrace("Calcul SendToLeft", __FUNCTION__ , __FILE__ , __LINE__) ); - // Find the first proc that need my data - int idxProc = rank - 1; - while( idxProc > 0 ){ - const FSize thisProcRight = balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc - 1); - // Go to left until proc-1 has a right index lower than my current left - if( thisProcRight < currentLeafsOnMyLeft){ - break; - } - --idxProc; - } - - // Count data for this proc - int ICanGive = int(currentNbLeafs); - leavesToSend[idxProc] = int(FMath::Min(balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc),totalNbLeaves - currentLeafsOnMyRight) - FMath::Max( currentLeafsOnMyLeft , balancer->getLeft(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc))); - { - bytesOffset[idxProc] = 0; - for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ - currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); - } - bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); - } - ICanGive -= leavesToSend[idxProc]; - ++idxProc; - - // count data to other proc - while(idxProc < rank && ICanGive){ - leavesToSend[idxProc] = int(FMath::Min( balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc) - balancer->getLeft(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc), FSize(ICanGive))); - - bytesOffset[idxProc] = int(currentIntervalPosition); - for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ - currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); - } - bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); - - ICanGive -= leavesToSend[idxProc]; - ++idxProc; - } - } - - // Store the index of my data but we do not insert the now - const FSize myParticlesPosition = currentIntervalPosition; - { - FTRACE( FTrace::FRegion regionTrace("Jump My particles", __FUNCTION__ , __FILE__ , __LINE__) ); - const FSize iNeedToSendLeftCount = correctLeftLeavesNumber - currentLeafsOnMyLeft; - FSize endForMe = currentNbLeafs; - if(iNeedToSendToRight){ - const FSize iNeedToSendRightCount = currentLeafsOnMyLeft + currentNbLeafs - correctRightLeavesIndex; - endForMe -= iNeedToSendRightCount; - } - - // We have to jump the correct number of leaves - for(FSize idxLeaf = FMath::Max(iNeedToSendLeftCount,FSize(0)) ; idxLeaf < endForMe ; ++idxLeaf){ - const int nbPartInLeaf = (*(int*)&leavesArray[currentIntervalPosition]); - currentIntervalPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int); - } - } - - // Proceed same on the right - if(iNeedToSendToRight){ - FTRACE( FTrace::FRegion regionTrace("Calcul SendToRight", __FUNCTION__ , __FILE__ , __LINE__) ); - // Find the last proc on the right that need my data - int idxProc = rank + 1; - while( idxProc < nbProcs ){ - const FSize thisProcLeft = balancer->getLeft(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc); - const FSize thisProcRight = balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc); - // Progress until the proc+1 has its left index upper to my current right - if( thisProcLeft < currentLeafsOnMyLeft || (totalNbLeaves - currentLeafsOnMyRight) < thisProcRight){ - break; - } - ++idxProc; - } - - // Count the data - int ICanGive = int(currentLeafsOnMyLeft + currentNbLeafs - correctRightLeavesIndex); - leavesToSend[idxProc] = int(FMath::Min(balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc) , (totalNbLeaves - currentLeafsOnMyRight)) - FMath::Max(balancer->getLeft(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc), currentLeafsOnMyLeft) ); - - { - bytesOffset[idxProc] = int(currentIntervalPosition); - for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ - currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); - } - bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); - } - ICanGive -= leavesToSend[idxProc]; - ++idxProc; - - // Now Count the data to other - while(idxProc < nbProcs && ICanGive){ - leavesToSend[idxProc] = int(FMath::Min( balancer->getRight(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs, idxProc) - balancer->getLeft(totalNbLeaves,leavesContents,totParts,notUsed,nbProcs,idxProc), FSize(ICanGive))); - - bytesOffset[idxProc] = int(currentIntervalPosition); - for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){ - currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int); - } - bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]); - - ICanGive -= leavesToSend[idxProc]; - ++idxProc; - } - } - - // Inform other about who will send/receive what - int bytesToSendRecv[nbProcs * nbProcs]; - memset(bytesToSendRecv, 0, sizeof(int) * nbProcs * nbProcs); - MPI_Allgather(bytesToSend, nbProcs, MPI_INT, bytesToSendRecv, nbProcs, MPI_INT, communicator.getComm()); - - int bytesToRecv[nbProcs]; - memset(bytesToRecv, 0, sizeof(int) * nbProcs); - int bytesOffsetToRecv[nbProcs]; - memset(bytesOffsetToRecv, 0, sizeof(int) * nbProcs); - - // Prepare needed buffer - FSize sumBytesToRecv = 0; - for(int idxProc = 0 ; idxProc < nbProcs ; ++idxProc){ - if( bytesToSendRecv[idxProc * nbProcs + rank] ){ - bytesOffsetToRecv[idxProc] = int(sumBytesToRecv); - sumBytesToRecv += FSize(bytesToSendRecv[idxProc * nbProcs + rank]); - bytesToRecv[idxProc] = bytesToSendRecv[idxProc * nbProcs + rank]; - } - } - - // Send alll to all - char* const recvbuf = new char[sumBytesToRecv]; - MPI_Alltoallv(leavesArray, bytesToSend, bytesOffset, MPI_BYTE, - recvbuf, bytesToRecv, bytesOffsetToRecv, MPI_BYTE, - communicator.getComm()); - - { // Insert received data - FTRACE( FTrace::FRegion regionTrace("Insert Received data", __FUNCTION__ , __FILE__ , __LINE__) ); - FSize recvBufferPosition = 0; - while( recvBufferPosition < sumBytesToRecv){ - const int nbPartInLeaf = (*reinterpret_cast<int*>(&recvbuf[recvBufferPosition])); - ParticleClass* const particles = reinterpret_cast<ParticleClass*>(&recvbuf[recvBufferPosition] + sizeof(int)); - - for(int idxPart = 0 ; idxPart < nbPartInLeaf ; ++idxPart){ - particlesSaver->push(particles[idxPart]); - } - recvBufferPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int); - - } - } - delete[] recvbuf; - - - { // Insert my data - FTRACE( FTrace::FRegion regionTrace("Insert My particles", __FUNCTION__ , __FILE__ , __LINE__) ); - currentIntervalPosition = myParticlesPosition; - const FSize iNeedToSendLeftCount = correctLeftLeavesNumber - currentLeafsOnMyLeft; - FSize endForMe = currentNbLeafs; - if(iNeedToSendToRight){ - const FSize iNeedToSendRightCount = currentLeafsOnMyLeft + currentNbLeafs - correctRightLeavesIndex; - endForMe -= iNeedToSendRightCount; - } - - for(FSize idxLeaf = FMath::Max(iNeedToSendLeftCount,FSize(0)) ; idxLeaf < endForMe ; ++idxLeaf){ - const int nbPartInLeaf = (*(int*)&leavesArray[currentIntervalPosition]); - ParticleClass* const particles = reinterpret_cast<ParticleClass*>(&leavesArray[currentIntervalPosition] + sizeof(int)); - - for(int idxPart = 0 ; idxPart < nbPartInLeaf ; ++idxPart){ - particlesSaver->push(particles[idxPart]); - } - currentIntervalPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int); - - } - } - - delete[] leavesArray; - leavesArray = 0; - nbLeavesInIntervals = 0; - } - public: - ////////////////////////////////////////////////////////////////////////// - // The builder function - ////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////// + // The builder function + ////////////////////////////////////////////////////////////////////////// - template <class ContainerClass> - static void ArrayToTree(const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, - const FPoint& boxCenter, const FReal boxWidth, const int treeHeight, - ContainerClass* particleSaver, FAbstractBalanceAlgorithm* balancer,const SortingType type = QuickSort){ + template <class ContainerClass> + static void ArrayToTree(const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, + const FPoint& boxCenter, const FReal boxWidth, const int treeHeight, + ContainerClass* particleSaver, FAbstractBalanceAlgorithm* balancer,const SortingType type = QuickSort){ - IndexedParticle* particlesArray = 0; - FSize particlesSize = 0; - SortParticlesFromArray(communicator, array, size, type, boxCenter, boxWidth, treeHeight, - &particlesArray, &particlesSize); + IndexedParticle* particlesArray = 0; + FSize particlesSize = 0; + SortParticlesFromArray(communicator, array, size, type, boxCenter, boxWidth, treeHeight, + &particlesArray, &particlesSize); - char* leavesArray = 0; - FSize leavesSize = 0; - MergeLeaves(communicator, particlesArray, particlesSize, &leavesArray, &leavesSize); + char* leavesArray = 0; + FSize leavesSize = 0; + MergeLeaves(communicator, particlesArray, particlesSize, &leavesArray, &leavesSize); - EqualizeAndFillTree(communicator, particleSaver, leavesArray, leavesSize, balancer); - } + EqualizeAndFillTree(communicator, particleSaver, leavesArray, leavesSize, balancer); + } }; diff --git a/Src/GroupTree/FGroupSeqAlgorithm.hpp b/Src/GroupTree/FGroupSeqAlgorithm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e7976d4cab399bfcc28accf17a4be546e02afabe --- /dev/null +++ b/Src/GroupTree/FGroupSeqAlgorithm.hpp @@ -0,0 +1,390 @@ +#ifndef FGROUPSEQALGORITHM_HPP +#define FGROUPSEQALGORITHM_HPP + +#include "../Utils/FGlobal.hpp" +#include "../Core/FCoreCommon.hpp" +#include "../Utils/FQuickSort.hpp" +#include "../Containers/FTreeCoordinate.hpp" + +#include <list> +#include <vector> + +template <class OctreeClass, class CellContainerClass, class CellClass, class KernelClass, class ParticleContainerClass> +class FGroupSeqAlgorithm { +protected: + struct OutOfBlockInteraction{ + MortonIndex outIndex; + MortonIndex insideIndex; + int outPosition; + + operator long long() const{ + return static_cast<long long>(outIndex); + } + }; + + const int MaxThreads; //< The number of threads + OctreeClass*const tree; //< The Tree + KernelClass*const kernels; //< The kernels + +public: + FGroupSeqAlgorithm(OctreeClass*const inTree, KernelClass* inKernels) : MaxThreads(1), tree(inTree), kernels(inKernels){ + FAssertLF(tree, "tree cannot be null"); + FAssertLF(kernels, "kernels cannot be null"); + + FLOG(FLog::Controller << "FGroupSeqAlgorithm (Max Thread " << MaxThreads << ")\n"); + } + + ~FGroupSeqAlgorithm(){ + } + + void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ + if(operationsToProceed & FFmmP2M) bottomPass(); + + if(operationsToProceed & FFmmM2M) upwardPass(); + + if(operationsToProceed & FFmmM2L) transferPass(); + + if(operationsToProceed & FFmmL2L) downardPass(); + + if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ) directPass(); + } + +protected: + void bottomPass(){ + typename std::list<ParticleContainerClass>::iterator iterParticles = tree->leavesBegin(); + const typename std::list<ParticleContainerClass>::iterator endParticles = tree->leavesEnd(); + + typename std::list<CellContainerClass>::iterator iterCells = tree->cellsBegin(tree->getHeight()-1); + const typename std::list<CellContainerClass>::iterator endCells = tree->cellsEnd(tree->getHeight()-1); + + while(iterParticles != endParticles && iterCells != endCells){ + { // Can be a task(in:iterParticles, out:iterCells) + const MortonIndex blockStartIdx = (*iterCells)->getStartingIndex(); + const MortonIndex blockEndIdx = (*iterCells)->getEndingIndex(); + + for(MortonIndex mindex = blockStartIdx ; mindex < blockEndIdx ; ++mindex){ + CellClass* cell = (*iterCells)->getCell(mindex); + if(cell){ + ParticleContainerClass particles = (*iterParticles)->getLeaf(mindex); + FAssertLF(particles.isAttachedToSomething()); + kernels->P2M(cell, &particles); + } + } + } + + ++iterParticles; + ++iterCells; + } + + FAssertLF(iterParticles == endParticles && iterCells == endCells); + } + + void upwardPass(){ + for(int idxLevel = tree->getHeight()-2 ; idxLevel >= 2 ; --idxLevel){ + typename std::list<CellContainerClass>::iterator iterCells = tree->cellsBegin(idxLevel); + const typename std::list<CellContainerClass>::iterator endCells = tree->cellsEnd(idxLevel); + + typename std::list<CellContainerClass>::iterator iterChildCells = tree->cellsBegin(idxLevel+1); + const typename std::list<CellContainerClass>::iterator endChildCells = tree->cellsEnd(idxLevel+1); + + while(iterCells != endCells && iterChildCells != endChildCells){ + { // Can be a task(in:iterParticles, out:iterChildCells ...) + const MortonIndex blockStartIdx = (*iterCells)->getStartingIndex(); + const MortonIndex blockEndIdx = (*iterCells)->getEndingIndex(); + + for(MortonIndex mindex = blockStartIdx ; mindex < blockEndIdx && iterChildCells != endChildCells; ++mindex){ + CellClass* cell = (*iterCells)->getCell(mindex); + if(cell){ + CellClass* child[8] = {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}; + + for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ + while(iterChildCells != endChildCells && (*iterChildCells)->getEndingIndex() < ((mindex<<3)+idxChild) ){ + ++iterChildCells; + } + if( iterChildCells == endChildCells ){ + break; + } + child[idxChild] = (*iterChildCells)->getCell((mindex<<3)+idxChild); + } + + kernels->M2M(cell, child, idxLevel); + } + } + } + + ++iterCells; + } + + FAssertLF(iterCells == endCells && (iterChildCells == endChildCells || (++iterChildCells) == endChildCells)); + } + } + + void transferPass(){ + for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + typename std::list<CellContainerClass>::iterator iterCells = tree->cellsBegin(idxLevel); + const typename std::list<CellContainerClass>::iterator endCells = tree->cellsEnd(idxLevel); + + while(iterCells != endCells){ + std::vector<OutOfBlockInteraction> outsideInteractions; + + { // Can be a task(inout:iterCells, out:outsideInteractions) + const MortonIndex blockStartIdx = (*iterCells)->getStartingIndex(); + const MortonIndex blockEndIdx = (*iterCells)->getEndingIndex(); + + for(MortonIndex mindex = blockStartIdx ; mindex < blockEndIdx ; ++mindex){ + CellClass* cell = (*iterCells)->getCell(mindex); + if(cell){ + MortonIndex interactionsIndexes[189]; + int interactionsPosition[189]; + FTreeCoordinate coord(mindex, idxLevel); + int counter = coord.getInteractionNeighbors(idxLevel,interactionsIndexes,interactionsPosition); + + CellClass* interactions[343]; + memset(interactions, 0, 343*sizeof(CellClass*)); + int counterExistingCell = 0; + + for(int idxInter = 0 ; idxInter < counter ; ++idxInter){ + if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){ + CellClass* interCell = (*iterCells)->getCell(interactionsIndexes[idxInter]); + if(interCell){ + interactions[interactionsPosition[idxInter]] = interCell; + counterExistingCell += 1; + } + } + else if(interactionsPosition[idxInter] < 343/2){ + OutOfBlockInteraction property; + property.insideIndex = mindex; + property.outIndex = interactionsIndexes[idxInter]; + property.outPosition = interactionsPosition[idxInter]; + outsideInteractions.push_back(property); + } + } + + kernels->M2L( cell , interactions, counterExistingCell, idxLevel); + } + } + } + + + // Manage outofblock interaction + FQuickSort<OutOfBlockInteraction, long long, int>::QsSequential(outsideInteractions.data(),outsideInteractions.size()); + + typename std::list<CellContainerClass>::iterator iterLeftCells = tree->cellsBegin(idxLevel); + int currentOutInteraction = 0; + while(iterLeftCells != iterCells && currentOutInteraction < outsideInteractions.size()){ + const MortonIndex blockStartIdx = (*iterLeftCells)->getStartingIndex(); + const MortonIndex blockEndIdx = (*iterLeftCells)->getEndingIndex(); + + while(outsideInteractions[currentOutInteraction].outIndex < blockStartIdx){ + currentOutInteraction += 1; + } + + int lastOutInteraction = currentOutInteraction + 1; + while(lastOutInteraction < outsideInteractions.size() && outsideInteractions[lastOutInteraction].outIndex < blockEndIdx){ + lastOutInteraction += 1; + } + + { // Can be a task(in:currentOutInteraction, in:outsideInteractions, in:lastOutInteraction, inout:iterLeftCells, inout:iterCells) + for(int outInterIdx = currentOutInteraction ; outInterIdx < lastOutInteraction ; ++outInterIdx){ + CellClass* interCell = (*iterLeftCells)->getCell(outsideInteractions[outInterIdx].outIndex); + if(interCell){ + CellClass* cell = (*iterCells)->getCell(outsideInteractions[outInterIdx].insideIndex); + FAssertLF(cell); + CellClass* interactions[343]; + memset(interactions, 0, 343*sizeof(CellClass*)); + interactions[outsideInteractions[outInterIdx].outPosition] = interCell; + const int counter = 1; + kernels->M2L( cell , interactions, counter, idxLevel); + + interactions[outsideInteractions[outInterIdx].outPosition] = NULL; + interactions[getOppositeInterIndex(outsideInteractions[outInterIdx].outPosition)] = cell; + kernels->M2L( interCell , interactions, counter, idxLevel); + } + } + } + + currentOutInteraction = lastOutInteraction; + ++iterLeftCells; + } + + ++iterCells; + } + + } + } + + void downardPass(){ + for(int idxLevel = 2 ; idxLevel <= tree->getHeight()-2 ; ++idxLevel){ + typename std::list<CellContainerClass>::iterator iterCells = tree->cellsBegin(idxLevel); + const typename std::list<CellContainerClass>::iterator endCells = tree->cellsEnd(idxLevel); + + typename std::list<CellContainerClass>::iterator iterChildCells = tree->cellsBegin(idxLevel+1); + const typename std::list<CellContainerClass>::iterator endChildCells = tree->cellsEnd(idxLevel+1); + + while(iterCells != endCells && iterChildCells != endChildCells){ + { // Can be a task(in:iterParticles, inout:iterChildCells ...) + const MortonIndex blockStartIdx = (*iterCells)->getStartingIndex(); + const MortonIndex blockEndIdx = (*iterCells)->getEndingIndex(); + + for(MortonIndex mindex = blockStartIdx ; mindex < blockEndIdx && iterChildCells != endChildCells; ++mindex){ + CellClass* cell = (*iterCells)->getCell(mindex); + if(cell){ + CellClass* child[8] = {NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL}; + + for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ + while(iterChildCells != endChildCells && (*iterChildCells)->getEndingIndex() < ((mindex<<3)+idxChild) ){ + ++iterChildCells; + } + if( iterChildCells == endChildCells ){ + break; + } + child[idxChild] = (*iterChildCells)->getCell((mindex<<3)+idxChild); + } + + kernels->L2L(cell, child, idxLevel); + } + } + } + + ++iterCells; + } + + FAssertLF(iterCells == endCells && (iterChildCells == endChildCells || (++iterChildCells) == endChildCells)); + } + } + + void directPass(){ + { + typename std::list<ParticleContainerClass>::iterator iterParticles = tree->leavesBegin(); + const typename std::list<ParticleContainerClass>::iterator endParticles = tree->leavesEnd(); + + typename std::list<CellContainerClass>::iterator iterCells = tree->cellsBegin(tree->getHeight()-1); + const typename std::list<CellContainerClass>::iterator endCells = tree->cellsEnd(tree->getHeight()-1); + + while(iterParticles != endParticles && iterCells != endCells){ + { // Can be a task(in:iterCells, inout:iterParticles) + const MortonIndex blockStartIdx = (*iterCells)->getStartingIndex(); + const MortonIndex blockEndIdx = (*iterCells)->getStartingIndex(); + + for(MortonIndex mindex = blockStartIdx ; mindex < blockEndIdx ; ++mindex){ + CellClass* cell = (*iterCells)->getCell(mindex); + if(cell){ + ParticleContainerClass particles = (*iterParticles)->getLeaf(mindex); + FAssertLF(particles.isAttachedToSomething()); + kernels->P2M(cell, &particles); + } + } + } + + ++iterParticles; + ++iterCells; + } + + FAssertLF(iterParticles == endParticles && iterCells == endCells); + } + { + typename std::list<ParticleContainerClass>::iterator iterParticles = tree->leavesBegin(); + const typename std::list<ParticleContainerClass>::iterator endParticles = tree->leavesEnd(); + + while(iterParticles != endParticles){ + typename std::vector<OutOfBlockInteraction> outsideInteractions; + + { // Can be a task(inout:iterCells, out:outsideInteractions) + const MortonIndex blockStartIdx = (*iterParticles)->getStartingIndex(); + const MortonIndex blockEndIdx = (*iterParticles)->getEndingIndex(); + + for(MortonIndex mindex = blockStartIdx ; mindex < blockEndIdx ; ++mindex){ + ParticleContainerClass particles = (*iterParticles)->getLeaf(mindex); + if(particles.isAttachedToSomething()){ + MortonIndex interactionsIndexes[26]; + int interactionsPosition[26]; + FTreeCoordinate coord(mindex, tree->getHeight()-1); + int counter = coord.getNeighborsIndexes(tree->getHeight(),interactionsIndexes,interactionsPosition); + + ParticleContainerClass interactionsObjects[27]; + ParticleContainerClass* interactions[27]; + memset(interactions, 0, 27*sizeof(CellClass*)); + int counterExistingCell = 0; + + for(int idxInter = 0 ; idxInter < counter ; ++idxInter){ + if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){ + interactionsObjects[counterExistingCell] = (*iterParticles)->getLeaf(interactionsIndexes[idxInter]); + if(interactionsObjects[counterExistingCell].isAttachedToSomething()){ + interactions[interactionsPosition[idxInter]] = &interactionsObjects[counterExistingCell]; + counterExistingCell += 1; + } + } + else if(interactionsPosition[idxInter] < 27/2){ + OutOfBlockInteraction property; + property.insideIndex = mindex; + property.outIndex = interactionsIndexes[idxInter]; + property.outPosition = interactionsPosition[idxInter]; + outsideInteractions.push_back(property); + } + } + + kernels->P2P( coord, &particles, &particles , interactions, counterExistingCell); + } + } + } + + + // Manage outofblock interaction + FQuickSort<OutOfBlockInteraction, long long, int>::QsSequential(outsideInteractions.data(),outsideInteractions.size()); + + typename std::list<ParticleContainerClass>::iterator iterLeftParticles = tree->leavesBegin(); + int currentOutInteraction = 0; + while(iterLeftParticles != iterParticles && currentOutInteraction < outsideInteractions.size()){ + const MortonIndex blockStartIdx = (*iterLeftParticles)->getStartingIndex(); + const MortonIndex blockEndIdx = (*iterLeftParticles)->getEndingIndex(); + + while(outsideInteractions[currentOutInteraction].outIndex < blockStartIdx){ + currentOutInteraction += 1; + } + + int lastOutInteraction = currentOutInteraction + 1; + while(lastOutInteraction < outsideInteractions.size() && outsideInteractions[lastOutInteraction].outIndex < blockEndIdx){ + lastOutInteraction += 1; + } + + { // Can be a task(in:currentOutInteraction, in:outsideInteractions, in:lastOutInteraction, inout:iterLeftParticles, inout:iterParticles) + for(int outInterIdx = currentOutInteraction ; outInterIdx < lastOutInteraction ; ++outInterIdx){ + ParticleContainerClass interParticles = (*iterLeftParticles)->getLeaf(outsideInteractions[outInterIdx].outIndex); + if(interParticles.isAttachedToSomething()){ + ParticleContainerClass particles = (*iterParticles)->getLeaf(outsideInteractions[outInterIdx].insideIndex); + FAssertLF(particles.isAttachedToSomething()); + CellClass* interactions[27]; + memset(interactions, 0, 27*sizeof(CellClass*)); + interactions[outsideInteractions[outInterIdx].outPosition] = &interParticles; + const int counter = 1; + kernels->P2PRemote( FTreeCoordinate(outsideInteractions[outInterIdx].insideIndex, tree->getHeight()-1), &particles, &particles , interactions, counter); + + interactions[outsideInteractions[outInterIdx].outPosition] = NULL; + interactions[getOppositeNeighIndex(outsideInteractions[outInterIdx].outPosition)] = &particles; + kernels->P2PRemote( FTreeCoordinate(outsideInteractions[outInterIdx].outIndex, tree->getHeight()-1), &interParticles, &interParticles , interactions, counter); + } + } + } + + currentOutInteraction = lastOutInteraction; + ++iterLeftParticles; + } + + ++iterParticles; + } + } + } + + int getOppositeNeighIndex(const int index) const { + // ((idxX+1)*3 + (idxY+1)) * 3 + (idxZ+1) + return 27-index; + } + + int getOppositeInterIndex(const int index) const { + // ((( (xdiff+3) * 7) + (ydiff+3))) * 7 + zdiff + 3 + return 343-index; + } +}; + + +#endif // FGROUPSEQALGORITHM_HPP diff --git a/Src/GroupTree/FGroupTree.hpp b/Src/GroupTree/FGroupTree.hpp index 580c448be5033a93e982e0bb0ceb2ebf055e16c4..45a5e60288bd080dafd3cf5279f78908d2f9cdca 100644 --- a/Src/GroupTree/FGroupTree.hpp +++ b/Src/GroupTree/FGroupTree.hpp @@ -15,18 +15,20 @@ template <class CellClass, unsigned NbAttributesPerParticle, class AttributeClass = FReal> class FGroupTree { +public: typedef FGroupAttachedLeaf<NbAttributesPerParticle,AttributeClass> BasicAttachedClass; + typedef FGroupOfCells<CellClass> CellGroupClass; +protected: //< This value is for not used cells static const int CellIsEmptyFlag = -1; -protected: //< height of the tree (1 => only the root) const int treeHeight; //< max number of cells in a block const int nbElementsPerBlock; //< all the blocks of the tree - std::list<FGroupOfCells<CellClass>*>* cellBlocksPerLevel; + std::list<CellGroupClass*>* cellBlocksPerLevel; //< all the blocks of leaves std::list<FGroupOfParticles<NbAttributesPerParticle,AttributeClass>*> particleBlocks; @@ -71,7 +73,7 @@ public: : treeHeight(inTreeHeight), nbElementsPerBlock(inNbElementsPerBlock), cellBlocksPerLevel(0), boxCenter(inOctreeSrc->getBoxCenter()), boxCorner(inOctreeSrc->getBoxCenter(),-(inOctreeSrc->getBoxWidth()/2)), boxWidth(inOctreeSrc->getBoxWidth()), boxWidthAtLeafLevel(inOctreeSrc->getBoxWidth()/FReal(1<<inTreeHeight)){ - cellBlocksPerLevel = new std::list<FGroupOfCells<CellClass>*>[treeHeight]; + cellBlocksPerLevel = new std::list<CellGroupClass*>[treeHeight]; // Iterate on the tree and build typename OctreeClass::Iterator octreeIterator(inOctreeSrc); @@ -92,7 +94,7 @@ public: } // Create a block with the apropriate parameters - FGroupOfCells<CellClass>*const newBlock = new FGroupOfCells<CellClass>(blockIteratorInOctree.getCurrentGlobalIndex(), + CellGroupClass*const newBlock = new CellGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(), octreeIterator.getCurrentGlobalIndex()+1, sizeOfBlock); FGroupOfParticles<NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<NbAttributesPerParticle, AttributeClass>(blockIteratorInOctree.getCurrentGlobalIndex(), @@ -148,7 +150,7 @@ public: } // Create a block with the apropriate parameters - FGroupOfCells<CellClass>*const newBlock = new FGroupOfCells<CellClass>(blockIteratorInOctree.getCurrentGlobalIndex(), + CellGroupClass*const newBlock = new CellGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(), octreeIterator.getCurrentGlobalIndex()+1, sizeOfBlock); // Initialize each cell of the block @@ -191,7 +193,7 @@ public: boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth), boxWidthAtLeafLevel(inBoxWidth/FReal(1<<inTreeHeight)){ - cellBlocksPerLevel = new std::list<FGroupOfCells<CellClass>*>[treeHeight]; + cellBlocksPerLevel = new std::list<CellGroupClass*>[treeHeight]; MortonIndex* currentBlockIndexes = new MortonIndex[nbElementsPerBlock]; // First we work at leaf level @@ -248,7 +250,7 @@ public: } // Create a group - FGroupOfCells<CellClass>*const newBlock = new FGroupOfCells<CellClass>(currentBlockIndexes[0], + CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], currentBlockIndexes[sizeOfBlock-1]+1, sizeOfBlock); FGroupOfParticles<NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0], @@ -292,8 +294,8 @@ public: // For each level from heigth - 2 to 1 for(int idxLevel = treeHeight-2; idxLevel > 0 ; --idxLevel){ - typename std::list<FGroupOfCells<CellClass>*>::const_iterator iterChildCells = cellBlocksPerLevel[idxLevel+1].begin(); - const typename std::list<FGroupOfCells<CellClass>*>::const_iterator iterChildEndCells = cellBlocksPerLevel[idxLevel+1].end(); + typename std::list<CellGroupClass*>::const_iterator iterChildCells = cellBlocksPerLevel[idxLevel+1].begin(); + const typename std::list<CellGroupClass*>::const_iterator iterChildEndCells = cellBlocksPerLevel[idxLevel+1].end(); MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex(); int sizeOfBlock = 0; @@ -322,7 +324,7 @@ public: // If group is full if(sizeOfBlock == nbElementsPerBlock || (sizeOfBlock && iterChildCells == iterChildEndCells)){ // Create a group - FGroupOfCells<CellClass>*const newBlock = new FGroupOfCells<CellClass>(currentBlockIndexes[0], + CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], currentBlockIndexes[sizeOfBlock-1]+1, sizeOfBlock); // Init cells @@ -355,7 +357,7 @@ public: * create the block and the cells, using the constructor of * FGroupOfCells !! */ - /*FGroupOfCells<CellClass> * createBlockFromArray(MortonIndex head[]){ + /*CellGroupClass * createBlockFromArray(MortonIndex head[]){ //Store the start and end MortonIndex start = head[0]; MortonIndex end = start; @@ -371,7 +373,7 @@ public: // } } //allocation of memory - FGroupOfCells<CellClass> * newBlock = new FGroupOfCells<CellClass>(start,end+1,count); + CellGroupClass * newBlock = new CellGroupClass(start,end+1,count); //allocation of cells for(int idx=0 ; idx<count ; idx++){ newBlock->newCell(head[idx], idx); @@ -389,7 +391,7 @@ public: FGroupTree(const int inTreeHeight, const int inNbElementsPerBlock, OctreeClass*const inOctreeSrc, int FLAG): treeHeight(inTreeHeight),nbElementsPerBlock(inNbElementsPerBlock),cellBlocksPerLevel(0) { - cellBlocksPerLevel = new std::list<FGroupOfCells<CellClass>*>[treeHeight]; + cellBlocksPerLevel = new std::list<CellGroupClass*>[treeHeight]; int *nbCellPerLevel = new int[treeHeight]; inOctreeSrc->getNbCellsPerLevel(nbCellPerLevel); int nbLeaf = nbCellPerLevel[treeHeight-1]; @@ -423,7 +425,7 @@ public: idxLeafs += inNbElementsPerBlock; //creation of the block and addition to the list - FGroupOfCells<CellClass> * tempBlock = createBlockFromArray(head); + CellGroupClass * tempBlock = createBlockFromArray(head); cellBlocksPerLevel[treeHeight-1].push_back(tempBlock); } delete[] leafsIdx; @@ -438,7 +440,7 @@ public: MortonIndex previous = -1; //Iterator over the list at a deeper level (READ) - typename std::list<FGroupOfCells<CellClass>*>::const_iterator curBlockRead; + typename std::list<CellGroupClass*>::const_iterator curBlockRead; for(curBlockRead = cellBlocksPerLevel[idxLevel+1].begin() ; curBlockRead != cellBlocksPerLevel[idxLevel+1].end() ; ++curBlockRead){ //Loop over cells in READ list for(MortonIndex idxCell = (*curBlockRead)->getStartingIndex() ; idxCell < (*curBlockRead)->getEndingIndex() ; ++idxCell){ @@ -461,7 +463,7 @@ public: else{ //Creation of the block from head, then reset head, and //storage of new idx in new head - FGroupOfCells<CellClass> * tempBlock = createBlockFromArray(head); + CellGroupClass * tempBlock = createBlockFromArray(head); cellBlocksPerLevel[idxLevel].push_back(tempBlock); //Need a new block @@ -475,7 +477,7 @@ public: } } //Before changing Level, need to close current Block - FGroupOfCells<CellClass> * tempBlock = createBlockFromArray(head); + CellGroupClass * tempBlock = createBlockFromArray(head); cellBlocksPerLevel[idxLevel].push_back(tempBlock); } printf("toto \n"); @@ -490,8 +492,8 @@ public: /** This function dealloc the tree by deleting each block */ ~FGroupTree(){ for(int idxLevel = 0 ; idxLevel < treeHeight ; ++idxLevel){ - std::list<FGroupOfCells<CellClass>*>& levelBlocks = cellBlocksPerLevel[idxLevel]; - for (FGroupOfCells<CellClass>* block: levelBlocks){ + std::list<CellGroupClass*>& levelBlocks = cellBlocksPerLevel[idxLevel]; + for (CellGroupClass* block: levelBlocks){ delete block; } } @@ -524,8 +526,8 @@ public: */ void forEachCell(std::function<void(CellClass*)> function){ for(int idxLevel = 0 ; idxLevel < treeHeight ; ++idxLevel){ - std::list<FGroupOfCells<CellClass>*>& levelBlocks = cellBlocksPerLevel[idxLevel]; - for (FGroupOfCells<CellClass>* block: levelBlocks){ + std::list<CellGroupClass*>& levelBlocks = cellBlocksPerLevel[idxLevel]; + for (CellGroupClass* block: levelBlocks){ block->forEachCell(function); } } @@ -537,8 +539,8 @@ public: */ void forEachCellWithLevel(std::function<void(CellClass*,const int)> function){ for(int idxLevel = 0 ; idxLevel < treeHeight ; ++idxLevel){ - std::list<FGroupOfCells<CellClass>*>& levelBlocks = cellBlocksPerLevel[idxLevel]; - for (FGroupOfCells<CellClass>* block: levelBlocks){ + std::list<CellGroupClass*>& levelBlocks = cellBlocksPerLevel[idxLevel]; + for (CellGroupClass* block: levelBlocks){ block->forEachCell(function, idxLevel); } } @@ -550,8 +552,8 @@ public: */ template<class ParticlesAttachedClass> void forEachCellLeaf(std::function<void(CellClass*,ParticlesAttachedClass*)> function){ - typename std::list<FGroupOfCells<CellClass>*>::iterator iterCells = cellBlocksPerLevel[treeHeight-1].begin(); - const typename std::list<FGroupOfCells<CellClass>*>::iterator iterEndCells = cellBlocksPerLevel[treeHeight-1].end(); + typename std::list<CellGroupClass*>::iterator iterCells = cellBlocksPerLevel[treeHeight-1].begin(); + const typename std::list<CellGroupClass*>::iterator iterEndCells = cellBlocksPerLevel[treeHeight-1].end(); typename std::list<FGroupOfParticles<NbAttributesPerParticle,AttributeClass>*>::iterator iterLeaves = particleBlocks.begin(); const typename std::list<FGroupOfParticles<NbAttributesPerParticle,AttributeClass>*>::iterator iterEndLeaves = particleBlocks.end(); @@ -581,10 +583,10 @@ public: std::cout << "\t Group Size = " << nbElementsPerBlock << "\n"; std::cout << "\t Tree height = " << treeHeight << "\n"; for(int idxLevel = 1 ; idxLevel < treeHeight ; ++idxLevel){ - std::list<FGroupOfCells<CellClass>*>& levelBlocks = cellBlocksPerLevel[idxLevel]; + std::list<CellGroupClass*>& levelBlocks = cellBlocksPerLevel[idxLevel]; std::cout << "Level " << idxLevel << ", there are " << levelBlocks.size() << " groups.\n"; int idxGroup = 0; - for (const FGroupOfCells<CellClass>* block: levelBlocks){ + for (const CellGroupClass* block: levelBlocks){ std::cout << "\t Group " << (idxGroup++); std::cout << "\t Size = " << block->getNumberOfCellsInBlock(); std::cout << "\t Starting Index = " << block->getStartingIndex(); @@ -607,6 +609,51 @@ public: } std::cout << "There are " << totalNbParticles << " particles.\n"; } + + ///////////////////////////////////////////////////////// + // Algorithm function + ///////////////////////////////////////////////////////// + + int getHeight() const { + return treeHeight; + } + + typename std::list<CellGroupClass*>::iterator cellsBegin(const int atHeight){ + FAssertLF(atHeight < treeHeight); + return cellBlocksPerLevel[atHeight].begin(); + } + + typename std::list<CellGroupClass*>::const_iterator cellsBegin(const int atHeight) const { + FAssertLF(atHeight < treeHeight); + return cellBlocksPerLevel[atHeight].begin(); + } + + typename std::list<CellGroupClass*>::iterator cellsEnd(const int atHeight){ + FAssertLF(atHeight < treeHeight); + return cellBlocksPerLevel[atHeight].end(); + } + + typename std::list<CellGroupClass*>::const_iterator cellsEnd(const int atHeight) const { + FAssertLF(atHeight < treeHeight); + return cellBlocksPerLevel[atHeight].end(); + } + + + typename std::list<FGroupOfParticles<NbAttributesPerParticle,AttributeClass>*>::iterator leavesBegin(){ + return particleBlocks.begin(); + } + + typename std::list<FGroupOfParticles<NbAttributesPerParticle,AttributeClass>*>::const_iterator leavesBegin() const { + return particleBlocks.begin(); + } + + typename std::list<FGroupOfParticles<NbAttributesPerParticle,AttributeClass>*>::iterator leavesEnd(){ + return particleBlocks.end(); + } + + typename std::list<FGroupOfParticles<NbAttributesPerParticle,AttributeClass>*>::const_iterator leavesEnd() const { + return particleBlocks.end(); + } }; #endif // FGROUPTREE_HPP diff --git a/Tests/Kernels/testBlockedTree.cpp b/Tests/Kernels/testBlockedTree.cpp index 5db2193402683b05c6915b8d78fde04cecf98261..d8327f0211b8d5e4e9af2bd7d538e25a420c28b9 100644 --- a/Tests/Kernels/testBlockedTree.cpp +++ b/Tests/Kernels/testBlockedTree.cpp @@ -23,7 +23,7 @@ #include "../../Src/Files/FFmaBinLoader.hpp" - +#include "../../Src/GroupTree/FGroupSeqAlgorithm.hpp" int main(int argc, char* argv[]){ static const int P = 9; @@ -32,7 +32,6 @@ int main(int argc, char* argv[]){ typedef FSimpleLeaf< ContainerClass > LeafClass; typedef FOctree< CellClass, ContainerClass , LeafClass > OctreeClass; - //typedef FRotationKernel< CellClass, ContainerClass , P> KernelClass; typedef FGroupTree< CellClass, 4, FReal> GroupOctreeClass; FTic counter; @@ -70,5 +69,10 @@ int main(int argc, char* argv[]){ groupedTree2.printInfoBlocks(); groupedTree3.printInfoBlocks(); + + + typedef FRotationKernel< CellClass, ContainerClass , P> KernelClass; + FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::BasicAttachedClass> algo(NULL,NULL); + return 0; }