From 9b487735d9a37faf55e4a4524d0804dea0db6d36 Mon Sep 17 00:00:00 2001 From: berenger-bramas <berenger-bramas@2616d619-271b-44dc-8df4-d4a8f33a7222> Date: Mon, 1 Aug 2011 15:27:21 +0000 Subject: [PATCH] Parallel FMM - There is a bug in the P2P operator. (+Need more tests+refactoring+comments) git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@164 2616d619-271b-44dc-8df4-d4a8f33a7222 --- Src/Core/FFmmAlgorithmThreadProc.hpp | 51 ++++++++++++++++++++-------- Tests/testFmmAlgorithmProc.cpp | 35 +++++++++---------- 2 files changed, 54 insertions(+), 32 deletions(-) diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp index 66bc14574..79435a4a8 100644 --- a/Src/Core/FFmmAlgorithmThreadProc.hpp +++ b/Src/Core/FFmmAlgorithmThreadProc.hpp @@ -71,6 +71,18 @@ public: }; +template<class OctreeClass> +void print(OctreeClass* const valideTree){ + typename OctreeClass::Iterator octreeIterator(valideTree); + for(int idxLevel = valideTree->getHeight() - 1 ; idxLevel > 1 ; --idxLevel ){ + do{ + std::cout << "[" << octreeIterator.getCurrentGlobalIndex() << "] up:" << octreeIterator.getCurrentCell()->getDataUp() << " down:" << octreeIterator.getCurrentCell()->getDataDown() << "\t"; + } while(octreeIterator.moveRight()); + std::cout << "\n"; + octreeIterator.gotoLeft(); + octreeIterator.moveDown(); + } +} /** * @author Berenger Bramas (berenger.bramas@inria.fr) @@ -347,7 +359,7 @@ public: const CellClass* const* const child = iterArray[0].getCurrentChild(); for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ - if( child[idxChild] ){ + if( child[idxChild] && workingIntervalsPerLevel[(idxLevel+1) * nbProcess + idProcess].min <= child[idxChild]->getMortonIndex() ){ memcpy(&sendBuffer[idxBuff], child[idxChild], sizeof(CellClass)); idxBuff += sizeof(CellClass); state |= (0x1 << idxChild); @@ -395,6 +407,8 @@ public: if(iterRequests){ MPI_Waitall( iterRequests, requests, 0); + printf("Send/Receive what I need\n"); + if( hasToReceive ){ CellClass* currentChild[8]; memcpy(currentChild, iterArray[numberOfCells - 1].getCurrentChild(), 8 * sizeof(CellClass*)); @@ -410,6 +424,8 @@ public: ++position; } + printf("Receive Index is %lld child position is %d\n", iterArray[numberOfCells - 1].getCurrentGlobalIndex(), position); + fassert(!currentChild[position], "Already has a cell here", __LINE__, __FILE__); currentChild[position] = (CellClass*) &recvBuffer[idxProc * recvBufferOffset + bufferIndex]; bufferIndex += sizeof(CellClass); @@ -426,6 +442,7 @@ public: } } + FDEBUG( FDebug::Controller << "\tFinished (@Upward Pass (M2M) = " << counterTime.tacAndElapsed() << "s)\n" ); FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); FDEBUG( FDebug::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" ); @@ -474,19 +491,22 @@ public: // for each levels for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){ int numberOfCells = 0; - while(octreeIterator.getCurrentGlobalIndex() != workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){ + + while(octreeIterator.getCurrentGlobalIndex() < workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){ octreeIterator.moveRight(); } + // for each cells do{ iterArray[numberOfCells] = octreeIterator; ++numberOfCells; - } while(octreeIterator.getCurrentGlobalIndex() != workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight()); + } while(octreeIterator.getCurrentGlobalIndex() < workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight()); avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; leafsNeedOther[idxLevel] = new FBoolArray(numberOfCells); + // Which cell potentialy need other data and in the same time // are potentialy needed by other MortonIndex neighborsIndexes[208]; @@ -598,14 +618,14 @@ public: // for each levels for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){ int numberOfCells = 0; - while(octreeIterator.getCurrentGlobalIndex() != workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){ + while(octreeIterator.getCurrentGlobalIndex() < workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){ octreeIterator.moveRight(); } // for each cells do{ iterArray[numberOfCells] = octreeIterator; ++numberOfCells; - } while(octreeIterator.getCurrentGlobalIndex() != workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight()); + } while(octreeIterator.getCurrentGlobalIndex() < workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight()); avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -654,7 +674,7 @@ public: int numberOfCells = 0; int realCellId = 0; - while(octreeIterator.getCurrentGlobalIndex() != workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){ + while(octreeIterator.getCurrentGlobalIndex() < workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){ octreeIterator.moveRight(); } // for each cells @@ -662,7 +682,7 @@ public: if(leafsNeedOther[idxLevel]->get(realCellId++)){ iterArray[numberOfCells++] = octreeIterator; } - } while(octreeIterator.getCurrentGlobalIndex() != workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight()); + } while(octreeIterator.getCurrentGlobalIndex() < workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight()); avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -709,8 +729,6 @@ public: FDEBUG( FDebug::Controller << "\t\t\tTotal time to find " << findCounter.cumulated() << " s.\n" ); } - - { // second L2L FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); FDEBUG(FTic counterTime); @@ -786,7 +804,6 @@ public: } } - FDEBUG( FDebug::Controller << "\tFinished (@Downward Pass (L2L) = " << counterTime.tacAndElapsed() << "s)\n" ); FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" ); FDEBUG( FDebug::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" ); @@ -835,6 +852,9 @@ public: // index int indexToSend[nbProcess]; memset(indexToSend, 0, sizeof(int) * nbProcess); + // index + int partsToSend[nbProcess]; + memset(partsToSend, 0, sizeof(int) * nbProcess); // To know if a leaf has been already sent to a proc bool alreadySent[nbProcess]; @@ -875,6 +895,7 @@ public: delete[] reinterpret_cast<char*>(temp); } toSend[procToReceive][indexToSend[procToReceive]++] = iterArray[idxLeaf]; + partsToSend[procToReceive] += iterArray[idxLeaf].getCurrentListSrc()->getSize(); } } } @@ -887,10 +908,10 @@ public: int globalReceiveMap[nbProcess * nbProcess]; memset(globalReceiveMap, 0, sizeof(int) * nbProcess * nbProcess); - mpiassert( MPI_Allgather( indexToSend, nbProcess, MPI_INT, globalReceiveMap, nbProcess, MPI_INT, MPI_COMM_WORLD), __LINE__ ); + mpiassert( MPI_Allgather( partsToSend, nbProcess, MPI_INT, globalReceiveMap, nbProcess, MPI_INT, MPI_COMM_WORLD), __LINE__ ); for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ - printf("indexToSend[%d] = %d\n", idxProc, indexToSend[idxProc]); + printf("indexToSend[%d] = %d leafs %d parts\n", idxProc, indexToSend[idxProc], partsToSend[idxProc]); } printf("Will send ...\n"); @@ -908,16 +929,16 @@ public: for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ if(indexToSend[idxProc] != 0){ printf("Send %d to %d\n", indexToSend[idxProc], idxProc); - sendBuffer[idxProc] = reinterpret_cast<ParticleClass*>(new char[sizeof(ParticleClass) * indexToSend[idxProc]]); + sendBuffer[idxProc] = reinterpret_cast<ParticleClass*>(new char[sizeof(ParticleClass) * partsToSend[idxProc]]); int currentIndex = 0; - for(int idxLeaf = idxProc ; idxLeaf < indexToSend[idxProc] ; ++idxLeaf){ + for(int idxLeaf = 0 ; idxLeaf < indexToSend[idxProc] ; ++idxLeaf){ memcpy(&sendBuffer[idxProc][currentIndex], toSend[idxProc][idxLeaf].getCurrentListSrc()->data(), sizeof(ParticleClass) * toSend[idxProc][idxLeaf].getCurrentListSrc()->getSize() ); currentIndex += toSend[idxProc][idxLeaf].getCurrentListSrc()->getSize(); } - mpiassert( MPI_Isend( sendBuffer[idxProc], sizeof(ParticleClass) * indexToSend[idxProc] , MPI_BYTE , + mpiassert( MPI_Isend( sendBuffer[idxProc], sizeof(ParticleClass) * partsToSend[idxProc] , MPI_BYTE , idxProc, TAG_P2P_PART, MPI_COMM_WORLD, &requests[iterRequest++]) , __LINE__ ); } diff --git a/Tests/testFmmAlgorithmProc.cpp b/Tests/testFmmAlgorithmProc.cpp index 58f81bcbd..789cfeaac 100644 --- a/Tests/testFmmAlgorithmProc.cpp +++ b/Tests/testFmmAlgorithmProc.cpp @@ -230,18 +230,18 @@ void ValidateFMMAlgoProc(OctreeClass* const badTree, /** To print an octree * used to debug and understand how the values were passed */ -template<class OctreeClass> -void print(OctreeClass* const valideTree){ - typename OctreeClass::Iterator octreeIterator(valideTree); - for(int idxLevel = valideTree->getHeight() - 1 ; idxLevel > 1 ; --idxLevel ){ - do{ - std::cout << "[" << octreeIterator.getCurrentGlobalIndex() << "] up:" << octreeIterator.getCurrentCell()->getDataUp() << " down:" << octreeIterator.getCurrentCell()->getDataDown() << "\t"; - } while(octreeIterator.moveRight()); - std::cout << "\n"; - octreeIterator.gotoLeft(); - octreeIterator.moveDown(); - } -} +//template<class OctreeClass> +//void print(OctreeClass* const valideTree){ +// typename OctreeClass::Iterator octreeIterator(valideTree); +// for(int idxLevel = valideTree->getHeight() - 1 ; idxLevel > 1 ; --idxLevel ){ +// do{ +// std::cout << "[" << octreeIterator.getCurrentGlobalIndex() << "] up:" << octreeIterator.getCurrentCell()->getDataUp() << " down:" << octreeIterator.getCurrentCell()->getDataDown() << "\t"; +// } while(octreeIterator.moveRight()); +// std::cout << "\n"; +// octreeIterator.gotoLeft(); +// octreeIterator.moveDown(); +// } +//} struct ParticlesGroup { int number; @@ -337,7 +337,7 @@ int main(int argc, char ** argv){ long outputSize = 0; { // create particles - IndexedParticle*const realParticlesIndexed = reinterpret_cast<IndexedParticle*>(new char[loader.getNumberOfParticles() * sizeof(IndexedParticle)]); + IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()]; F3DPosition boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2)); FTreeCoordinate host; const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / (1 << (NbLevels - 1) ); @@ -353,7 +353,7 @@ int main(int argc, char ** argv){ // sort particles FQuickSort::QsMpi<IndexedParticle,MortonIndex>(realParticlesIndexed, loader.getNumberOfParticles(),outputArray,outputSize); - delete [] reinterpret_cast<char*>(realParticlesIndexed); + delete [] (realParticlesIndexed); std::cout << "Sorted "<< outputSize << " particles..." << std::endl; } @@ -369,10 +369,11 @@ int main(int argc, char ** argv){ } if( app.processId() != app.processCount() - 1){ MPI_Irecv(&otherFirstIndex, 1, MPI_LONG_LONG, app.processId() + 1, 0, MPI_COMM_WORLD, &req[reqiter++]); - std::cout << "I receive index from right " << otherFirstIndex << std::endl; } MPI_Waitall(reqiter,req,0); + std::cout << "I receive index from right " << otherFirstIndex << std::endl; + if( 0 < app.processId() && !outputSize){ std::cout << "I send to left the first index is from right " << otherFirstIndex << std::endl; MPI_Send( &otherFirstIndex, 1, MPI_LONG_LONG, app.processId() - 1, 0, MPI_COMM_WORLD); @@ -828,11 +829,11 @@ int main(int argc, char ** argv){ } } - printf("Will now take my own particles from %d to %d\n",FMath::Max(myLeftLeaf-leftLeafs,0) , FMath::Min(nbLeafs , FMath::Max(myLeftLeaf-leftLeafs,0) + FMath::Min(myRightLeaf,totalNbLeafs - rightLeafs) - myLeftLeaf)); + printf("Will now take my own particles from %d to %d\n",FMath::Max(myLeftLeaf-leftLeafs,0) , totalNbLeafs - rightLeafs - leftLeafs - FMath::Max(0,leftLeafs + nbLeafs - myRightLeaf)); printf("myLeftLeaf %d leftLeafs %d myRightLeaf %d rightLeafs %d totalNbLeafs %d\n",myLeftLeaf,leftLeafs, myRightLeaf, rightLeafs, totalNbLeafs); // insert the particles we already have if(leftLeafs != totalNbLeafs){ - for(int idxLeafInsert = FMath::Max(myLeftLeaf-leftLeafs,0) ; idxLeafInsert < FMath::Min(nbLeafs , FMath::Max(myLeftLeaf-leftLeafs,0) + FMath::Min(myRightLeaf,totalNbLeafs - rightLeafs) - myLeftLeaf) ; ++idxLeafInsert){ + for(int idxLeafInsert = FMath::Max(myLeftLeaf-leftLeafs,0) ; idxLeafInsert < totalNbLeafs - rightLeafs - leftLeafs - FMath::Max(0,leftLeafs + nbLeafs - myRightLeaf) ; ++idxLeafInsert){ for(int idxPart = 0 ; idxPart < groups[idxLeafInsert].number ; ++idxPart){ realTree.insert(realParticles[groups[idxLeafInsert].positionInArray + idxPart]); } -- GitLab