From 9b487735d9a37faf55e4a4524d0804dea0db6d36 Mon Sep 17 00:00:00 2001
From: berenger-bramas <berenger-bramas@2616d619-271b-44dc-8df4-d4a8f33a7222>
Date: Mon, 1 Aug 2011 15:27:21 +0000
Subject: [PATCH] Parallel FMM - There is a bug in the P2P operator (needs
 more tests, refactoring, and comments)

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@164 2616d619-271b-44dc-8df4-d4a8f33a7222
---
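Notes:

Besides the P2P operator itself, this patch filters the M2M child sends by
the next level's working interval, replaces the != tests with < when
positioning the per-level iterators on each process's working interval,
moves the print() debug helper into FFmmAlgorithmThreadProc.hpp, and fixes
the indexed-particle allocation, the index exchange ordering, and the local
leaf range computation in testFmmAlgorithmProc.cpp.

The P2P bug: the send buffers were allocated for one particle per leaf
(indexToSend) while the whole particle list of every leaf was copied into
them, overflowing as soon as a leaf holds more than one particle; the
packing loop also started at idxLeaf = idxProc instead of 0. The new
partsToSend array counts particles per destination and is what
MPI_Allgather now exchanges. A minimal sketch of the corrected exchange,
with hypothetical simplified Particle/Leaf types standing in for
ParticleClass and the stored octree iterators:

// Minimal sketch of the corrected P2P particle exchange, with hypothetical
// simplified Particle/Leaf types (the real code packs ParticleClass data
// reached through the octree iterators stored in toSend).
#include <mpi.h>
#include <cstring>
#include <vector>

struct Particle { double x, y, z, physicalValue; };
struct Leaf { std::vector<Particle> particles; };

// Post one MPI_Isend per destination rank. The fix: each buffer is sized by
// the total particle count (partsToSend), not by the leaf count, and the
// packing loop starts at leaf 0 (it previously started at idxProc).
void postParticleSends(const std::vector<std::vector<const Leaf*> >& toSend,
                       const int nbProcess,
                       std::vector<Particle*>& sendBuffers, // delete[] after MPI_Waitall
                       std::vector<MPI_Request>& requests){
    sendBuffers.assign(nbProcess, NULL);
    for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
        // Count the particles (not the leaves) destined to idxProc.
        int partsToSend = 0;
        for(std::size_t idxLeaf = 0 ; idxLeaf < toSend[idxProc].size() ; ++idxLeaf){
            partsToSend += int(toSend[idxProc][idxLeaf]->particles.size());
        }
        if(partsToSend == 0) continue;

        // One contiguous buffer of partsToSend particles for this rank.
        sendBuffers[idxProc] = new Particle[partsToSend];
        int currentIndex = 0;
        for(std::size_t idxLeaf = 0 ; idxLeaf < toSend[idxProc].size() ; ++idxLeaf){
            const std::vector<Particle>& parts = toSend[idxProc][idxLeaf]->particles;
            if(parts.empty()) continue; // nothing to pack for an empty leaf
            std::memcpy(&sendBuffers[idxProc][currentIndex], &parts[0],
                        sizeof(Particle) * parts.size());
            currentIndex += int(parts.size());
        }
        requests.push_back(MPI_Request());
        MPI_Isend(sendBuffers[idxProc], int(sizeof(Particle)) * partsToSend,
                  MPI_BYTE, idxProc, /*TAG_P2P_PART*/ 99,
                  MPI_COMM_WORLD, &requests.back());
    }
}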
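The interval positioning change, sketched over a plain sorted array of
Morton indexes instead of the octree iterator (hypothetical helper): with
!=, a process whose interval bound is absent from its local tree would run
past the interval, while < and <= stop at the first owned cell and at the
interval maximum.

// Minimal sketch of the interval test over a sorted array of Morton
// indexes (hypothetical helper; the real code walks the octree iterator).
#include <vector>

typedef long long MortonIndex;
struct Interval { MortonIndex min; MortonIndex max; };

// Collect the indexes of `sorted` that fall inside the working interval.
// Skipping with < (instead of !=) still terminates when interval.min is
// not itself present in the local tree.
std::vector<MortonIndex> cellsInInterval(const std::vector<MortonIndex>& sorted,
                                         const Interval interval){
    std::vector<MortonIndex> result;
    std::size_t idx = 0;
    // Advance past everything strictly below the interval...
    while(idx < sorted.size() && sorted[idx] < interval.min){
        ++idx;
    }
    // ...then take cells until the interval maximum has been passed.
    while(idx < sorted.size() && sorted[idx] <= interval.max){
        result.push_back(sorted[idx]);
        ++idx;
    }
    return result;
}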
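In the test driver, the printing of otherFirstIndex moved below the
MPI_Waitall: a buffer posted with MPI_Irecv holds no defined value until
the matching wait completes. A sketch of the corrected ordering
(hypothetical standalone function):

// Minimal sketch: read the non-blocking receive's buffer only after the
// matching wait has completed.
#include <mpi.h>
#include <cstdio>

void receiveIndexFromRight(const int rank, const int nbProcs){
    long long otherFirstIndex = -1;
    MPI_Request req;
    int nbRequests = 0;
    if(rank != nbProcs - 1){
        MPI_Irecv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, 0,
                  MPI_COMM_WORLD, &req);
        nbRequests = 1;
        // Printing otherFirstIndex here would race with the incoming message.
    }
    MPI_Waitall(nbRequests, &req, MPI_STATUSES_IGNORE);
    // Safe now: either no receive was posted, or it has completed.
    std::printf("I receive index from right %lld\n", otherFirstIndex);
}
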
 Src/Core/FFmmAlgorithmThreadProc.hpp | 52 ++++++++++++++++++++--------
 Tests/testFmmAlgorithmProc.cpp       | 35 +++++++++----------
 2 files changed, 55 insertions(+), 32 deletions(-)

diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp
index 66bc14574..79435a4a8 100644
--- a/Src/Core/FFmmAlgorithmThreadProc.hpp
+++ b/Src/Core/FFmmAlgorithmThreadProc.hpp
@@ -71,6 +71,19 @@ public:
 };
 
 
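+/** To print an octree, used to debug and understand how the values were passed. */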
+template<class OctreeClass>
+void print(OctreeClass* const valideTree){
+    typename OctreeClass::Iterator octreeIterator(valideTree);
+    for(int idxLevel = valideTree->getHeight() - 1 ; idxLevel > 1 ; --idxLevel ){
+        do{
+            std::cout << "[" << octreeIterator.getCurrentGlobalIndex() << "] up:" << octreeIterator.getCurrentCell()->getDataUp() << " down:" << octreeIterator.getCurrentCell()->getDataDown() << "\t";
+        } while(octreeIterator.moveRight());
+        std::cout << "\n";
+        octreeIterator.gotoLeft();
+        octreeIterator.moveDown();
+    }
+}
 
 /**
 * @author Berenger Bramas (berenger.bramas@inria.fr)
@@ -347,7 +359,7 @@ public:
 
                 const CellClass* const* const child = iterArray[0].getCurrentChild();
                 for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
-                    if( child[idxChild] ){
+                    if( child[idxChild] && workingIntervalsPerLevel[(idxLevel+1) * nbProcess + idProcess].min <= child[idxChild]->getMortonIndex() ){
                         memcpy(&sendBuffer[idxBuff], child[idxChild], sizeof(CellClass));
                         idxBuff += sizeof(CellClass);
                         state |= (0x1 << idxChild);
@@ -395,6 +407,8 @@ public:
             if(iterRequests){
                 MPI_Waitall( iterRequests, requests, 0);
 
+                printf("Send/Receive what I need\n");
+
                 if( hasToReceive ){
                     CellClass* currentChild[8];
                     memcpy(currentChild, iterArray[numberOfCells - 1].getCurrentChild(), 8 * sizeof(CellClass*));
@@ -410,6 +424,8 @@ public:
                                 ++position;
                             }
 
+                            printf("Receive index is %lld, child position is %d\n", iterArray[numberOfCells - 1].getCurrentGlobalIndex(), position);
+
                             fassert(!currentChild[position], "Already has a cell here", __LINE__, __FILE__);
                             currentChild[position] = (CellClass*) &recvBuffer[idxProc * recvBufferOffset + bufferIndex];
                             bufferIndex += sizeof(CellClass);
@@ -426,6 +442,7 @@ public:
             }
         }
 
+
         FDEBUG( FDebug::Controller << "\tFinished (@Upward Pass (M2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
         FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
         FDEBUG( FDebug::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" );
@@ -474,19 +491,22 @@ public:
                 // for each levels
                 for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
                     int numberOfCells = 0;
-                    while(octreeIterator.getCurrentGlobalIndex() !=  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){
+
+                    while(octreeIterator.getCurrentGlobalIndex() <  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){
                         octreeIterator.moveRight();
                     }
+
                     // for each cells
                     do{
                         iterArray[numberOfCells] = octreeIterator;
                         ++numberOfCells;
-                    } while(octreeIterator.getCurrentGlobalIndex() !=  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight());
+                    } while(octreeIterator.getCurrentGlobalIndex() <  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight());
                     avoidGotoLeftIterator.moveDown();
                     octreeIterator = avoidGotoLeftIterator;
 
                     leafsNeedOther[idxLevel] = new FBoolArray(numberOfCells);
 
+
                     // Which cells potentially need other data and at the same time
                     // are potentially needed by others
                     MortonIndex neighborsIndexes[208];
@@ -598,14 +618,14 @@ public:
                 // for each levels
                 for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
                     int numberOfCells = 0;
-                    while(octreeIterator.getCurrentGlobalIndex() !=  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){
+                    while(octreeIterator.getCurrentGlobalIndex() <  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){
                         octreeIterator.moveRight();
                     }
                     // for each cells
                     do{
                         iterArray[numberOfCells] = octreeIterator;
                         ++numberOfCells;
-                    } while(octreeIterator.getCurrentGlobalIndex() !=  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight());
+                    } while(octreeIterator.getCurrentGlobalIndex() <  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight());
                     avoidGotoLeftIterator.moveDown();
                     octreeIterator = avoidGotoLeftIterator;
 
@@ -654,7 +674,7 @@ public:
                     int numberOfCells = 0;
                     int realCellId = 0;
 
-                    while(octreeIterator.getCurrentGlobalIndex() !=  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){
+                    while(octreeIterator.getCurrentGlobalIndex() <  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){
                         octreeIterator.moveRight();
                     }
                     // for each cells
@@ -662,7 +682,7 @@ public:
                         if(leafsNeedOther[idxLevel]->get(realCellId++)){
                             iterArray[numberOfCells++] = octreeIterator;
                         }
-                    } while(octreeIterator.getCurrentGlobalIndex() !=  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight());
+                    } while(octreeIterator.getCurrentGlobalIndex() <  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight());
                     avoidGotoLeftIterator.moveDown();
                     octreeIterator = avoidGotoLeftIterator;
 
@@ -709,8 +729,6 @@ public:
             FDEBUG( FDebug::Controller << "\t\t\tTotal time to find "  << findCounter.cumulated() << " s.\n" );
         }
 
-
-
         { // second L2L
             FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
             FDEBUG(FTic counterTime);
@@ -786,7 +804,6 @@ public:
                 }
             }
 
-
             FDEBUG( FDebug::Controller << "\tFinished (@Downward Pass (L2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
             FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
             FDEBUG( FDebug::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" );
@@ -835,6 +852,9 @@ public:
             // index
             int indexToSend[nbProcess];
             memset(indexToSend, 0, sizeof(int) * nbProcess);
+            // number of particles to send to each process
+            int partsToSend[nbProcess];
+            memset(partsToSend, 0, sizeof(int) * nbProcess);
 
             // To know if a leaf has been already sent to a proc
             bool alreadySent[nbProcess];
@@ -875,6 +895,7 @@ public:
                                 delete[] reinterpret_cast<char*>(temp);
                             }
                             toSend[procToReceive][indexToSend[procToReceive]++] = iterArray[idxLeaf];
+                            partsToSend[procToReceive] += iterArray[idxLeaf].getCurrentListSrc()->getSize();
                         }
                     }
                 }
@@ -887,10 +908,10 @@ public:
             int globalReceiveMap[nbProcess * nbProcess];
             memset(globalReceiveMap, 0, sizeof(int) * nbProcess * nbProcess);
 
-            mpiassert( MPI_Allgather( indexToSend, nbProcess, MPI_INT, globalReceiveMap, nbProcess, MPI_INT, MPI_COMM_WORLD),  __LINE__ );
+            mpiassert( MPI_Allgather( partsToSend, nbProcess, MPI_INT, globalReceiveMap, nbProcess, MPI_INT, MPI_COMM_WORLD),  __LINE__ );
 
             for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
-                printf("indexToSend[%d] = %d\n", idxProc, indexToSend[idxProc]);
+                printf("indexToSend[%d] = %d leaves, %d parts\n", idxProc, indexToSend[idxProc], partsToSend[idxProc]);
             }
 
             printf("Will send ...\n");
@@ -908,16 +929,16 @@ public:
             for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
                 if(indexToSend[idxProc] != 0){
                     printf("Send %d to %d\n", indexToSend[idxProc], idxProc);
-                    sendBuffer[idxProc] = reinterpret_cast<ParticleClass*>(new char[sizeof(ParticleClass) * indexToSend[idxProc]]);
+                    sendBuffer[idxProc] = reinterpret_cast<ParticleClass*>(new char[sizeof(ParticleClass) * partsToSend[idxProc]]);
 
                     int currentIndex = 0;
-                    for(int idxLeaf = idxProc ; idxLeaf < indexToSend[idxProc] ; ++idxLeaf){
+                    for(int idxLeaf = 0 ; idxLeaf < indexToSend[idxProc] ; ++idxLeaf){
                         memcpy(&sendBuffer[idxProc][currentIndex], toSend[idxProc][idxLeaf].getCurrentListSrc()->data(),
                                sizeof(ParticleClass) * toSend[idxProc][idxLeaf].getCurrentListSrc()->getSize() );
                         currentIndex += toSend[idxProc][idxLeaf].getCurrentListSrc()->getSize();
                     }
 
-                    mpiassert( MPI_Isend( sendBuffer[idxProc], sizeof(ParticleClass) * indexToSend[idxProc] , MPI_BYTE ,
+                    mpiassert( MPI_Isend( sendBuffer[idxProc], sizeof(ParticleClass) * partsToSend[idxProc] , MPI_BYTE ,
                                          idxProc, TAG_P2P_PART, MPI_COMM_WORLD, &requests[iterRequest++]) , __LINE__ );
 
                 }
diff --git a/Tests/testFmmAlgorithmProc.cpp b/Tests/testFmmAlgorithmProc.cpp
index 58f81bcbd..789cfeaac 100644
--- a/Tests/testFmmAlgorithmProc.cpp
+++ b/Tests/testFmmAlgorithmProc.cpp
@@ -230,18 +230,18 @@ void ValidateFMMAlgoProc(OctreeClass* const badTree,
 /** To print an octree
   * used to debug and understand how the values were passed
   */
-template<class OctreeClass>
-void print(OctreeClass* const valideTree){
-    typename OctreeClass::Iterator octreeIterator(valideTree);
-    for(int idxLevel = valideTree->getHeight() - 1 ; idxLevel > 1 ; --idxLevel ){
-        do{
-            std::cout << "[" << octreeIterator.getCurrentGlobalIndex() << "] up:" << octreeIterator.getCurrentCell()->getDataUp() << " down:" << octreeIterator.getCurrentCell()->getDataDown() << "\t";
-        } while(octreeIterator.moveRight());
-        std::cout << "\n";
-        octreeIterator.gotoLeft();
-        octreeIterator.moveDown();
-    }
-}
+//template<class OctreeClass>
+//void print(OctreeClass* const valideTree){
+//    typename OctreeClass::Iterator octreeIterator(valideTree);
+//    for(int idxLevel = valideTree->getHeight() - 1 ; idxLevel > 1 ; --idxLevel ){
+//        do{
+//            std::cout << "[" << octreeIterator.getCurrentGlobalIndex() << "] up:" << octreeIterator.getCurrentCell()->getDataUp() << " down:" << octreeIterator.getCurrentCell()->getDataDown() << "\t";
+//        } while(octreeIterator.moveRight());
+//        std::cout << "\n";
+//        octreeIterator.gotoLeft();
+//        octreeIterator.moveDown();
+//    }
+//}
 
 struct ParticlesGroup {
     int number;
@@ -337,7 +337,7 @@ int main(int argc, char ** argv){
                 long outputSize = 0;
                 {
                     // create particles
-                    IndexedParticle*const realParticlesIndexed = reinterpret_cast<IndexedParticle*>(new char[loader.getNumberOfParticles() * sizeof(IndexedParticle)]);
+                    IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()];
                     F3DPosition boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2));
                     FTreeCoordinate host;
                     const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / (1 << (NbLevels - 1) );
@@ -353,7 +353,7 @@ int main(int argc, char ** argv){
 
                     // sort particles
                     FQuickSort::QsMpi<IndexedParticle,MortonIndex>(realParticlesIndexed, loader.getNumberOfParticles(),outputArray,outputSize);
-                    delete [] reinterpret_cast<char*>(realParticlesIndexed);
+                    delete [] realParticlesIndexed;
 
                     std::cout << "Sorted "<< outputSize <<  " particles..." << std::endl;
                 }
@@ -369,10 +369,11 @@ int main(int argc, char ** argv){
                         }
                         if( app.processId() != app.processCount() - 1){
                             MPI_Irecv(&otherFirstIndex, 1, MPI_LONG_LONG, app.processId() + 1, 0, MPI_COMM_WORLD, &req[reqiter++]);
-                            std::cout << "I receive index from right " << otherFirstIndex << std::endl;
                         }
 
                         MPI_Waitall(reqiter,req,0);
+                        std::cout << "I receive index from right " << otherFirstIndex << std::endl;
+
                         if( 0 < app.processId() && !outputSize){
                             std::cout << "I send to left the first index is from right " << otherFirstIndex << std::endl;
                             MPI_Send( &otherFirstIndex, 1, MPI_LONG_LONG, app.processId() - 1, 0, MPI_COMM_WORLD);
@@ -828,11 +829,11 @@ int main(int argc, char ** argv){
                 }
             }
 
-            printf("Will now take my own particles from %d to %d\n",FMath::Max(myLeftLeaf-leftLeafs,0) , FMath::Min(nbLeafs , FMath::Max(myLeftLeaf-leftLeafs,0) + FMath::Min(myRightLeaf,totalNbLeafs - rightLeafs) - myLeftLeaf));
+            printf("Will now take my own particles from %d to %d\n",FMath::Max(myLeftLeaf-leftLeafs,0) , totalNbLeafs - rightLeafs - leftLeafs - FMath::Max(0,leftLeafs + nbLeafs - myRightLeaf));
             printf("myLeftLeaf %d leftLeafs %d myRightLeaf %d rightLeafs %d totalNbLeafs %d\n",myLeftLeaf,leftLeafs, myRightLeaf, rightLeafs, totalNbLeafs);
             // insert the particles we already have
             if(leftLeafs != totalNbLeafs){
-                for(int idxLeafInsert = FMath::Max(myLeftLeaf-leftLeafs,0) ; idxLeafInsert < FMath::Min(nbLeafs , FMath::Max(myLeftLeaf-leftLeafs,0) + FMath::Min(myRightLeaf,totalNbLeafs - rightLeafs) - myLeftLeaf) ; ++idxLeafInsert){
+                for(int idxLeafInsert = FMath::Max(myLeftLeaf-leftLeafs,0) ; idxLeafInsert <  totalNbLeafs - rightLeafs - leftLeafs - FMath::Max(0,leftLeafs + nbLeafs - myRightLeaf) ; ++idxLeafInsert){
                     for(int idxPart = 0 ; idxPart < groups[idxLeafInsert].number ; ++idxPart){
                         realTree.insert(realParticles[groups[idxLeafInsert].positionInArray + idxPart]);
                     }
-- 
GitLab