diff --git a/Src/Components/FTestKernels.hpp b/Src/Components/FTestKernels.hpp
index c8af5ca709e55268fdc580888448d79080907a9c..6ec3512af7fdb296ed656da7d6492742dc04dfbf 100644
--- a/Src/Components/FTestKernels.hpp
+++ b/Src/Components/FTestKernels.hpp
@@ -30,7 +30,7 @@ public:
     void P2M(CellClass* const pole, const ContainerClass* const particles) {
         FTRACE( FTrace::Controller.enterFunction(FTrace::KERNELS, __FUNCTION__ , __FILE__ , __LINE__) );
         // the pole represents all particles under
-        pole->setDataUp(particles->getSize());
+        pole->setDataUp(pole->getDataUp() + particles->getSize());
         FTRACE( FTrace::Controller.leaveFunction(FTrace::KERNELS) );
     }
 
diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp
index cafc373cb08d36dbaa2494d4e2b01a1c3edd7c84..66bc14574dfa6086ec5cb5a75bfab3346f6cb7d2 100644
--- a/Src/Core/FFmmAlgorithmThreadProc.hpp
+++ b/Src/Core/FFmmAlgorithmThreadProc.hpp
@@ -15,6 +15,63 @@
 
 #include <omp.h>
 
+template <class CellClass>
+class FLightOctree {
+    class Node {
+        char data[sizeof(CellClass)];
+        Node* next[8];
+    public:
+        Node(){
+            memset(next, 0, sizeof(Node*)*8);
+        }
+        virtual ~Node(){
+            for(int idxNext = 0 ; idxNext < 8 ; ++idxNext){
+                delete next[idxNext];
+            }
+        }
+        void insert(const MortonIndex& index, const CellClass& cell, const int level){
+            if(level){
+                const int host = (index >> (3 * (level-1))) & 0x07;
+                if(!next[host]){
+                    next[host] = new Node();
+                }
+                next[host]->insert(index, cell, level - 1);
+            }
+            else{
+                memcpy(data, &cell, sizeof(CellClass));
+            }
+        }
+        const CellClass* getCell(const MortonIndex& index, const int level) const {
+            if(level){
+                const int host = (index >> (3 * (level-1))) & 0x07;
+                if(next[host]){
+                    return next[host]->getCell(index, level - 1);
+                }
+                return 0;
+            }
+            else{
+                return (CellClass*) data;
+            }
+        }
+    };
+
+    Node root;
+
+public:
+    FLightOctree(){
+    }
+
+    void insertCell(const MortonIndex& index, const CellClass& cell, const int level){
+        root.insert(index, cell, level);
+    }
+
+    const CellClass* getCell(const MortonIndex& index, const int level) const{
+        return root.getCell(index, level);
+    }
+};
+
+
+
 /**
 * @author Berenger Bramas (berenger.bramas@inria.fr)
 * @class FFmmAlgorithmThreadProc
@@ -225,6 +282,7 @@ public:
         }
         FDEBUG(computationCounter.tac());
 
+
         FDEBUG( FDebug::Controller << "\tFinished (@Bottom Pass (P2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
         FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" );
         FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) );
@@ -262,9 +320,11 @@ public:
         // for each levels
         for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
             // We do not touche cells that we are not responsible
-            printf("at level %d I go from %lld to %lld and the previous level >> 3 min %lld\n",
+            printf("at level %d I go from %lld to %lld and but my data goes from %lld to %lld\n",
                    idxLevel,workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min,
-                   workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max ,workingIntervalsPerLevel[(idxLevel + 1) * nbProcess + idProcess].min >>3);
+                   workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max ,
+                   realIntervalsPerLevel[idxLevel  * nbProcess + idProcess].min,
+                   realIntervalsPerLevel[idxLevel  * nbProcess + idProcess].max);
 
             // copy cells to work with
             int numberOfCells = 0;
@@ -315,7 +375,7 @@ public:
                 }
                 endProcThatSend = firstProcThatSend;
                 while( endProcThatSend < nbProcess &&
-                        realIntervalsPerLevel[idxLevel * nbProcess + firstProcThatSend].min == workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max){
+                        realIntervalsPerLevel[idxLevel * nbProcess + endProcThatSend].min <= workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max){
                     ++endProcThatSend;
                 }
                 if(firstProcThatSend != endProcThatSend){
@@ -366,7 +426,6 @@ public:
             }
         }
 
-
         FDEBUG( FDebug::Controller << "\tFinished (@Upward Pass (M2M) = "  << counterTime.tacAndElapsed() << "s)\n" );
         FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.cumulated() << " s\n" );
         FDEBUG( FDebug::Controller << "\t\t Send : " << sendCounter.cumulated() << " s\n" );
@@ -393,34 +452,252 @@ public:
             FDEBUG(FTic findCounter);
 
 
-            typename OctreeClass::Iterator octreeIterator(tree);
-            octreeIterator.moveDown();
-            typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
+            // pointer to send
+            typename OctreeClass::Iterator* toSend[nbProcess * OctreeHeight];
+            memset(toSend, 0, sizeof(typename OctreeClass::Iterator*) * nbProcess * OctreeHeight );
+            int sizeToSend[nbProcess * OctreeHeight];
+            memset(sizeToSend, 0, sizeof(int) * nbProcess * OctreeHeight);
+            // index
+            int indexToSend[nbProcess * OctreeHeight];
+            memset(indexToSend, 0, sizeof(int) * nbProcess * OctreeHeight);
 
-            // for each levels
-            for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
-                int numberOfCells = 0;
-                // for each cells
-                do{
-                    iterArray[numberOfCells] = octreeIterator;
-                    ++numberOfCells;
-                } while(octreeIterator.moveRight());
-                avoidGotoLeftIterator.moveDown();
-                octreeIterator = avoidGotoLeftIterator;
+            // To know if a leaf has been already sent to a proc
+            bool alreadySent[nbProcess];
 
-                FDEBUG(computationCounter.tic());
-                #pragma omp parallel
-                {
-                    KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
-                    const CellClass* neighbors[208];
+            FBoolArray* leafsNeedOther[OctreeHeight];
+            memset(leafsNeedOther, 0, sizeof(FBoolArray) * OctreeHeight);
 
-                    #pragma omp for  schedule(dynamic) nowait
+            {
+                typename OctreeClass::Iterator octreeIterator(tree);
+                octreeIterator.moveDown();
+                typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
+                // for each levels
+                for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
+                    int numberOfCells = 0;
+                    while(octreeIterator.getCurrentGlobalIndex() !=  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){
+                        octreeIterator.moveRight();
+                    }
+                    // for each cells
+                    do{
+                        iterArray[numberOfCells] = octreeIterator;
+                        ++numberOfCells;
+                    } while(octreeIterator.getCurrentGlobalIndex() !=  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight());
+                    avoidGotoLeftIterator.moveDown();
+                    octreeIterator = avoidGotoLeftIterator;
+
+                    leafsNeedOther[idxLevel] = new FBoolArray(numberOfCells);
+
+                    // Which cell potentialy need other data and in the same time
+                    // are potentialy needed by other
+                    MortonIndex neighborsIndexes[208];
                     for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
-                        const int counter = tree->getDistantNeighbors(neighbors,  iterArray[idxCell].getCurrentGlobalCoordinate(),idxLevel);
-                        if(counter) myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
+                        // Find the M2L neigbors of a cell
+                        const int counter = getDistantNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(),idxLevel,neighborsIndexes);
+
+                        memset(alreadySent, false, sizeof(bool) * nbProcess);
+                        bool needOther = false;
+                        // Test each negibors to know which one do not belong to us
+                        for(int idxNeigh = 0 ; idxNeigh < counter ; ++idxNeigh){
+                            if(neighborsIndexes[idxNeigh] < workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min
+                                    || workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max < neighborsIndexes[idxNeigh]){
+                                int procToReceive = idProcess;
+                                while( 0 != procToReceive && neighborsIndexes[idxNeigh] < workingIntervalsPerLevel[idxLevel * nbProcess + procToReceive].min ){
+                                    --procToReceive;
+                                }
+                                while( procToReceive != nbProcess -1 && workingIntervalsPerLevel[idxLevel * nbProcess + procToReceive].max < neighborsIndexes[idxNeigh]){
+                                    ++procToReceive;
+                                }
+                                // Maybe already sent to that proc?
+                                if( !alreadySent[procToReceive]
+                                    && workingIntervalsPerLevel[idxLevel * nbProcess + procToReceive].min <= neighborsIndexes[idxNeigh]
+                                    && neighborsIndexes[idxNeigh] <= workingIntervalsPerLevel[idxLevel * nbProcess + procToReceive].max){
+
+                                    alreadySent[procToReceive] = true;
+
+                                    needOther = true;
+
+                                    if(indexToSend[idxLevel * nbProcess + procToReceive] ==  sizeToSend[idxLevel * nbProcess + procToReceive]){
+                                        const int previousSize = sizeToSend[idxLevel * nbProcess + procToReceive];
+                                        sizeToSend[idxLevel * nbProcess + procToReceive] = FMath::Max(int(10*sizeof(typename OctreeClass::Iterator)), int(sizeToSend[idxLevel * nbProcess + procToReceive] * 1.5));
+                                        typename OctreeClass::Iterator* temp = toSend[idxLevel * nbProcess + procToReceive];
+                                        toSend[idxLevel * nbProcess + procToReceive] = reinterpret_cast<typename OctreeClass::Iterator*>(new char[sizeof(typename OctreeClass::Iterator) * sizeToSend[idxLevel * nbProcess + procToReceive]]);
+                                        memcpy(toSend[idxLevel * nbProcess + procToReceive], temp, previousSize * sizeof(typename OctreeClass::Iterator));
+                                        delete[] reinterpret_cast<char*>(temp);
+                                    }
+
+                                    toSend[idxLevel * nbProcess + procToReceive][indexToSend[idxLevel * nbProcess + procToReceive]++] = iterArray[idxCell];
+                                }
+                            }
+                        }
+                        if(needOther){
+                            leafsNeedOther[idxLevel]->set(idxCell,true);
+                        }
+
+                    }
+
+                }
+            }
+
+            printf("All gather...\n");
+
+            // All process say to each others
+            // what the will send to who
+            int globalReceiveMap[nbProcess * nbProcess * OctreeHeight];
+            memset(globalReceiveMap, 0, sizeof(int) * nbProcess * nbProcess * OctreeHeight);
+
+            mpiassert( MPI_Allgather( indexToSend, nbProcess * OctreeHeight, MPI_INT, globalReceiveMap, nbProcess * OctreeHeight, MPI_INT, MPI_COMM_WORLD),  __LINE__ );
+
+            printf("Will send ...\n");
+
+            // Then they can send and receive (because they know what they will receive)
+            // To send in asynchrone way
+            MPI_Request requests[2 * nbProcess * OctreeHeight];
+            int iterRequest = 0;
+
+            CellClass* sendBuffer[nbProcess * OctreeHeight];
+            memset(sendBuffer, 0, sizeof(CellClass*) * nbProcess * OctreeHeight);
+
+            CellClass* recvBuffer[nbProcess * OctreeHeight];
+            memset(recvBuffer, 0, sizeof(CellClass*) * nbProcess * OctreeHeight);
+
+
+            for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
+                for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
+                    const int toSendAtProcAtLevel = indexToSend[idxLevel * nbProcess + idxProc];
+                    if(toSendAtProcAtLevel != 0){
+                        printf("Send %d to %d\n", toSendAtProcAtLevel, idxProc);
+                        sendBuffer[idxLevel * nbProcess + idxProc] = reinterpret_cast<CellClass*>(new char[sizeof(CellClass) * toSendAtProcAtLevel]);
+
+                        for(int idxLeaf = 0 ; idxLeaf < toSendAtProcAtLevel; ++idxLeaf){
+                            memcpy(&sendBuffer[idxLevel * nbProcess + idxProc][idxLeaf], toSend[idxLevel * nbProcess + idxProc][idxLeaf].getCurrentCell(),
+                                   sizeof(CellClass) );
+                        }
+
+                        mpiassert( MPI_Isend( sendBuffer[idxLevel * nbProcess + idxProc], toSendAtProcAtLevel * sizeof(CellClass) , MPI_BYTE ,
+                                             idxProc, idxLevel, MPI_COMM_WORLD, &requests[iterRequest++]) , __LINE__ );
+                    }
+
+                    const int toReceiveFromProcAtLevel = globalReceiveMap[(idxProc * nbProcess * OctreeHeight) + idxLevel * nbProcess + idProcess];
+                    if(toReceiveFromProcAtLevel){
+                        printf("Receive %d from %d\n", toReceiveFromProcAtLevel, idxProc);
+                        recvBuffer[idxLevel * nbProcess + idxProc] = reinterpret_cast<CellClass*>(new char[sizeof(CellClass) * toReceiveFromProcAtLevel]);
+
+                        mpiassert( MPI_Irecv(recvBuffer[idxLevel * nbProcess + idxProc], toReceiveFromProcAtLevel * sizeof(CellClass), MPI_BYTE,
+                                            idxProc, idxLevel, MPI_COMM_WORLD, &requests[iterRequest++]) , __LINE__ );
                     }
                 }
-                FDEBUG(computationCounter.tac());
+            }
+
+            printf("Compute ...\n");
+
+            {
+                typename OctreeClass::Iterator octreeIterator(tree);
+                octreeIterator.moveDown();
+                typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
+                // Now we can compute all the data
+                // for each levels
+                for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
+                    int numberOfCells = 0;
+                    while(octreeIterator.getCurrentGlobalIndex() !=  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){
+                        octreeIterator.moveRight();
+                    }
+                    // for each cells
+                    do{
+                        iterArray[numberOfCells] = octreeIterator;
+                        ++numberOfCells;
+                    } while(octreeIterator.getCurrentGlobalIndex() !=  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight());
+                    avoidGotoLeftIterator.moveDown();
+                    octreeIterator = avoidGotoLeftIterator;
+
+                    FDEBUG(computationCounter.tic());
+                    #pragma omp parallel
+                    {
+                        KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
+                        const CellClass* neighbors[208];
+
+                        #pragma omp for  schedule(dynamic) nowait
+                        for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
+                            const int counter = tree->getDistantNeighbors(neighbors,  iterArray[idxCell].getCurrentGlobalCoordinate(),idxLevel);
+                            if(counter) myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
+                        }
+                    }
+                    FDEBUG(computationCounter.tac());
+                }
+            }
+
+
+            printf("Wait ...\n");
+            // Wait to receive every things (and send every things)
+            MPI_Waitall(iterRequest, requests, 0);
+
+            printf("Now process other...\n");
+
+            {
+                typename OctreeClass::Iterator octreeIterator(tree);
+                octreeIterator.moveDown();
+                typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
+                // compute the second time
+                // for each levels
+                for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
+                    // put the received data into a temporary tree
+                    FLightOctree<CellClass> tempTree;
+                    for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){
+                        const int toReceiveFromProcAtLevel = globalReceiveMap[(idxProc * nbProcess * OctreeHeight) + idxLevel * nbProcess + idProcess];
+                        const CellClass* const cells = reinterpret_cast<CellClass*>(recvBuffer[idxLevel * nbProcess + idxProc]);
+                        for(int idxCell = 0 ; idxCell < toReceiveFromProcAtLevel ; ++idxCell){
+                            tempTree.insertCell(cells[idxCell].getMortonIndex(), cells[idxCell], idxLevel);
+                        }
+                    }
+
+                    // take cells from our octree only if they are
+                    // linked to received data
+                    int numberOfCells = 0;
+                    int realCellId = 0;
+
+                    while(octreeIterator.getCurrentGlobalIndex() !=  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].min){
+                        octreeIterator.moveRight();
+                    }
+                    // for each cells
+                    do{
+                        if(leafsNeedOther[idxLevel]->get(realCellId++)){
+                            iterArray[numberOfCells++] = octreeIterator;
+                        }
+                    } while(octreeIterator.getCurrentGlobalIndex() !=  workingIntervalsPerLevel[idxLevel * nbProcess + idProcess].max && octreeIterator.moveRight());
+                    avoidGotoLeftIterator.moveDown();
+                    octreeIterator = avoidGotoLeftIterator;
+
+                    delete leafsNeedOther[idxLevel];
+                    leafsNeedOther[idxLevel] = 0;
+
+                    // Compute this cells
+                    FDEBUG(computationCounter.tic());
+                    #pragma omp parallel
+                    {
+                        KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
+                        MortonIndex neighborsIndex[208];
+                        const CellClass* neighbors[208];
+
+                        #pragma omp for  schedule(dynamic) nowait
+                        for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){
+                            // compute indexes
+                            const int counterNeighbors = getDistantNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, neighborsIndex);
+
+                            int counter = 0;
+                            // does we receive this index from someone?
+                            for(int idxNeig = 0 ;idxNeig < counterNeighbors ; ++idxNeig){
+                                const CellClass* const cellFromOtherProc = tempTree.getCell(neighborsIndex[idxNeig], idxLevel);
+                                if(cellFromOtherProc){
+                                    neighbors[counter++] = cellFromOtherProc;
+                                }
+                            }
+                            // need to compute
+                            if(counter){
+                                myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel);
+                            }
+                        }
+                    }
+                    FDEBUG(computationCounter.tac());
+                }
             }
 
             FDEBUG( FDebug::Controller << "\tFinished (@Downward Pass (M2L) = "  << counterTime.tacAndElapsed() << "s)\n" );
@@ -433,6 +710,7 @@ public:
         }
 
 
+
         { // second L2L
             FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
             FDEBUG(FTic counterTime);
@@ -487,6 +765,7 @@ public:
                             (realIntervalsPerLevel[(idxLevel+1) * nbProcess + endProcThatRecv].min >> 3) == iterArray[numberOfCells - 1].getCurrentGlobalIndex()){
                         ++endProcThatRecv;
                     }
+
                     if(endProcThatRecv != idProcess + 1){
                         for(int idxProc = firstProcThatRecv ; idxProc < endProcThatRecv ; ++idxProc ){
                             printf("I send 1 to proc %d at index %lld\n", idxProc, iterArray[numberOfCells - 1].getCurrentGlobalIndex());
@@ -569,9 +848,9 @@ public:
                 memset(alreadySent, false, sizeof(bool) * nbProcess);
                 bool needOther = false;
 
-                getNeighborsIndexes(iterArray[idxLeaf].getCurrentGlobalIndex(), limite, indexesNeighbors);
+                const int neighCount = getNeighborsIndexes(iterArray[idxLeaf].getCurrentGlobalIndex(), limite, indexesNeighbors);
 
-                for(int idxNeigh = 0 ; idxNeigh < 26 ; ++idxNeigh){
+                for(int idxNeigh = 0 ; idxNeigh < neighCount ; ++idxNeigh){
                     if(indexesNeighbors[idxNeigh] < intervals[idProcess].min || intervals[idProcess].max < indexesNeighbors[idxNeigh]){
                         needOther = true;
 
@@ -580,6 +859,7 @@ public:
                         while( procToReceive != 0 && indexesNeighbors[idxNeigh] < intervals[procToReceive].min){
                             --procToReceive;
                         }
+
                         while( procToReceive != nbProcess - 1 && intervals[procToReceive].max < indexesNeighbors[idxNeigh]){
                             ++procToReceive;
                         }
@@ -588,7 +868,7 @@ public:
                             alreadySent[procToReceive] = true;
                             if(indexToSend[procToReceive] ==  sizeToSend[procToReceive]){
                                 const int previousSize = sizeToSend[procToReceive];
-                                sizeToSend[procToReceive] = FMath::Max(50, int(sizeToSend[procToReceive] * 1.5));
+                                sizeToSend[procToReceive] = FMath::Max(10*int(sizeof(typename OctreeClass::Iterator)), int(sizeToSend[procToReceive] * 1.5));
                                 typename OctreeClass::Iterator* temp = toSend[procToReceive];
                                 toSend[procToReceive] = reinterpret_cast<typename OctreeClass::Iterator*>(new char[sizeof(typename OctreeClass::Iterator) * sizeToSend[procToReceive]]);
                                 memcpy(toSend[procToReceive], temp, previousSize * sizeof(typename OctreeClass::Iterator));
@@ -757,9 +1037,9 @@ public:
 
                     if(leafsNeedOther.get(idxLeafs)){
                         MortonIndex indexesNeighbors[26];
-                        getNeighborsIndexes(currentIter.index, limite, indexesNeighbors);
+                        const int neighCount = getNeighborsIndexes(currentIter.index, limite, indexesNeighbors);
                         int counterAll = counter;
-                        for(int idxNeigh = 0 ; idxNeigh < 26 ; ++idxNeigh){
+                        for(int idxNeigh = 0 ; idxNeigh < neighCount ; ++idxNeigh){
                             if(indexesNeighbors[idxNeigh] < intervals[idProcess].min || intervals[idProcess].max < indexesNeighbors[idxNeigh]){
                                 ContainerClass*const hypotheticNeighbor = otherP2Ptree.getLeafSrc(indexesNeighbors[idxNeigh]);
                                 if(hypotheticNeighbor){
@@ -787,7 +1067,7 @@ public:
     }
 
 
-    void getNeighborsIndexes(const MortonIndex centerIndex, const long limite, MortonIndex indexes[26]){
+    int getNeighborsIndexes(const MortonIndex centerIndex, const long limite, MortonIndex indexes[26]) const{
         FTreeCoordinate center;
         center.setPositionFromMorton(centerIndex, OctreeHeight - 1);
         int idxNeig = 0;
@@ -809,6 +1089,53 @@ public:
                 }
             }
         }
+        return idxNeig;
+    }
+
+    int getDistantNeighbors(const FTreeCoordinate& workingCell,const int inLevel, MortonIndex inNeighbors[208]) const{
+
+        // Then take each child of the parent's neighbors if not in directNeighbors
+        // Father coordinate
+        const FTreeCoordinate parentCell(workingCell.getX()>>1,workingCell.getY()>>1,workingCell.getZ()>>1);
+
+        // Limite at parent level number of box (split by 2 by level)
+        const long limite = FMath::pow(2,inLevel-1);
+
+        int idxNeighbors = 0;
+        // We test all cells around
+        for(long idxX = -1 ; idxX <= 1 ; ++idxX){
+            if(!FMath::Between(parentCell.getX() + idxX,0l,limite)) continue;
+
+            for(long idxY = -1 ; idxY <= 1 ; ++idxY){
+                if(!FMath::Between(parentCell.getY() + idxY,0l,limite)) continue;
+
+                for(long idxZ = -1 ; idxZ <= 1 ; ++idxZ){
+                    if(!FMath::Between(parentCell.getZ() + idxZ,0l,limite)) continue;
+
+                    // if we are not on the current cell
+                    if( idxX || idxY || idxZ ){
+                        const FTreeCoordinate other(parentCell.getX() + idxX,parentCell.getY() + idxY,parentCell.getZ() + idxZ);
+                        const MortonIndex mortonOther = other.getMortonIndex(inLevel-1);
+
+                        // For each child
+                        for(int idxCousin = 0 ; idxCousin < 8 ; ++idxCousin){
+                            const FTreeCoordinate potentialNeighbor((other.getX()<<1) | (idxCousin>>2 & 1),
+                                                                   (other.getY()<<1) | (idxCousin>>1 & 1),
+                                                                   (other.getZ()<<1) | (idxCousin&1));
+                            // Test if it is a direct neighbor
+                            if(FMath::Abs(workingCell.getX() - potentialNeighbor.getX()) > 1 ||
+                                    FMath::Abs(workingCell.getY() - potentialNeighbor.getY()) > 1 ||
+                                    FMath::Abs(workingCell.getZ() - potentialNeighbor.getZ()) > 1){
+                                // add to neighbors
+                                inNeighbors[idxNeighbors++] = (mortonOther << 3) | idxCousin;
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        return idxNeighbors;
     }
 };
 
diff --git a/Tests/testFmbAlgorithmProc.cpp b/Tests/testFmbAlgorithmProc.cpp
index cbd066cb7d24b464429b0ec0a80063c243e3f531..75b4bd49987345e5c330e52a2fb6742139a8a627 100644
--- a/Tests/testFmbAlgorithmProc.cpp
+++ b/Tests/testFmbAlgorithmProc.cpp
@@ -321,8 +321,8 @@ int main(int argc, char ** argv){
 
     KernelClass kernels(NbLevels,loader.getBoxWidth());
 
-    FmmClass algo(app,&tree,&kernels);
-    algo.execute();
+    //FmmClass algo(app,&tree,&kernels);
+    //algo.execute();
 #ifdef VALIDATE_FMM
     FmmClassNoProc algoValide(&treeValide,&kernels);
     algoValide.execute();
@@ -350,8 +350,8 @@ int main(int argc, char ** argv){
             ++NbLeafs;
         } while(countLeafsIterator.moveRight());
 
-        const int startIdx = algo.getLeft(NbLeafs);
-        const int endIdx = algo.getRight(NbLeafs);
+        const int startIdx = 0;//algo.getLeft(NbLeafs);
+        const int endIdx = 0;//algo.getRight(NbLeafs);
 
         std::cout <<"From " << startIdx << " to " << endIdx << "  NbLeafs is " << NbLeafs << std::endl;
 
diff --git a/Tests/testFmmAlgorithmProc.cpp b/Tests/testFmmAlgorithmProc.cpp
index 45ac8b9392ff689f2bbd924279a4b19d4b8bcccf..58f81bcbd107eda582146fa249944d397f6a6490 100644
--- a/Tests/testFmmAlgorithmProc.cpp
+++ b/Tests/testFmmAlgorithmProc.cpp
@@ -96,13 +96,12 @@ public :
 /** This function test the octree to be sure that the fmm algorithm
   * has worked completly.
   */
-template<class OctreeClass, class ContainerClass, class FmmClassProc>
+template<class OctreeClass, class ContainerClass>
 void ValidateFMMAlgoProc(OctreeClass* const badTree,
-                         OctreeClass* const valideTree,
-                         FmmClassProc*const fmm){
-    const int OctreeHeight = badTree->getHeight();
+                         OctreeClass* const valideTree){
     std::cout << "Check Result\n";
-    {
+    /*{
+        const int OctreeHeight = badTree->getHeight();
         typename OctreeClass::Iterator octreeIterator(badTree);
         octreeIterator.gotoBottomLeft();
 
@@ -150,12 +149,12 @@ void ValidateFMMAlgoProc(OctreeClass* const badTree,
             octreeIteratorValide.moveUp();
             octreeIteratorValide.gotoLeft();
         }
-    }
+    }*/
     {
         int NbPart = 0;
         int NbLeafs = 0;
         { // Check that each particle has been summed with all other
-            typename OctreeClass::Iterator octreeIterator(badTree);
+            typename OctreeClass::Iterator octreeIterator(valideTree);
             octreeIterator.gotoBottomLeft();
             do{
                 NbPart += octreeIterator.getCurrentListSrc()->getSize();
@@ -164,38 +163,70 @@ void ValidateFMMAlgoProc(OctreeClass* const badTree,
             std::cout << "There is " << NbPart << " particles on " << NbLeafs << " Leafs" << std::endl;
         }
         {
-            const int startIdx = fmm->getLeft(NbLeafs);
-            const int endIdx = fmm->getRight(NbLeafs);
             // Check that each particle has been summed with all other
             typename OctreeClass::Iterator octreeIterator(badTree);
             octreeIterator.gotoBottomLeft();
 
-            for(int idx = 0 ; idx < startIdx ; ++idx){
-                octreeIterator.moveRight();
-            }
-
-            for(int idx = startIdx ; idx < endIdx ; ++idx){
+            do {
                 typename ContainerClass::BasicIterator iter(*octreeIterator.getCurrentListTargets());
-
                 const bool isUsingTsm = (octreeIterator.getCurrentListTargets() != octreeIterator.getCurrentListSrc());
-
-                while( iter.hasNotFinished() ){
+                for(int idxPart = 0 ; idxPart < octreeIterator.getCurrentListTargets()->getSize() ; ++idxPart){
                     // If a particles has been impacted by less than NbPart - 1 (the current particle)
                     // there is a problem
                     if( (!isUsingTsm && iter.data().getDataDown() != NbPart - 1) ||
                         (isUsingTsm && iter.data().getDataDown() != NbPart) ){
-                        std::cout << "Problem L2P + P2P, value on particle is : " << iter.data().getDataDown() << "\n";
+                        std::cout << "Problem L2P + P2P, value on particle is : " << iter.data().getDataDown() <<
+                                     " at pos " << idxPart << " index is " << octreeIterator.getCurrentGlobalIndex() << "\n";
                     }
                     iter.gotoNext();
                 }
-                octreeIterator.moveRight();
-            }
+            } while( octreeIterator.moveRight());
         }
     }
+    {
+        {
+            // Check that each particle has been summed with all other
+            typename OctreeClass::Iterator octreeIterator(badTree);
+            octreeIterator.gotoBottomLeft();
+
+            do {
+                if(octreeIterator.getCurrentListSrc()->getSize() != octreeIterator.getCurrentCell()->getDataUp()){
+                    printf("P2M problem nb part %d data up %ld \n",
+                           octreeIterator.getCurrentListSrc()->getSize(), octreeIterator.getCurrentCell()->getDataUp());
+                }
+            } while( octreeIterator.moveRight());
+        }
+    }
+
+    {
+        // Check that each particle has been summed with all other
+        typename OctreeClass::Iterator octreeIterator(badTree);
+        octreeIterator.gotoBottomLeft();
+
+        typename OctreeClass::Iterator valideOctreeIterator(valideTree);
+        valideOctreeIterator.gotoBottomLeft();
+        while(valideOctreeIterator.getCurrentGlobalIndex() != octreeIterator.getCurrentGlobalIndex()){
+            valideOctreeIterator.moveRight();
+        }
+
+        do {
+            if(valideOctreeIterator.getCurrentGlobalIndex() != octreeIterator.getCurrentGlobalIndex()){
+                printf("Do not have the same index valide %lld invalide %lld \n",
+                       valideOctreeIterator.getCurrentGlobalIndex(), octreeIterator.getCurrentGlobalIndex());
+                break;
+            }
+
+            if(octreeIterator.getCurrentListTargets()->getSize() != valideOctreeIterator.getCurrentListTargets()->getSize()){
+                printf("Do not have the same number of particle at leaf id %lld, valide %d invalide %d \n",
+                       octreeIterator.getCurrentGlobalIndex(), valideOctreeIterator.getCurrentListTargets()->getSize(), octreeIterator.getCurrentListTargets()->getSize());
+            }
+        }while( octreeIterator.moveRight() && valideOctreeIterator.moveRight());
+    }
 
     std::cout << "Done\n";
 }
 
+
 /** To print an octree
   * used to debug and understand how the values were passed
   */
@@ -212,7 +243,6 @@ void print(OctreeClass* const valideTree){
     }
 }
 
-
 struct ParticlesGroup {
     int number;
     int positionInArray;
@@ -844,9 +874,9 @@ int main(int argc, char ** argv){
 
             OctreeClass::Iterator octreeIterator(&treeValide);
             octreeIterator.gotoBottomLeft();
-            do{
+            do {
                 ++totalNbLeafs;
-            }while(octreeIterator.moveRight());
+            } while(octreeIterator.moveRight());
         }
 
         const int myLeftLeaf = app.getLeft(totalNbLeafs);
@@ -889,8 +919,6 @@ int main(int argc, char ** argv){
             }
         }
 
-
-
     }
 
     //////////////////////////////////////////////////////////////////////////////////
@@ -904,8 +932,8 @@ int main(int argc, char ** argv){
     FmmClassProc algo(app,&realTree,&kernels);
     algo.execute();
 
-    ///FmmClass algoValide(&treeValide,&kernels);
-    ///algoValide.execute();
+    FmmClass algoValide(&treeValide,&kernels);
+    algoValide.execute();
 
     counter.tac();
     std::cout << "Done  " << "(@Algorithm Particles = " << counter.elapsed() << "s)." << std::endl;
@@ -913,7 +941,7 @@ int main(int argc, char ** argv){
     //////////////////////////////////////////////////////////////////////////////////
     //////////////////////////////////////////////////////////////////////////////////
 
-    ///ValidateFMMAlgoProc<OctreeClass,ContainerClass,FmmClassProc>(&realTree,&treeValide,&algo);
+    ValidateFMMAlgoProc<OctreeClass,ContainerClass>(&realTree,&treeValide);
 
     //////////////////////////////////////////////////////////////////////////////////
     //////////////////////////////////////////////////////////////////////////////////