diff --git a/Examples/RotationMPIFMM.cpp b/Examples/RotationMPIFMM.cpp index 50084eb8611fec973cd27444bcf7d489389b26e6..978df5f73bcc22b128da2252d278a55f60685ced 100644 --- a/Examples/RotationMPIFMM.cpp +++ b/Examples/RotationMPIFMM.cpp @@ -164,6 +164,8 @@ int main(int argc, char* argv[]) tree.getBoxWidth(), tree.getHeight(), &finalParticles,&balancer); + std::cout << "Local nb particles after sort " + << finalParticles.getSize() << std::endl; for(FSize idx = 0 ; idx < finalParticles.getSize(); ++idx){ tree.insert(finalParticles[idx].position,finalParticles[idx].indexInFile,finalParticles[idx].physicalValue); } diff --git a/Licence.txt b/Licence.txt index 7be5457ef5d7c4c4357d70b515d5019756b5b8ef..dbaa609a5fcf69ca7343bdb582a3b71f374f3a33 100644 --- a/Licence.txt +++ b/Licence.txt @@ -4,6 +4,9 @@ the more restrictive has to be used. ScalFmm est régi par la licence CeCILL-C & LGPL, en cas de conflit la plus restrictive prime. +Folders under Addons might have separate Licence, in such case +one can find a dedicated Licence file where appropriate. + ////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////// diff --git a/Src/BalanceTree/FEqualize.hpp b/Src/BalanceTree/FEqualize.hpp index b0c46806c548a4df08200c0075bf623a01634612..95caba72ad4e7dc08805001ded2feb3a0e3f12a7 100644 --- a/Src/BalanceTree/FEqualize.hpp +++ b/Src/BalanceTree/FEqualize.hpp @@ -81,7 +81,10 @@ public: pack.elementTo = Min(allObjectives[idxProc].second , myCurrentInterval.second) - myCurrentInterval.first; // Next time give from the previous end currentElement = pack.elementTo; - packToSend.push_back(pack); + + if(pack.elementTo - pack.elementFrom != 0){ + packToSend.push_back(pack); + } // Progress idxProc += 1; } diff --git a/Src/BalanceTree/FPartitionsMapping.hpp b/Src/BalanceTree/FPartitionsMapping.hpp new file mode 100644 index 0000000000000000000000000000000000000000..50f2525978cd28204f038afa9d8fb062f0f32336 --- /dev/null +++ b/Src/BalanceTree/FPartitionsMapping.hpp @@ -0,0 +1,316 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FPARTITIONSMAPPING_HPP +#define FPARTITIONSMAPPING_HPP + +#include "Utils/FGlobal.hpp" +#include "Utils/FMpi.hpp" +#include "Containers/FVector.hpp" +#include "FLeafBalance.hpp" +#include "Files/FMpiTreeBuilder.hpp" + +template <class FReal> +class FPartitionsMapping { +protected: + FMpi::FComm comm; + + //! The number of particles from the initial decomposition + FSize nbParticlesInitial; + //! 
The number of particles from the scalfmm decomposition + FSize nbParticlesWorking; + + std::unique_ptr<FSize[]> nbParticlesSentToOthers; + std::unique_ptr<FSize[]> offsetNbParticlesSentToOthers; + std::unique_ptr<FSize[]> nbParticlesRecvFromOthers; + std::unique_ptr<FSize[]> offsetNbParticlesRecvFromOthers; + + std::unique_ptr<FSize[]> mappingToOthers; + + +public: + FPartitionsMapping(const FMpi::FComm& inComm) + : comm(inComm), nbParticlesInitial(0), nbParticlesWorking(0) { + } + + void setComm(const FMpi::FComm& inComm){ + comm = inComm; + } + + template< int NbPhysicalValuesPerPart> + struct TestParticle{ + FPoint<FReal> position; + std::array<FReal, NbPhysicalValuesPerPart> physicalValues; + FSize localIndex; + int initialProcOwner; + + const FPoint<FReal>& getPosition() const { + return position; + } + }; + + template< int NbPhysicalValuesPerPart, class FillerClass> + FVector<TestParticle<NbPhysicalValuesPerPart>> distributeParticles(const FSize inNbParticles, + const FPoint<FReal>& centerOfBox, const FReal boxWidth, + const int TreeHeight, FillerClass filler){ + nbParticlesInitial = inNbParticles; + + //////////////////////////////////////////////////////// + + std::unique_ptr<TestParticle<NbPhysicalValuesPerPart>[]> initialParticles(new TestParticle<NbPhysicalValuesPerPart>[inNbParticles]); + + // Create the array to distribute + for(int idxPart = 0 ; idxPart < nbParticlesInitial ; ++idxPart){ + filler(idxPart, &initialParticles[idxPart].position, &initialParticles[idxPart].physicalValues); + initialParticles[idxPart].localIndex = idxPart; + initialParticles[idxPart].initialProcOwner = comm.processId(); + } + + FVector<TestParticle<NbPhysicalValuesPerPart>> finalParticles; + FLeafBalance balancer; + FMpiTreeBuilder< FReal,TestParticle<NbPhysicalValuesPerPart> >::DistributeArrayToContainer(comm,initialParticles.get(), + nbParticlesInitial, + centerOfBox, + boxWidth, + TreeHeight, + &finalParticles, &balancer); + + FQuickSort<TestParticle<NbPhysicalValuesPerPart>,FSize>::QsOmp(finalParticles.data(), finalParticles.getSize(), + [](const TestParticle<NbPhysicalValuesPerPart>& p1, + const TestParticle<NbPhysicalValuesPerPart>& p2){ + return p1.initialProcOwner < p2.initialProcOwner + || (p1.initialProcOwner == p2.initialProcOwner + && p1.localIndex < p2.localIndex); + }); + + //////////////////////////////////////////////////////// + + nbParticlesWorking = finalParticles.getSize(); + + nbParticlesRecvFromOthers.reset(new FSize[comm.processCount()]); + memset(nbParticlesRecvFromOthers.get(), 0 , sizeof(FSize)*comm.processCount()); + + for(FSize idxPart = 0 ; idxPart < finalParticles.getSize() ; ++idxPart){ + // Count the particles received from each proc + nbParticlesRecvFromOthers[finalParticles[idxPart].initialProcOwner] += 1; + + } + + offsetNbParticlesRecvFromOthers.reset(new FSize[comm.processCount()+1]); + offsetNbParticlesRecvFromOthers[0] = 0; + + for(int idxProc = 0 ; idxProc < comm.processCount() ; ++idxProc){ + offsetNbParticlesRecvFromOthers[idxProc+1] = offsetNbParticlesRecvFromOthers[idxProc] + + nbParticlesRecvFromOthers[idxProc]; + } + + //////////////////////////////////////////////////////// + + std::unique_ptr<FSize[]> nbParticlesRecvFromOthersAllAll(new FSize[comm.processCount()*comm.processCount()]); + // Exchange how many each proc receive from aother + FMpi::MpiAssert( MPI_Allgather( nbParticlesRecvFromOthers.get(), comm.processCount(), FMpi::GetType(FSize()), + nbParticlesRecvFromOthersAllAll.get(), comm.processCount(), + FMpi::GetType(FSize()), comm.getComm()), 
__LINE__ ); + + //////////////////////////////////////////////////////// + + nbParticlesSentToOthers.reset(new FSize[comm.processCount()]); + FSize checkerSent = 0; + offsetNbParticlesSentToOthers.reset(new FSize[comm.processCount()+1]); + offsetNbParticlesSentToOthers[0] = 0; + + for(int idxProc = 0 ; idxProc < comm.processCount() ; ++idxProc){ + nbParticlesSentToOthers[idxProc] = nbParticlesRecvFromOthersAllAll[comm.processCount()*idxProc + comm.processId()]; + checkerSent += nbParticlesSentToOthers[idxProc]; + offsetNbParticlesSentToOthers[idxProc+1] = offsetNbParticlesSentToOthers[idxProc] + + nbParticlesSentToOthers[idxProc]; + } + // I must have send what I was owning at the beginning + FAssertLF(checkerSent == nbParticlesInitial); + + //////////////////////////////////////////////////////// + + std::unique_ptr<FSize[]> localIdsRecvOrdered(new FSize[nbParticlesWorking]); + + // We list the local id in order of the different proc + for(FSize idxPart = 0 ; idxPart < finalParticles.getSize() ; ++idxPart){ + const int procOwner = finalParticles[idxPart].initialProcOwner; + localIdsRecvOrdered[idxPart] = finalParticles[idxPart].localIndex; + FAssertLF(offsetNbParticlesRecvFromOthers[procOwner] <= idxPart + && idxPart < offsetNbParticlesRecvFromOthers[procOwner+1]); + } + + //////////////////////////////////////////////////////// + + std::unique_ptr<FSize[]> localIdsSendOrdered(new FSize[nbParticlesInitial]); + std::unique_ptr<MPI_Request[]> requests(new MPI_Request[comm.processCount()*2]); + + int iterRequest = 0; + for(int idxProc = 0 ; idxProc < comm.processCount() ; ++idxProc){ + if(idxProc == comm.processId()){ + FAssertLF(nbParticlesRecvFromOthers[idxProc] == nbParticlesSentToOthers[idxProc]); + memcpy(&localIdsSendOrdered[offsetNbParticlesSentToOthers[idxProc]], + &localIdsRecvOrdered[offsetNbParticlesRecvFromOthers[idxProc]], + sizeof(FSize)*nbParticlesRecvFromOthers[idxProc]); + } + else{ + const FSize nbRecvFromProc = nbParticlesRecvFromOthers[idxProc]; + if(nbRecvFromProc){ + FMpi::MpiAssert( MPI_Isend(&localIdsRecvOrdered[offsetNbParticlesRecvFromOthers[idxProc]], + int(nbRecvFromProc), + FMpi::GetType(FSize()), idxProc, + 99, comm.getComm(), &requests[iterRequest++]), __LINE__ ); + } + const FSize nbSendToProc = nbParticlesSentToOthers[idxProc]; + if(nbSendToProc){ + FMpi::MpiAssert( MPI_Irecv(&localIdsSendOrdered[offsetNbParticlesSentToOthers[idxProc]], + int(nbSendToProc), + FMpi::GetType(FSize()), idxProc, + 99, comm.getComm(), &requests[iterRequest++]), __LINE__ ); + } + } + } + + FMpi::MpiAssert( MPI_Waitall( iterRequest, requests.get(), MPI_STATUSES_IGNORE), __LINE__ ); + + //////////////////////////////////////////////////////// + + mappingToOthers.reset(new FSize[nbParticlesInitial]); + for(FSize idxPart = 0; idxPart < nbParticlesInitial ; ++idxPart){ + mappingToOthers[localIdsSendOrdered[idxPart]] = idxPart; + } + + return std::move(finalParticles); + } + + //////////////////////////////////////////////////////// + + // physicalValues must be of size nbParticlesInitial + template< int NbPhysicalValuesPerPart> + std::unique_ptr<std::array<FReal, NbPhysicalValuesPerPart>[]> distributeData( + const std::array<FReal, NbPhysicalValuesPerPart> physicalValues[]){ + std::unique_ptr<std::array<FReal, NbPhysicalValuesPerPart>[]> physicalValuesRorder( + new std::array<FReal, NbPhysicalValuesPerPart>[nbParticlesInitial]); + + for(FSize idxPart = 0; idxPart < nbParticlesInitial ; ++idxPart){ + physicalValuesRorder[mappingToOthers[idxPart]] = physicalValues[idxPart]; + } + + // 
Allocate the array to store the physical values of my working interval + std::unique_ptr<std::array<FReal, NbPhysicalValuesPerPart>[]> recvPhysicalValues(new std::array<FReal, NbPhysicalValuesPerPart>[nbParticlesWorking]); + + std::unique_ptr<MPI_Request[]> requests(new MPI_Request[comm.processCount()*2]); + int iterRequest = 0; + for(int idxProc = 0 ; idxProc < comm.processCount() ; ++idxProc){ + if(idxProc == comm.processId()){ + FAssertLF(nbParticlesRecvFromOthers[idxProc] == nbParticlesSentToOthers[idxProc]); + memcpy(&recvPhysicalValues[offsetNbParticlesRecvFromOthers[idxProc]], + &physicalValuesRorder[offsetNbParticlesSentToOthers[idxProc]], + sizeof(std::array<FReal, NbPhysicalValuesPerPart>)*nbParticlesRecvFromOthers[idxProc]); + } + else{ + const FSize nbSendToProc = nbParticlesSentToOthers[idxProc]; + if(nbSendToProc){ + FMpi::MpiAssert( MPI_Isend( + const_cast<std::array<FReal, NbPhysicalValuesPerPart>*>(&physicalValuesRorder[offsetNbParticlesSentToOthers[idxProc]]), + int(nbSendToProc*sizeof(std::array<FReal, NbPhysicalValuesPerPart>)), + MPI_BYTE, idxProc, + 2222, comm.getComm(), &requests[iterRequest++]), __LINE__ );; + } + const FSize nbRecvFromProc = nbParticlesRecvFromOthers[idxProc]; + if(nbRecvFromProc){ + FMpi::MpiAssert( MPI_Irecv( + (void*)&recvPhysicalValues[offsetNbParticlesRecvFromOthers[idxProc]], + int(nbRecvFromProc*sizeof(std::array<FReal, NbPhysicalValuesPerPart>)), + MPI_INT, idxProc, + 2222, comm.getComm(), &requests[iterRequest++]), __LINE__ );; + } + } + } + + FMpi::MpiAssert( MPI_Waitall( iterRequest, requests.get(), MPI_STATUSES_IGNORE), __LINE__ ); + + return std::move(recvPhysicalValues); + } + + //////////////////////////////////////////////////////// + + // resValues must be of size nbParticlesWorking + template< int NbResValuesPerPart> + std::unique_ptr<std::array<FReal, NbResValuesPerPart>[]> getResultingData( + const std::array<FReal, NbResValuesPerPart> resValues[]){ + // First allocate the array to store the result + std::unique_ptr<std::array<FReal, NbResValuesPerPart>[]> recvPhysicalValues( + new std::array<FReal, NbResValuesPerPart>[nbParticlesInitial]); + + std::unique_ptr<MPI_Request[]> requests(new MPI_Request[comm.processCount()*2]); + int iterRequest = 0; + for(int idxProc = 0 ; idxProc < comm.processCount() ; ++idxProc){ + if(idxProc == comm.processId()){ + FAssertLF(nbParticlesRecvFromOthers[idxProc] == nbParticlesSentToOthers[idxProc]); + memcpy(&recvPhysicalValues[offsetNbParticlesSentToOthers[idxProc]], + &resValues[offsetNbParticlesRecvFromOthers[idxProc]], + sizeof(std::array<FReal, NbResValuesPerPart>)*nbParticlesRecvFromOthers[idxProc]); + } + else{ + // I originaly receive nbRecvFromProc, so I should + // send nbRecvFromProc back to the real owner + const FSize nbRecvFromProc = nbParticlesRecvFromOthers[idxProc]; + if(nbRecvFromProc){ + FMpi::MpiAssert( MPI_Isend( + const_cast<std::array<FReal, NbResValuesPerPart>*>(&resValues[offsetNbParticlesRecvFromOthers[idxProc]]), + int(nbRecvFromProc*sizeof(std::array<FReal, NbResValuesPerPart>)), + MPI_BYTE, idxProc, + 1111, comm.getComm(), &requests[iterRequest++]), __LINE__ );; + } + // I sent nbSendToProc to idxProc, + // so I should receive nbSendToProc in my interval + const FSize nbSendToProc = nbParticlesSentToOthers[idxProc]; + if(nbSendToProc){ + FMpi::MpiAssert( MPI_Irecv( + &recvPhysicalValues[offsetNbParticlesSentToOthers[idxProc]], + int(nbSendToProc*sizeof(std::array<FReal, NbResValuesPerPart>)), + MPI_BYTE, idxProc, + 1111, comm.getComm(), &requests[iterRequest++]), 
__LINE__ );; + } + } + } + + FMpi::MpiAssert( MPI_Waitall( iterRequest, requests.get(), MPI_STATUSES_IGNORE), __LINE__ ); + + + std::unique_ptr<std::array<FReal, NbResValuesPerPart>[]> recvPhysicalValuesOrder( + new std::array<FReal, NbResValuesPerPart>[nbParticlesInitial]); + + for(FSize idxPart = 0; idxPart < nbParticlesInitial ; ++idxPart){ + recvPhysicalValuesOrder[idxPart] = recvPhysicalValues[mappingToOthers[idxPart]]; + } + + return std::move(recvPhysicalValuesOrder); + } + + //////////////////////////////////////////////////////// + + FSize getNbParticlesWorking() const{ + return nbParticlesWorking; + } + + FSize getMappingResultToLocal(const FSize inIdx) const{ + return mappingToOthers[inIdx]; + } + +}; + +#endif diff --git a/Src/Containers/FOctree.hpp b/Src/Containers/FOctree.hpp index 54cf81058543e83f2681d180ba420ebeb62e0d4b..832f7a782896717983ddb7b4662ebd71fbea9600 100644 --- a/Src/Containers/FOctree.hpp +++ b/Src/Containers/FOctree.hpp @@ -1685,6 +1685,10 @@ public: * @param function */ void forEachLeaf(std::function<void(LeafClass*)> function){ + if(isEmpty()){ + return; + } + Iterator octreeIterator(this); octreeIterator.gotoBottomLeft(); @@ -1698,6 +1702,10 @@ public: * @param function */ void forEachCell(std::function<void(CellClass*)> function){ + if(isEmpty()){ + return; + } + Iterator octreeIterator(this); octreeIterator.gotoBottomLeft(); @@ -1717,6 +1725,10 @@ public: * @param function */ void forEachCellWithLevel(std::function<void(CellClass*,const int)> function){ + if(isEmpty()){ + return; + } + Iterator octreeIterator(this); octreeIterator.gotoBottomLeft(); @@ -1736,6 +1748,10 @@ public: * @param function */ void forEachCellLeaf(std::function<void(CellClass*,LeafClass*)> function){ + if(isEmpty()){ + return; + } + Iterator octreeIterator(this); octreeIterator.gotoBottomLeft(); diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp index 34812c09bc31aefc72fc101f1942433ccfb3ad96..86fe46bb186b200166c4abb47c09f99225e34179 100644 --- a/Src/Core/FFmmAlgorithmThreadProc.hpp +++ b/Src/Core/FFmmAlgorithmThreadProc.hpp @@ -72,17 +72,20 @@ private: OctreeClass* const tree; ///< The octree to work on KernelClass** kernels; ///< The kernels - const FMpi::FComm& comm; ///< MPI comm + const FMpi::FComm comm; ///< MPI comm + FMpi::FComm fcomCompute; /// Used to store pointers to cells/leafs to work with - typename OctreeClass::Iterator* iterArray; + typename OctreeClass::Iterator* iterArray; /// Used to store pointers to cells/leafs to send/rcv typename OctreeClass::Iterator* iterArrayComm; int numberOfLeafs; ///< To store the size at the previous level const int MaxThreads; ///< Max number of thread allowed by openmp - const int nbProcess; ///< Process count - const int idProcess; ///< Current process id + const int nbProcessOrig; ///< Process count + const int idProcessOrig; ///< Current process id + int nbProcess; ///< Process count + int idProcess; ///< Current process id const int OctreeHeight; ///< Tree height const int userChunkSize; @@ -149,12 +152,15 @@ public: tree(inTree), kernels(nullptr), comm(inComm), + fcomCompute(inComm), iterArray(nullptr), iterArrayComm(nullptr), numberOfLeafs(0), MaxThreads(FEnv::GetValue("SCALFMM_ALGO_NUM_THREADS",omp_get_max_threads())), - nbProcess(inComm.processCount()), - idProcess(inComm.processId()), + nbProcessOrig(inComm.processCount()), + idProcessOrig(inComm.processId()), + nbProcess(0), + idProcess(0), OctreeHeight(tree->getHeight()), userChunkSize(inUserChunkSize), 
leafLevelSeparationCriteria(inLeafLevelSeperationCriteria), @@ -164,9 +170,9 @@ public: FAssertLF(leafLevelSeparationCriteria < 3, "Separation criteria should be < 3"); this->kernels = new KernelClass*[MaxThreads]; - #pragma omp parallel num_threads(MaxThreads) +#pragma omp parallel num_threads(MaxThreads) { - #pragma omp critical (InitFFmmAlgorithmThreadProc) +#pragma omp critical (InitFFmmAlgorithmThreadProc) { this->kernels[omp_get_thread_num()] = new KernelClass(*inKernels); } @@ -175,7 +181,7 @@ public: FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); FLOG(FLog::Controller << "FFmmAlgorithmThreadProc\n"); - FLOG(FLog::Controller << "Max threads = " << MaxThreads << ", Procs = " << nbProcess << ", I am " << idProcess << ".\n"); + FLOG(FLog::Controller << "Max threads = " << MaxThreads << ", Procs = " << nbProcessOrig << ", I am " << idProcessOrig << ".\n"); FLOG(FLog::Controller << "Chunck Size = " << userChunkSize << "\n"); } @@ -196,136 +202,158 @@ protected: * Call this function to run the complete algorithm */ void executeCore(const unsigned operationsToProceed) override { - // Count leaf + // We are not involve if the tree is empty + const int iHaveParticles = (!tree->isEmpty()); + + std::unique_ptr<int[]> hasParticles(new int[comm.processCount()]); + FMpi::Assert( MPI_Allgather(const_cast<int*>(&iHaveParticles), 1,MPI_INT, + hasParticles.get(), 1, MPI_INT, + comm.getComm()), __LINE__); + + fcomCompute = FMpi::FComm(comm); + fcomCompute.groupReduce(hasParticles.get()); + + if(iHaveParticles){ + + nbProcess = fcomCompute.processCount(); + idProcess = fcomCompute.processId(); + + FLOG(FLog::Controller << "Max threads = " << MaxThreads << ", Procs = " << nbProcess << ", I am " << idProcess << ".\n"); + + // Count leaf #ifdef SCALFMM_TRACE_ALGO - eztrace_resume(); + eztrace_resume(); #endif - this->numberOfLeafs = 0; - { - Interval myFullInterval; - {//Building the interval with the first and last leaves (and count the number of leaves) - typename OctreeClass::Iterator octreeIterator(tree); - octreeIterator.gotoBottomLeft(); - myFullInterval.leftIndex = octreeIterator.getCurrentGlobalIndex(); - do{ - ++this->numberOfLeafs; - } while(octreeIterator.moveRight()); - myFullInterval.rightIndex = octreeIterator.getCurrentGlobalIndex(); - } - // Allocate a number to store the pointer of the cells at a level - iterArray = new typename OctreeClass::Iterator[numberOfLeafs]; - iterArrayComm = new typename OctreeClass::Iterator[numberOfLeafs]; - FAssertLF(iterArray, "iterArray bad alloc"); - FAssertLF(iterArrayComm, "iterArrayComm bad alloc"); - - // We get the leftIndex/rightIndex indexes from each procs - FMpi::MpiAssert( MPI_Allgather( &myFullInterval, sizeof(Interval), MPI_BYTE, intervals, sizeof(Interval), MPI_BYTE, comm.getComm()), __LINE__ ); - - // Build my intervals for all levels - std::unique_ptr<Interval[]> myIntervals(new Interval[OctreeHeight]); - // At leaf level we know it is the full interval - myIntervals[OctreeHeight - 1] = myFullInterval; - - // We can estimate the interval for each level by using the parent/child relation - for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 0 ; --idxLevel){ - myIntervals[idxLevel].leftIndex = myIntervals[idxLevel+1].leftIndex >> 3; - myIntervals[idxLevel].rightIndex = myIntervals[idxLevel+1].rightIndex >> 3; - } + this->numberOfLeafs = 0; + { + Interval myFullInterval; + {//Building the interval with the first and last leaves (and count the number of leaves) + typename OctreeClass::Iterator octreeIterator(tree); + 
octreeIterator.gotoBottomLeft(); + myFullInterval.leftIndex = octreeIterator.getCurrentGlobalIndex(); + do{ + ++this->numberOfLeafs; + } while(octreeIterator.moveRight()); + myFullInterval.rightIndex = octreeIterator.getCurrentGlobalIndex(); + } + // Allocate a number to store the pointer of the cells at a level + iterArray = new typename OctreeClass::Iterator[numberOfLeafs]; + iterArrayComm = new typename OctreeClass::Iterator[numberOfLeafs]; + FAssertLF(iterArray, "iterArray bad alloc"); + FAssertLF(iterArrayComm, "iterArrayComm bad alloc"); + + // We get the leftIndex/rightIndex indexes from each procs + FMpi::MpiAssert( MPI_Allgather( &myFullInterval, sizeof(Interval), MPI_BYTE, intervals, sizeof(Interval), MPI_BYTE, fcomCompute.getComm()), __LINE__ ); + + // Build my intervals for all levels + std::unique_ptr<Interval[]> myIntervals(new Interval[OctreeHeight]); + // At leaf level we know it is the full interval + myIntervals[OctreeHeight - 1] = myFullInterval; + + // We can estimate the interval for each level by using the parent/child relation + for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 0 ; --idxLevel){ + myIntervals[idxLevel].leftIndex = myIntervals[idxLevel+1].leftIndex >> 3; + myIntervals[idxLevel].rightIndex = myIntervals[idxLevel+1].rightIndex >> 3; + } - // Process 0 uses the estimates as real intervals, but other processes - // should remove cells that belong to others - if(idProcess != 0){ - //We test for each level if process on left (idProcess-1) own cell I thought I owned - typename OctreeClass::Iterator octreeIterator(tree); - octreeIterator.gotoBottomLeft(); - octreeIterator.moveUp(); + // Process 0 uses the estimates as real intervals, but other processes + // should remove cells that belong to others + if(idProcess != 0){ + //We test for each level if process on left (idProcess-1) own cell I thought I owned + typename OctreeClass::Iterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + octreeIterator.moveUp(); - // At h-1 the working limit is the parent of the right cell of the proc on the left - MortonIndex workingLimitAtLevel = intervals[idProcess-1].rightIndex >> 3; + // At h-1 the working limit is the parent of the right cell of the proc on the left + MortonIndex workingLimitAtLevel = intervals[idProcess-1].rightIndex >> 3; - // We check if we have no more work to do - int nullIntervalFromLevel = 0; + // We check if we have no more work to do + int nullIntervalFromLevel = 0; - for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 1 && nullIntervalFromLevel == 0 ; --idxLevel){ - while(octreeIterator.getCurrentGlobalIndex() <= workingLimitAtLevel){ - if( !octreeIterator.moveRight() ){ - // We cannot move right we are not owner of any more cell - nullIntervalFromLevel = idxLevel; - break; + for(int idxLevel = OctreeHeight - 2 ; idxLevel >= 1 && nullIntervalFromLevel == 0 ; --idxLevel){ + while(octreeIterator.getCurrentGlobalIndex() <= workingLimitAtLevel){ + if( !octreeIterator.moveRight() ){ + // We cannot move right we are not owner of any more cell + nullIntervalFromLevel = idxLevel; + break; + } + } + // If we are responsible for some cells at this level keep the first index + if(nullIntervalFromLevel == 0){ + myIntervals[idxLevel].leftIndex = octreeIterator.getCurrentGlobalIndex(); + octreeIterator.moveUp(); + workingLimitAtLevel >>= 3; } } - // If we are responsible for some cells at this level keep the first index - if(nullIntervalFromLevel == 0){ - myIntervals[idxLevel].leftIndex = octreeIterator.getCurrentGlobalIndex(); - 
octreeIterator.moveUp(); - workingLimitAtLevel >>= 3; + // In case we are not responsible for any cells we put the leftIndex = rightIndex+1 + for(int idxLevel = nullIntervalFromLevel ; idxLevel >= 1 ; --idxLevel){ + myIntervals[idxLevel].leftIndex = myIntervals[idxLevel].rightIndex + 1; } } - // In case we are not responsible for any cells we put the leftIndex = rightIndex+1 - for(int idxLevel = nullIntervalFromLevel ; idxLevel >= 1 ; --idxLevel){ - myIntervals[idxLevel].leftIndex = myIntervals[idxLevel].rightIndex + 1; - } - } - // We get the leftIndex/rightIndex indexes from each procs - FMpi::MpiAssert( MPI_Allgather( myIntervals.get(), int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, - workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, comm.getComm()), __LINE__ ); - } + // We get the leftIndex/rightIndex indexes from each procs + FMpi::MpiAssert( MPI_Allgather( myIntervals.get(), int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, + workingIntervalsPerLevel, int(sizeof(Interval)) * OctreeHeight, MPI_BYTE, fcomCompute.getComm()), __LINE__ ); + } #ifdef SCALFMM_TRACE_ALGO - eztrace_enter_event("P2M", EZTRACE_YELLOW); + eztrace_enter_event("P2M", EZTRACE_YELLOW); #endif - Timers[P2MTimer].tic(); - if(operationsToProceed & FFmmP2M) bottomPass(); - Timers[P2MTimer].tac(); + Timers[P2MTimer].tic(); + if(operationsToProceed & FFmmP2M) bottomPass(); + Timers[P2MTimer].tac(); #ifdef SSCALFMM_TRACE_ALGO - eztrace_leave_event(); - eztrace_enter_event("M2M", EZTRACE_PINK); + eztrace_leave_event(); + eztrace_enter_event("M2M", EZTRACE_PINK); #endif - Timers[M2MTimer].tic(); - if(operationsToProceed & FFmmM2M) upwardPass(); - Timers[M2MTimer].tac(); + Timers[M2MTimer].tic(); + if(operationsToProceed & FFmmM2M) upwardPass(); + Timers[M2MTimer].tac(); #ifdef SCALFMM_TRACE_ALGO - eztrace_leave_event(); - eztrace_enter_event("M2L", EZTRACE_GREEN); + eztrace_leave_event(); + eztrace_enter_event("M2L", EZTRACE_GREEN); #endif - Timers[M2LTimer].tic(); - if(operationsToProceed & FFmmM2L) transferPass(); - Timers[M2LTimer].tac(); + Timers[M2LTimer].tic(); + if(operationsToProceed & FFmmM2L) transferPass(); + Timers[M2LTimer].tac(); - #ifdef SCALFMM_TRACE_ALGO - eztrace_leave_event(); - eztrace_enter_event("L2L", EZTRACE_PINK); +#ifdef SCALFMM_TRACE_ALGO + eztrace_leave_event(); + eztrace_enter_event("L2L", EZTRACE_PINK); #endif - Timers[L2LTimer].tic(); - if(operationsToProceed & FFmmL2L) downardPass(); - Timers[L2LTimer].tac(); + Timers[L2LTimer].tic(); + if(operationsToProceed & FFmmL2L) downardPass(); + Timers[L2LTimer].tac(); #ifdef SCALFMM_TRACE_ALGO - eztrace_leave_event(); - eztrace_enter_event("L2P+P2P", EZTRACE_BLUE); + eztrace_leave_event(); + eztrace_enter_event("L2P+P2P", EZTRACE_BLUE); #endif - Timers[NearTimer].tic(); - if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ) directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P)); - Timers[NearTimer].tac(); + Timers[NearTimer].tic(); + if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ) directPass((operationsToProceed & FFmmP2P),(operationsToProceed & FFmmL2P)); + Timers[NearTimer].tac(); #ifdef SCALFMM_TRACE_ALGO - eztrace_leave_event(); - eztrace_stop(); + eztrace_leave_event(); + eztrace_stop(); #endif - // delete array - delete [] iterArray; - delete [] iterArrayComm; - iterArray = nullptr; - iterArrayComm = nullptr; + // delete array + delete [] iterArray; + delete [] iterArrayComm; + iterArray = nullptr; + iterArrayComm = nullptr; #ifdef SCALFMM_TRACE_ALGO - 
eztrace_stop(); + eztrace_stop(); #endif + } + else{ + FLOG( FLog::Controller << "\tProcess = " << comm.processId() << " has zero particles.\n" ); + } } ///////////////////////////////////////////////////////////////////////////// @@ -455,11 +483,11 @@ protected: } FLOG(parallelCounter.tic()); - #pragma omp parallel num_threads(MaxThreads) +#pragma omp parallel num_threads(MaxThreads) { KernelClass* myThreadkernels = (kernels[omp_get_thread_num()]); //This single section post and receive the comms, and then do the M2M associated with it. - #pragma omp single nowait +#pragma omp single nowait { FLOG(singleCounter.tic()); // Master proc never send @@ -491,10 +519,10 @@ protected: // Post the message bufferSize = sendBuffer.getSize(); MPI_Isend(&bufferSize, 1, FMpi::GetType(bufferSize), currentProcIdToSendTo, - FMpi::TagFmmM2MSize + idxLevel, comm.getComm(), &requestsSize[iterMpiRequestsSize++]); + FMpi::TagFmmM2MSize + idxLevel, fcomCompute.getComm(), &requestsSize[iterMpiRequestsSize++]); FAssertLF(sendBuffer.getSize() < std::numeric_limits<int>::max()); MPI_Isend(sendBuffer.data(), int(sendBuffer.getSize()), MPI_BYTE, currentProcIdToSendTo, - FMpi::TagFmmM2M + idxLevel, comm.getComm(), &requests[iterMpiRequests++]); + FMpi::TagFmmM2M + idxLevel, fcomCompute.getComm(), &requests[iterMpiRequests++]); } } @@ -509,7 +537,7 @@ protected: && ( !procHasWorkAtLevel(idxLevel+1, idProcSource) || procCoversMyRightBorderCell(idxLevel, idProcSource) )){ if(procHasWorkAtLevel(idxLevel+1, idProcSource) && procCoversMyRightBorderCell(idxLevel, idProcSource)){ MPI_Irecv(&recvBufferSize[nbProcThatSendToMe], 1, FMpi::GetType(recvBufferSize[nbProcThatSendToMe]), - idProcSource, FMpi::TagFmmM2MSize + idxLevel, comm.getComm(), &requestsSize[iterMpiRequestsSize++]); + idProcSource, FMpi::TagFmmM2MSize + idxLevel, fcomCompute.getComm(), &requestsSize[iterMpiRequestsSize++]); nbProcThatSendToMe += 1; FAssertLF(nbProcThatSendToMe <= 7); } @@ -535,7 +563,7 @@ protected: recvBuffer[nbProcThatSendToMe].cleanAndResize(recvBufferSize[nbProcThatSendToMe]); FAssertLF(recvBufferSize[nbProcThatSendToMe] < std::numeric_limits<int>::max()); MPI_Irecv(recvBuffer[nbProcThatSendToMe].data(), int(recvBufferSize[nbProcThatSendToMe]), MPI_BYTE, - idProcSource, FMpi::TagFmmM2M + idxLevel, comm.getComm(), &requests[iterMpiRequests++]); + idProcSource, FMpi::TagFmmM2M + idxLevel, fcomCompute.getComm(), &requests[iterMpiRequests++]); nbProcThatSendToMe += 1; FAssertLF(nbProcThatSendToMe <= 7); } @@ -557,11 +585,11 @@ protected: memcpy(currentChild, iterArray[totalNbCellsAtLevel - 1].getCurrentChild(), 8 * sizeof(CellClass*)); // Retreive data and merge my child and the child from others + int positionToInsert = 0; for(int idxProc = 0 ; idxProc < nbProcThatSendToMe ; ++idxProc){ unsigned packageFlags = unsigned(recvBuffer[idxProc].getValue<unsigned char>()); int position = 0; - int positionToInsert = 0; while( packageFlags && position < 8){ while(!(packageFlags & 0x1)){ packageFlags >>= 1; @@ -590,7 +618,7 @@ protected: }//End Of Single section // All threads proceed the M2M - #pragma omp for nowait schedule(dynamic, userChunkSize) +#pragma omp for nowait schedule(dynamic, userChunkSize) for( int idxCell = nbCellsToSkip ; idxCell < nbCellsForThreads ; ++idxCell){ myThreadkernels->M2M( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel); } @@ -645,9 +673,9 @@ protected: FMpiBufferReader**const recvBuffer = new FMpiBufferReader*[nbProcess * OctreeHeight]; memset(recvBuffer, 0, sizeof(FMpiBufferReader*) * 
nbProcess * OctreeHeight); - #pragma omp parallel num_threads(MaxThreads) +#pragma omp parallel num_threads(MaxThreads) { - #pragma omp master +#pragma omp master { { FLOG(prepareCounter.tic()); @@ -747,7 +775,7 @@ protected: ////////////////////////////////////////////////////////////////// FLOG(gatherCounter.tic()); - FMpi::MpiAssert( MPI_Allgather( indexToSend, nbProcess * OctreeHeight, MPI_LONG_LONG_INT, globalReceiveMap, nbProcess * OctreeHeight, MPI_LONG_LONG_INT, comm.getComm()), __LINE__ ); + FMpi::MpiAssert( MPI_Allgather( indexToSend, nbProcess * OctreeHeight, MPI_LONG_LONG_INT, globalReceiveMap, nbProcess * OctreeHeight, MPI_LONG_LONG_INT, fcomCompute.getComm()), __LINE__ ); FLOG(gatherCounter.tac()); ////////////////////////////////////////////////////////////////// @@ -780,7 +808,7 @@ protected: FMpi::ISendSplit(sendBuffer[idxLevel * nbProcess + idxProc]->data(), sendBuffer[idxLevel * nbProcess + idxProc]->getSize(), idxProc, - FMpi::TagLast + idxLevel*100, comm, &requests); + FMpi::TagLast + idxLevel*100, fcomCompute, &requests); } const long long int toReceiveFromProcAtLevel = globalReceiveMap[(idxProc * nbProcess * OctreeHeight) + idxLevel * nbProcess + idProcess]; @@ -789,7 +817,7 @@ protected: FMpi::IRecvSplit(recvBuffer[idxLevel * nbProcess + idxProc]->data(), recvBuffer[idxLevel * nbProcess + idxProc]->getCapacity(), idxProc, - FMpi::TagLast + idxLevel*100, comm, &requests); + FMpi::TagLast + idxLevel*100, fcomCompute, &requests); } } } @@ -808,7 +836,7 @@ protected: // Do M2L ////////////////////////////////////////////////////////////////// - #pragma omp single nowait +#pragma omp single nowait { typename OctreeClass::Iterator octreeIterator(tree); octreeIterator.moveDown(); @@ -845,7 +873,7 @@ protected: { const int chunckSize = userChunkSize; for(int idxCell = 0 ; idxCell < numberOfCells ; idxCell += chunckSize){ - #pragma omp task default(none) shared(numberOfCells,idxLevel) firstprivate(idxCell) //+ shared(chunckSize) +#pragma omp task default(none) shared(numberOfCells,idxLevel) firstprivate(idxCell) //+ shared(chunckSize) { KernelClass * const myThreadkernels = kernels[omp_get_thread_num()]; const CellClass* neighbors[342]; @@ -860,15 +888,15 @@ protected: } }//End of task spawning - #pragma omp taskwait +#pragma omp taskwait for(int idxThread = 0 ; idxThread < omp_get_num_threads() ; ++idxThread){ - #pragma omp task default(none) firstprivate(idxThread,idxLevel) +#pragma omp task default(none) firstprivate(idxThread,idxLevel) { kernels[idxThread]->finishedLevelM2L(idxLevel); } } - #pragma omp taskwait +#pragma omp taskwait FLOG(computationCounter.tac()); } @@ -945,7 +973,7 @@ protected: // Compute this cells FLOG(computationCounter.tic()); - #pragma omp parallel num_threads(MaxThreads) +#pragma omp parallel num_threads(MaxThreads) { KernelClass * const myThreadkernels = kernels[omp_get_thread_num()]; MortonIndex neighborsIndex[/*189+26+1*/216]; @@ -953,7 +981,7 @@ protected: const CellClass* neighbors[342]; int neighborPositions[342]; - #pragma omp for schedule(dynamic, userChunkSize) nowait +#pragma omp for schedule(dynamic, userChunkSize) nowait for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ // compute indexes const int counterNeighbors = iterArray[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndex, neighborsPosition, separationCriteria); @@ -1079,10 +1107,10 @@ protected: } } - #pragma omp parallel num_threads(MaxThreads) +#pragma omp parallel num_threads(MaxThreads) { KernelClass* myThreadkernels = 
(kernels[omp_get_thread_num()]); - #pragma omp single nowait +#pragma omp single nowait { FLOG(prepareCounter.tic()); int iterRequests = 0; @@ -1092,7 +1120,7 @@ protected: // Post the receive if(hasToReceive){ FMpi::MpiAssert( MPI_Irecv( &recvBufferSize, 1, FMpi::GetType(recvBufferSize), idxProcToReceive, - FMpi::TagFmmL2LSize + idxLevel, comm.getComm(), &requestsSize[iterRequestsSize++]), __LINE__ ); + FMpi::TagFmmL2LSize + idxLevel, fcomCompute.getComm(), &requestsSize[iterRequestsSize++]), __LINE__ ); } // We have to be sure that we are not sending if we have no work in the current level @@ -1112,10 +1140,10 @@ protected: } // Post the send message FMpi::MpiAssert( MPI_Isend(&sendBufferSize, 1, FMpi::GetType(sendBufferSize), idxProcSend, - FMpi::TagFmmL2LSize + idxLevel, comm.getComm(), &requestsSize[iterRequestsSize++]), __LINE__); + FMpi::TagFmmL2LSize + idxLevel, fcomCompute.getComm(), &requestsSize[iterRequestsSize++]), __LINE__); FAssertLF(sendBuffer.getSize() < std::numeric_limits<int>::max()); FMpi::MpiAssert( MPI_Isend(sendBuffer.data(), int(sendBuffer.getSize()), MPI_BYTE, idxProcSend, - FMpi::TagFmmL2L + idxLevel, comm.getComm(), &requests[iterRequests++]), __LINE__); + FMpi::TagFmmL2L + idxLevel, fcomCompute.getComm(), &requests[iterRequests++]), __LINE__); // Inc and check the counter nbMessageSent += 1; FAssertLF(nbMessageSent <= 7); @@ -1129,7 +1157,7 @@ protected: // Finalize the communication if(iterRequestsSize){ FLOG(waitCounter.tic()); - FAssertLF(iterRequestsSize < 8); + FAssertLF(iterRequestsSize <= 8); FMpi::MpiAssert(MPI_Waitall( iterRequestsSize, requestsSize, MPI_STATUSES_IGNORE), __LINE__); FLOG(waitCounter.tac()); } @@ -1138,12 +1166,12 @@ protected: recvBuffer.cleanAndResize(recvBufferSize); FAssertLF(recvBuffer.getCapacity() < std::numeric_limits<int>::max()); FMpi::MpiAssert( MPI_Irecv( recvBuffer.data(), int(recvBuffer.getCapacity()), MPI_BYTE, idxProcToReceive, - FMpi::TagFmmL2L + idxLevel, comm.getComm(), &requests[iterRequests++]), __LINE__ ); + FMpi::TagFmmL2L + idxLevel, fcomCompute.getComm(), &requests[iterRequests++]), __LINE__ ); } if(iterRequests){ FLOG(waitCounter.tic()); - FAssertLF(iterRequests < 8); + FAssertLF(iterRequests <= 8); FMpi::MpiAssert(MPI_Waitall( iterRequests, requests, MPI_STATUSES_IGNORE), __LINE__); FLOG(waitCounter.tac()); } @@ -1159,12 +1187,12 @@ protected: FLOG(prepareCounter.tac()); } - #pragma omp single nowait +#pragma omp single nowait { FLOG(computationCounter.tic()); } // Threads are working on all the cell of our working interval at that level - #pragma omp for nowait schedule(dynamic, userChunkSize) +#pragma omp for nowait schedule(dynamic, userChunkSize) for(int idxCell = nbCellsToSkip ; idxCell < totalNbCellsAtLevel ; ++idxCell){ myThreadkernels->L2L( iterArray[idxCell].getCurrentCell() , iterArray[idxCell].getCurrentChild(), idxLevel); } @@ -1310,7 +1338,7 @@ protected: //Share to all processus globalReceiveMap FLOG(gatherCounter.tic()); FMpi::MpiAssert( MPI_Allgather( partsToSend, nbProcess, FMpi::GetType(*partsToSend), - globalReceiveMap, nbProcess, FMpi::GetType(*partsToSend), comm.getComm()), __LINE__ ); + globalReceiveMap, nbProcess, FMpi::GetType(*partsToSend), fcomCompute.getComm()), __LINE__ ); FLOG(gatherCounter.tac()); FMpiBufferReader**const recvBuffer = new FMpiBufferReader*[nbProcess]; @@ -1329,7 +1357,7 @@ protected: recvBuffer[idxProc] = new FMpiBufferReader(globalReceiveMap[idxProc * nbProcess + idProcess]); FMpi::IRecvSplit(recvBuffer[idxProc]->data(), recvBuffer[idxProc]->getCapacity(), - 
idxProc, FMpi::TagFmmP2P, comm, &requests); + idxProc, FMpi::TagFmmP2P, fcomCompute, &requests); } } @@ -1347,7 +1375,7 @@ protected: FAssertLF(sendBuffer[idxProc]->getSize() == globalReceiveMap[idProcess*nbProcess+idxProc]); FMpi::ISendSplit(sendBuffer[idxProc]->data(), sendBuffer[idxProc]->getSize(), - idxProc, FMpi::TagFmmP2P, comm, &requests); + idxProc, FMpi::TagFmmP2P, fcomCompute, &requests); } } @@ -1380,7 +1408,7 @@ protected: delete recvBuffer[idxProc]; recvBuffer[idxProc] = nullptr; } - } + } for(int idxProc = 0 ; idxProc < nbProcess ; ++idxProc){ delete sendBuffer[idxProc]; @@ -1472,7 +1500,7 @@ protected: // need the current particles and neighbors particles const int counter = tree->getLeafsNeighbors(neighbors, neighborPositions, currentIter.coord, OctreeHeight-1); myThreadkernels->P2P( currentIter.coord,currentIter.targets, - currentIter.sources, neighbors, neighborPositions, counter); + currentIter.sources, neighbors, neighborPositions, counter); } } } @@ -1528,7 +1556,7 @@ protected: } if(counter){ myThreadkernels.P2PRemote( currentIter.cell->getCoordinate(), currentIter.targets, - currentIter.sources, neighbors, neighborPositions, counter); + currentIter.sources, neighbors, neighborPositions, counter); } } diff --git a/Src/Files/FMpiTreeBuilder.hpp b/Src/Files/FMpiTreeBuilder.hpp index 95ed92bb00982bc3c6cd1ba978b78290c9aceb6e..fc449ea4038ca5203eb773ae545240b20fb42dcd 100644 --- a/Src/Files/FMpiTreeBuilder.hpp +++ b/Src/Files/FMpiTreeBuilder.hpp @@ -162,7 +162,7 @@ public: // To merge the leaves ////////////////////////////////////////////////////////////////////////// - static void MergeSplitedLeaves(const FMpi::FComm& communicator, IndexedParticle* workingArray, FSize* workingSize, + static void MergeSplitedLeaves(const FMpi::FComm& communicator, IndexedParticle** workingArray, FSize* workingSize, FSize ** leavesOffsetInParticles, ParticleClass** particlesArrayInLeafOrder, FSize* const leavesSize){ const int myRank = communicator.processId(); const int nbProcs = communicator.processCount(); @@ -171,13 +171,13 @@ public: { // Get the information of the leaves leavesInfo.clear(); if((*workingSize)){ - leavesInfo.push({workingArray[0].index, 1, 0}); + leavesInfo.push({(*workingArray)[0].index, 1, 0}); for(FSize idxPart = 1 ; idxPart < (*workingSize) ; ++idxPart){ - if(leavesInfo.data()[leavesInfo.getSize()-1].mindex == workingArray[idxPart].index){ + if(leavesInfo.data()[leavesInfo.getSize()-1].mindex == (*workingArray)[idxPart].index){ leavesInfo.data()[leavesInfo.getSize()-1].nbParts += 1; } else{ - leavesInfo.push({workingArray[idxPart].index, 1, idxPart}); + leavesInfo.push({(*workingArray)[idxPart].index, 1, idxPart}); } } } @@ -209,12 +209,16 @@ public: while(0 < idProcToSendTo && (allProcFirstLeafStates[(idProcToSendTo-1)*2 + 1].mindex == borderLeavesState[0].mindex || allProcFirstLeafStates[(idProcToSendTo-1)*2 + 1].mindex == noDataFlag)){ + FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] idProcToSendTo " + << idProcToSendTo << " allProcFirstLeafStates[(idProcToSendTo-1)*2 + 1].mindex " << + allProcFirstLeafStates[(idProcToSendTo-1)*2 + 1].mindex << " borderLeavesState[0].mindex " << + borderLeavesState[0].mindex << "\n"; FLog::Controller.flush(); ); idProcToSendTo -= 1; } // We found someone if(idProcToSendTo != myRank && allProcFirstLeafStates[(idProcToSendTo)*2 + 1].mindex == borderLeavesState[0].mindex){ // Post and send message for the first leaf - FMpi::ISendSplit(&workingArray[0], borderLeavesState[0].nbParts, 
idProcToSendTo, + FMpi::ISendSplit(&(*workingArray)[0], borderLeavesState[0].nbParts, idProcToSendTo, FMpi::TagExchangeIndexs, communicator, &requests); FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] send " << borderLeavesState[0].nbParts << " to " << idProcToSendTo << "\n"; FLog::Controller.flush(); ); hasSentFirstLeaf = true; @@ -228,11 +232,18 @@ public: // Count all the particle of our first leaf on other procs FSize totalNbParticlesToRecv = 0; int idProcToRecvFrom = myRank; - while(idProcToRecvFrom+1 < nbProcs && - (borderLeavesState[1].mindex == allProcFirstLeafStates[(idProcToRecvFrom+1)*2].mindex - || allProcFirstLeafStates[(idProcToRecvFrom+1)*2].mindex == noDataFlag)){ - idProcToRecvFrom += 1; - totalNbParticlesToRecv += allProcFirstLeafStates[(idProcToRecvFrom)*2].nbParts; + if(!hasSentFirstLeaf || borderLeavesState[0].mindex != borderLeavesState[1].mindex){ + while(idProcToRecvFrom+1 < nbProcs && + (borderLeavesState[1].mindex == allProcFirstLeafStates[(idProcToRecvFrom+1)*2].mindex + || allProcFirstLeafStates[(idProcToRecvFrom+1)*2].mindex == noDataFlag)){ + FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] idProcToRecvFrom " + << idProcToRecvFrom << " allProcFirstLeafStates[(idProcToRecvFrom+1)*2].mindex " << + allProcFirstLeafStates[(idProcToRecvFrom+1)*2].mindex << " borderLeavesState[1].mindex " << + borderLeavesState[1].mindex << "\n"; FLog::Controller.flush(); ); + + idProcToRecvFrom += 1; + totalNbParticlesToRecv += allProcFirstLeafStates[(idProcToRecvFrom)*2].nbParts; + } } // If there are some if(totalNbParticlesToRecv){ @@ -262,7 +273,7 @@ public: const FSize offsetParticles = borderLeavesState[0].nbParts; // Move all the particles for(FSize idxPart = offsetParticles ; idxPart < (*workingSize) ; ++idxPart){ - workingArray[idxPart - offsetParticles] = workingArray[idxPart]; + (*workingArray)[idxPart - offsetParticles] = (*workingArray)[idxPart]; } // Move all the leaf for(int idxLeaf = 1 ; idxLeaf < leavesInfo.getSize() ; ++idxLeaf){ @@ -276,14 +287,16 @@ public: if(hasExtendLastLeaf){ // Allocate array const FSize finalParticlesNumber = (*workingSize) + receivedParticles.size(); + FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] Create array " + << finalParticlesNumber << " particles\n"; FLog::Controller.flush(); ); IndexedParticle* particlesWithExtension = new IndexedParticle[finalParticlesNumber]; // Copy old data - memcpy(particlesWithExtension, workingArray, (*workingSize)*sizeof(IndexedParticle)); + memcpy(particlesWithExtension, (*workingArray), (*workingSize)*sizeof(IndexedParticle)); // Copy received data memcpy(particlesWithExtension + (*workingSize), receivedParticles.data(), receivedParticles.size()*sizeof(IndexedParticle)); // Move ptr - delete[] workingArray; - workingArray = particlesWithExtension; + delete[] (*workingArray); + (*workingArray) = particlesWithExtension; (*workingSize) = finalParticlesNumber; leavesInfo[leavesInfo.getSize()-1].nbParts += receivedParticles.size(); } @@ -298,7 +311,7 @@ public: //Copy all the particles (*particlesArrayInLeafOrder) = new ParticleClass[(*workingSize)]; for(FSize idxPart = 0 ; idxPart < (*workingSize) ; ++idxPart){ - memcpy(&(*particlesArrayInLeafOrder)[idxPart],&workingArray[idxPart].particle,sizeof(ParticleClass)); + memcpy(&(*particlesArrayInLeafOrder)[idxPart],&(*workingArray)[idxPart].particle,sizeof(ParticleClass)); } // Assign the number of leaf (*leavesSize) = 
leavesInfo.getSize(); @@ -363,8 +376,9 @@ public: const std::vector<FEqualize::Package> packsToSend = FEqualize::GetPackToSend(myCurrentInter, allObjectives); FAssertLF((currentNbLeaves == 0 && packsToSend.size() == 0) || - (packsToSend.size() && FSize(packsToSend[packsToSend.size()-1].elementTo) == currentNbLeaves)); + (currentNbLeaves != 0 && packsToSend.size() && FSize(packsToSend[packsToSend.size()-1].elementTo) == currentNbLeaves)); + FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] Previous currentNbLeaves (" << currentNbLeaves << ")\n"; FLog::Controller.flush(); ); FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] Get my interval (" << packsToSend.size() << ")\n"; FLog::Controller.flush(); ); FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << communicator.processId() << "] Send data\n"; FLog::Controller.flush(); ); @@ -379,6 +393,8 @@ public: const FEqualize::Package& pack = packsToSend[idxPack]; if(idxPack != 0) FAssertLF(packsToSend[idxPack].elementFrom == packsToSend[idxPack-1].elementTo); + FAssertLF(FSize(pack.elementTo) <= FSize(currentNbParts)); + FAssertLF(pack.elementFrom <= pack.elementTo); const long long int nbPartsPerPackToSend = leavesOffsetInParticles[pack.elementTo]-leavesOffsetInParticles[pack.elementFrom]; totalSend += nbPartsPerPackToSend; @@ -388,7 +404,7 @@ public: << " from " << pack.elementFrom << " to " << pack.elementTo << " \n"; FLog::Controller.flush(); ); // Send the size of the data requestsNbParts.emplace_back(); - FMpi::MpiAssert(MPI_Isend(&nbPartsPerPackToSend,1,MPI_LONG_LONG_INT,pack.idProc, + FMpi::MpiAssert(MPI_Isend(const_cast<long long int*>(&nbPartsPerPackToSend),1,MPI_LONG_LONG_INT,pack.idProc, FMpi::TagExchangeIndexs, communicator.getComm(), &requestsNbParts.back()),__LINE__); } @@ -552,7 +568,9 @@ public: // From ParticleClass get array of IndexedParticle sorted GetSortedParticlesFromArray(communicator, originalParticlesArray, originalNbParticles, sortingType, boxCenter, boxWidth, treeHeight, &sortedParticlesArray, &nbParticlesInArray); - FLOG( FLog::Controller << "[" << communicator.processId() << "] Particles Distribution: " << "\t GetSortedParticlesFromArray is over (" << timer.tacAndElapsed() << "s)\n"; FLog::Controller.flush(); ); + FLOG( FLog::Controller << "[" << communicator.processId() << "] Particles Distribution: " + << "\t GetSortedParticlesFromArray is over (" << timer.tacAndElapsed() << "s) " + << nbParticlesInArray << " particles\n"; FLog::Controller.flush(); ); FLOG( timer.tic() ); // for(int idx = 0 ; idx < nbParticlesInArray ; ++idx){ @@ -563,7 +581,7 @@ public: FSize * leavesOffsetInParticles = nullptr; FSize nbLeaves = 0; // Merge the leaves - MergeSplitedLeaves(communicator, sortedParticlesArray, &nbParticlesInArray, &leavesOffsetInParticles, &particlesArrayInLeafOrder, &nbLeaves); + MergeSplitedLeaves(communicator, &sortedParticlesArray, &nbParticlesInArray, &leavesOffsetInParticles, &particlesArrayInLeafOrder, &nbLeaves); delete[] sortedParticlesArray; // for(int idx = 0 ; idx < nbParticlesInArray ; ++idx){ diff --git a/Src/Utils/FMpi.hpp b/Src/Utils/FMpi.hpp index f0654ea8ea223c919ca278774fecc095be29bf9e..c299a0fc6088dc6de6830c41ee35452b1cc5b85f 100644 --- a/Src/Utils/FMpi.hpp +++ b/Src/Utils/FMpi.hpp @@ -107,7 +107,7 @@ public: * This class is used to gather the usual methods related to identifying an * MPI communicator. 
*/ - class FComm : public FNoCopyable { + class FComm { int rank; ///< rank related to the comm int nbProc; ///< nb proc in this group @@ -130,6 +130,26 @@ public: reset(); } + /// Constructor : duplicates the given communicator + FComm(const FComm& inCommunicator ) { + FMpi::Assert( MPI_Comm_dup(inCommunicator.communicator, &communicator), __LINE__ , "comm dup"); + FMpi::Assert( MPI_Comm_group(communicator, &group), __LINE__ , "comm group"); + + reset(); + } + + FComm& operator=(const FComm& inCommunicator ) { + FMpi::Assert( MPI_Comm_free(&communicator), __LINE__ ); + FMpi::Assert( MPI_Group_free(&group), __LINE__ ); + + FMpi::Assert( MPI_Comm_dup(inCommunicator.communicator, &communicator), __LINE__ , "comm dup"); + FMpi::Assert( MPI_Comm_group(communicator, &group), __LINE__ , "comm group"); + + reset(); + + return *this; + } + /// Frees communicator and group virtual ~FComm(){ FMpi::Assert( MPI_Comm_free(&communicator), __LINE__ ); @@ -248,6 +268,35 @@ public: delete[] procsIdArray ; } + /** Change the group, create one groupd where processInGroup[i] != 0 + * and another where processInGroup[i] == 0 + */ + void groupReduce(const int processInGroup[]){ + int * procsIdArray = new int [nbProc]; + int counterNewGroup = 0; + for(int idxProc = 0 ;idxProc < nbProc ; ++idxProc){ + if(processInGroup[rank] && processInGroup[idxProc]){ + procsIdArray[counterNewGroup++] = idxProc; + } + else if(!processInGroup[rank] && !processInGroup[idxProc]){ + procsIdArray[counterNewGroup++] = idxProc; + } + } + + MPI_Group previousGroup = group; + FMpi::Assert( MPI_Group_incl(previousGroup, counterNewGroup , procsIdArray, &group), __LINE__ ); + + MPI_Comm previousComm = communicator; + FMpi::Assert( MPI_Comm_create(previousComm, group, &communicator), __LINE__ ); + + MPI_Comm_free(&previousComm); + MPI_Group_free(&previousGroup); + + reset(); + FAssertLF(nbProc == counterNewGroup); + delete[] procsIdArray ; + } + void barrier() const { FMpi::Assert(MPI_Barrier(getComm()), __LINE__); } diff --git a/Src/Utils/FQuickSortMpi.hpp b/Src/Utils/FQuickSortMpi.hpp index 437d2ede4e2f81538ffb9ecdef515f88be01b197..eba5fdfff4bc1298bf76ef6e7cf1e7c72d354e36 100644 --- a/Src/Utils/FQuickSortMpi.hpp +++ b/Src/Utils/FQuickSortMpi.hpp @@ -93,14 +93,18 @@ class FQuickSortMpi : public FQuickSort< SortType, IndexType> { { // Get the number of elements each proc should recv IndexType totalRemainingElements = totalElements; + IndexType totalAvailableElements = totalElementToProceed; + for(int idxProc = firstProcToRecv; idxProc < lastProcToRecv ; ++idxProc){ const IndexType nbElementsAlreadyOwned = (inFromRightToLeft ? 
globalElementBalance[idxProc].lowerPart : globalElementBalance[idxProc].greaterPart); const IndexType averageNbElementForRemainingProc = (totalRemainingElements)/(lastProcToRecv-idxProc); totalRemainingElements -= nbElementsAlreadyOwned; FAssertLF(totalRemainingElements >= 0); - if(nbElementsAlreadyOwned < averageNbElementForRemainingProc){ - nbElementsToRecvPerProc[idxProc - firstProcToRecv] = (averageNbElementForRemainingProc - nbElementsAlreadyOwned); + if(nbElementsAlreadyOwned < averageNbElementForRemainingProc && totalAvailableElements){ + nbElementsToRecvPerProc[idxProc - firstProcToRecv] = FMath::Min(totalAvailableElements, + averageNbElementForRemainingProc - nbElementsAlreadyOwned); + totalAvailableElements -= nbElementsToRecvPerProc[idxProc - firstProcToRecv]; totalRemainingElements -= nbElementsToRecvPerProc[idxProc - firstProcToRecv]; } else{ @@ -308,6 +312,10 @@ class FQuickSortMpi : public FQuickSort< SortType, IndexType> { counterValuesInPivot += 1; } } + if(counterValuesInPivot <= 1){ + (*shouldStop) = true; + return globalPivot; + } (*shouldStop) = false; return globalPivot/counterValuesInPivot; } @@ -336,7 +344,7 @@ public: break; } - FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << currentComm.processId() << "] globalPivot = " << globalPivot << "\n" ); + FLOG(if(VerboseLog) FLog::Controller << "SCALFMM-DEBUG [" << currentComm.processId() << "] globalPivot = " << globalPivot << " for " << currentComm.processCount() << "\n" ); FLOG(if(VerboseLog) FLog::Controller.flush()); // Split the array in two parts lower equal to pivot and greater than pivot diff --git a/Tests/Utils/testPartitionsMapping.cpp b/Tests/Utils/testPartitionsMapping.cpp new file mode 100644 index 0000000000000000000000000000000000000000..b7f5c1bce67ae1b86b38253a3f94caef428d48a4 --- /dev/null +++ b/Tests/Utils/testPartitionsMapping.cpp @@ -0,0 +1,149 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". 
+// =================================================================================== + +// ==== CMAKE ===== +// @FUSE_MPI +// ================ + +#include <iostream> + +#include <cstdio> +#include <cstdlib> + + +#include "../../Src/Kernels/Rotation/FRotationCell.hpp" +#include "../../Src/Kernels/Rotation/FRotationKernel.hpp" + +#include "../../Src/Components/FSimpleLeaf.hpp" +#include "../../Src/Kernels/P2P/FP2PParticleContainerIndexed.hpp" + +#include "../../Src/Utils/FParameters.hpp" +#include "../../Src/Utils/FMemUtils.hpp" +#include "../../Src/BalanceTree/FPartitionsMapping.hpp" + +#include "../../Src/Containers/FOctree.hpp" +#include "../../Src/Containers/FVector.hpp" + +#include "../../Src/Files/FRandomLoader.hpp" +#include "../../Src/Files/FMpiTreeBuilder.hpp" + +#include "../../Src/Core/FFmmAlgorithm.hpp" +#include "../../Src/Core/FFmmAlgorithmThread.hpp" +#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp" + +#include "../../Src/BalanceTree/FLeafBalance.hpp" + +#include "../../Src/Utils/FParameterNames.hpp" + +/** + * This program runs the FMM Algorithm Distributed with the Rotation kernel + */ + +// Simply create particles and try the kernels +int main(int argc, char* argv[]) +{ + FHelpDescribeAndExit(argc, argv, + "Test with MPI the chebyshev FMM and compare it to the direct computation for debugging purpose.", + FParameterDefinitions::NbParticles, FParameterDefinitions::OctreeHeight, + FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::NbThreads); + + typedef double FReal; + + FMpi app(argc,argv); + + const FSize nbParticles = FParameters::getValue(argc,argv, FParameterDefinitions::NbParticles.options, 10000000ULL); + const unsigned int TreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); + FTic time; + + std::cout << ">> This executable has to be used to test Proc Rotation Algorithm. 
\n"; + + // init particles position and physical value + struct TestParticle{ + FPoint<FReal> position; + const FPoint<FReal>& getPosition(){ + return position; + } + }; + + // open particle file + std::cout << "Creating : " << nbParticles << "\n" << std::endl; + FRandomLoader<FReal> loader(nbParticles, 1.0, FPoint<FReal>(0,0,0), app.global().processId()); + + time.tic(); + std::unique_ptr<TestParticle[]> particles(new TestParticle[loader.getNumberOfParticles()]); + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + loader.fillParticle(&particles[idxPart].position); + } + + FPartitionsMapping<FReal> map(app.global()); + + FVector<FPartitionsMapping<double>::TestParticle<0> > finalParticles = map.distributeParticles<0>(loader.getNumberOfParticles(), + loader.getCenterOfBox(), + loader.getBoxWidth(), TreeHeight, + [&](const int idx, FPoint<FReal>* position, std::array<FReal, 0>* /*val*/){ + position->setPosition(particles[idx].position.getX(), + particles[idx].position.getY(), + particles[idx].position.getZ()); + }); + + // Test every particles exist + { + std::unique_ptr<int[]> count(new int[app.global().processCount() * nbParticles]); + memset(count.get(), 0, sizeof(int) * app.global().processCount() * nbParticles); + for(FSize part = 0 ; part < finalParticles.getSize() ; ++part){ + const FSize idx = finalParticles[part].initialProcOwner*nbParticles + finalParticles[part].localIndex; + FAssertLF(count[idx] == 0) + count[idx] = 1; + } + + FMpi::Assert( MPI_Allreduce(MPI_IN_PLACE, count.get(), app.global().processCount()*nbParticles, MPI_INT, MPI_SUM, + app.global().getComm()), __LINE__); + + for(FSize part = 0 ; part < app.global().processCount()*nbParticles ; ++part){ + FAssertLF(count[part] == 1); + } + } + + // Test to send data + { + std::unique_ptr<std::array<FReal, 2>[]> toDistr(new std::array<FReal, 2>[nbParticles]); + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + toDistr[idxPart][0] = app.global().processId(); + toDistr[idxPart][1] = FReal(idxPart); + } + + std::unique_ptr<std::array<FReal, 2>[]> res = map.distributeData<2>(toDistr.get()); + + for(FSize part = 0 ; part < finalParticles.getSize() ; ++part){ + if(int(res[part][0]) != finalParticles[part].initialProcOwner) + std::cout << "[" << app.global().processId() << "]Res proc is " << res[part][0] << " should be " << finalParticles[part].initialProcOwner << std::endl; + if(int(res[part][1]) != finalParticles[part].localIndex) + std::cout << "[" << app.global().processId() << "]Res localidx is " << res[part][1] << " should be " << finalParticles[part].localIndex << std::endl; + } + + std::unique_ptr<std::array<FReal, 2>[]> resBack = map.getResultingData<2>(res.get()); + + for(FSize part = 0 ; part < loader.getNumberOfParticles() ; ++part){ + if(int(resBack[part][0]) != app.global().processId()) + std::cout << "[" << app.global().processId() << "]ResBack proc is " << resBack[part][0] << " should be " << app.global().processId() << std::endl; + if(int(resBack[part][1]) != part) + std::cout << "[" << app.global().processId() << "]ResBack localidx is " << resBack[part][1] << " should be " << part << std::endl; + } + } + + return 0; +} +