diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp
index c177d64ec80078ecb6cda95f2317676b6e5a4109..4d097822a13e2e0cfbb62662549f534c80d7bc7c 100644
--- a/Src/Core/FFmmAlgorithmThreadProc.hpp
+++ b/Src/Core/FFmmAlgorithmThreadProc.hpp
@@ -80,12 +80,12 @@ public:
       * @param inKernels the kernels to call
       * An assert is launched if one of the arguments is null
       */
-    FFmmAlgorithmThreadProc(FMpi& inApp, OctreeClass* const inTree, KernelClass* const inKernels)
+    FFmmAlgorithmThreadProc(const FMpi::FComm& comm, OctreeClass* const inTree, KernelClass* const inKernels)
             : tree(inTree) , kernels(0), numberOfLeafs(0),
-            MaxThreads(omp_get_max_threads()), nbProcess(inApp.processCount()), idProcess(inApp.processId()),
-            OctreeHeight(tree->getHeight()),intervals(new Interval[inApp.processCount()]),
-            realIntervalsPerLevel(new Interval[inApp.processCount() * tree->getHeight()]),
-            workingIntervalsPerLevel(new Interval[inApp.processCount() * tree->getHeight()]){
+            MaxThreads(omp_get_max_threads()), nbProcess(comm.processCount()), idProcess(comm.processId()),
+            OctreeHeight(tree->getHeight()),intervals(new Interval[comm.processCount()]),
+            realIntervalsPerLevel(new Interval[comm.processCount() * tree->getHeight()]),
+            workingIntervalsPerLevel(new Interval[comm.processCount() * tree->getHeight()]){
 
         fassert(tree, "tree cannot be null", __LINE__, __FILE__);
 
diff --git a/Src/Files/FMpiFmaLoader.hpp b/Src/Files/FMpiFmaLoader.hpp
index 3863422c6c0bb6c3f4ffb67b29e2e772a705d68b..d8a0a35c2445852d2cd50268b4fa382ce2bb6dda 100644
--- a/Src/Files/FMpiFmaLoader.hpp
+++ b/Src/Files/FMpiFmaLoader.hpp
@@ -54,112 +54,115 @@ public:
     * @param filename the name of the file to open
     * you can test if file is successfuly open by calling hasNotFinished()
     */
-    FMpiFmaLoader(const char* const filename, FMpi& app)
+    FMpiFmaLoader(const char* const filename, const FMpi::FComm& comm, const bool useMpiIO = false)
             : boxWidth(0), totalNbParticles(0), nbParticles(0), isOpenFlag(false), particles(0), idxParticles(0) {
-        /*char nonConstFilename[512];
-        strcpy(nonConstFilename,filename);
-        MPI_File file;
-        if(MPI_File_open(MPI::COMM_WORLD, nonConstFilename, MPI::MODE_RDONLY, MPI::INFO_NULL, &file) == MPI_SUCCESS){
-            int sizeOfElement(0);
-            FReal xyzBoxWidth[4];
-
-            MPI_Status status;
-            if( MPI_File_read(file, &sizeOfElement, 1, MPI_INT, &status) == MPI_SUCCESS
-                && MPI_File_read(file, &this->totalNbParticles, 1, MPI_LONG_LONG, &status) == MPI_SUCCESS
-                && MPI_File_read(file, xyzBoxWidth, 4, MPI_FLOAT, &status) == MPI_SUCCESS ){
-
-                FDEBUG(if(sizeOfElement != sizeof(FReal)){)
+        if( useMpiIO ){
+            char nonConstFilename[512];
+            strcpy(nonConstFilename,filename);
+            MPI_File file;
+            if(MPI_File_open(MPI::COMM_WORLD, nonConstFilename, MPI::MODE_RDONLY, MPI::INFO_NULL, &file) == MPI_SUCCESS){
+                int sizeOfElement(0);
+                FReal xyzBoxWidth[4];
+
+                MPI_Status status;
+                if( MPI_File_read(file, &sizeOfElement, 1, MPI_INT, &status) == MPI_SUCCESS
+                    && MPI_File_read(file, &this->totalNbParticles, 1, MPI_LONG_LONG, &status) == MPI_SUCCESS
+                    && MPI_File_read(file, xyzBoxWidth, 4, MPI_FLOAT, &status) == MPI_SUCCESS ){
+
+                    FDEBUG(if(sizeOfElement != sizeof(FReal)){)
+                        FDEBUG( FDebug::Controller.writeFromLine("Warning type size between file and FReal are differents\n", __LINE__, __FILE__); )
+                    FDEBUG(})
+
+                    this->boxWidth = xyzBoxWidth[3];
+                    this->centerOfBox.setPosition(xyzBoxWidth[0],xyzBoxWidth[1],xyzBoxWidth[2]);
+                    this->boxWidth *= 2;
+                    this->isOpenFlag = true;
+
+                    // load my particles
+                    MPI_Offset headDataOffSet(0);
+                    MPI_File_get_position(file, &headDataOffSet);
+
+                    MPI_Offset filesize(0);
+                    MPI_File_get_size(file, &filesize); // in bytes
+                    filesize = (filesize - headDataOffSet) / sizeof(FReal);
+                    if(filesize/4 != this->totalNbParticles){
+                        printf("Error fileSize %lld, nbPart %lld\n",filesize/4, this->totalNbParticles);
+                    }
+                    // in number of floats
+                    const FSize startPart = comm.getLeft(this->totalNbParticles);
+                    const FSize endPart   = comm.getRight(this->totalNbParticles);
+                    nbParticles = (endPart - startPart);
+                    const FSize bufsize = nbParticles * 4;
+                    // local number to read
+                    particles = new FReal[bufsize];
+
+                    if( sizeof(FReal) == sizeof(float) ){
+                        MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal), particles, int(bufsize), MPI_FLOAT, &status);
+                    }
+                    else{
+                        MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal), particles, int(bufsize), MPI_DOUBLE, &status);
+                    }
+
+
+                    // check if needed
+                    int count(0);
+                    MPI_Get_count(&status, MPI_INT, &count);
+                    FDEBUG(if(count  / 4 != this->nbParticles){)
+                        FDEBUG( FDebug::Controller<< "Error read " << count << " data, nbPart is " << this->nbParticles << __LINE__ << " " << __FILE__ << "\n"; )
+                    FDEBUG(})
+                }
+                else{
+                    this->totalNbParticles = 0;
+                }
+                MPI_File_close(&file);
+            }
+        }
+        else {
+            FILE* file(fopen(filename, "rb"));
+            int removeWarning(0);
+            // test if open
+            if(file != NULL) {
+                int sizeOfElement(0);
+                removeWarning += fread(&sizeOfElement, sizeof(int), 1, file);
+                FDEBUG(if(sizeOfElement != int(sizeof(FReal)) ){)
                     FDEBUG( FDebug::Controller.writeFromLine("Warning type size between file and FReal are differents\n", __LINE__, __FILE__); )
+                        printf("%d sizeofelement\n",sizeOfElement);
                 FDEBUG(})
+                removeWarning += fread(&this->totalNbParticles, sizeof(FSize), 1, file);
 
-                this->boxWidth = xyzBoxWidth[3];
-                this->centerOfBox.setPosition(xyzBoxWidth[0],xyzBoxWidth[1],xyzBoxWidth[2]);
+                removeWarning += fread(&this->boxWidth, sizeof(FReal), 1, file);
                 this->boxWidth *= 2;
+
+                FReal x,y,z;
+                removeWarning += fread(&x, sizeof(FReal), 1, file);
+                removeWarning += fread(&y, sizeof(FReal), 1, file);
+                removeWarning += fread(&z, sizeof(FReal), 1, file);
+                this->centerOfBox.setPosition(x,y,z);
+
                 this->isOpenFlag = true;
 
-                // load my particles
-                MPI_Offset headDataOffSet(0);
-                MPI_File_get_position(file, &headDataOffSet);
+                const long int headDataOffSet = ftell(file);
+                fseek(file, 0L, SEEK_END);
+                const long int filesize = (ftell(file) - headDataOffSet) / sizeof(FReal);
 
-                MPI_Offset filesize(0);
-                MPI_File_get_size(file, &filesize); // in bytes
-                filesize = (filesize - headDataOffSet) / sizeof(FReal);
                 if(filesize/4 != this->totalNbParticles){
-                    printf("Error fileSize %lld, nbPart %lld\n",filesize/4, this->totalNbParticles);
+                    printf("Error fileSize %ld, nbPart %lld\n", filesize/4, this->totalNbParticles);
                 }
+
                 // in number of floats
-                const FSize startPart = app.getLeft(this->totalNbParticles);
-                const FSize endPart   = app.getRight(this->totalNbParticles);
+                const FSize startPart = comm.getLeft(this->totalNbParticles);
+                const FSize endPart   = comm.getRight(this->totalNbParticles);
                 nbParticles = (endPart - startPart);
                 const FSize bufsize = nbParticles * 4;
                 // local number to read
                 particles = new FReal[bufsize];
 
-                if( sizeof(FReal) == sizeof(float) ){
-                    MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal), particles, int(bufsize), MPI_FLOAT, &status);
-                }
-                else{
-                    MPI_File_read_at(file, headDataOffSet + startPart * 4 * sizeof(FReal), particles, int(bufsize), MPI_DOUBLE, &status);
-                }
+                fseek(file, long(headDataOffSet + startPart * 4 * sizeof(FReal)), SEEK_SET);
 
+                removeWarning += fread(particles, sizeof(FReal), int(bufsize), file);
 
-                // check if needed
-                int count(0);
-                MPI_Get_count(&status, MPI_INT, &count);
-                FDEBUG(if(count  / 4 != this->nbParticles){)
-                    FDEBUG( FDebug::Controller<< "Error read " << count << " data, nbPart is " << this->nbParticles << __LINE__ << " " << __FILE__ << "\n"; )
-                FDEBUG(})
-            }
-            else{
-                this->totalNbParticles = 0;
-            }
-            MPI_File_close(&file);
-        }*/
-
-        FILE* file(fopen(filename, "rb"));
-        int removeWarning(0);
-        // test if open
-        if(file != NULL) {
-            int sizeOfElement(0);
-            removeWarning += fread(&sizeOfElement, sizeof(int), 1, file);
-            FDEBUG(if(sizeOfElement != int(sizeof(FReal)) ){)
-                FDEBUG( FDebug::Controller.writeFromLine("Warning type size between file and FReal are differents\n", __LINE__, __FILE__); )
-                    printf("%d sizeofelement\n",sizeOfElement);
-            FDEBUG(})
-            removeWarning += fread(&this->totalNbParticles, sizeof(FSize), 1, file);
-
-            removeWarning += fread(&this->boxWidth, sizeof(FReal), 1, file);
-            this->boxWidth *= 2;
-
-            FReal x,y,z;
-            removeWarning += fread(&x, sizeof(FReal), 1, file);
-            removeWarning += fread(&y, sizeof(FReal), 1, file);
-            removeWarning += fread(&z, sizeof(FReal), 1, file);
-            this->centerOfBox.setPosition(x,y,z);
-
-            this->isOpenFlag = true;
-
-            const long int headDataOffSet = ftell(file);
-            fseek(file, 0L, SEEK_END);
-            const long int filesize = (ftell(file) - headDataOffSet) / sizeof(FReal);
-
-            if(filesize/4 != this->totalNbParticles){
-                printf("Error fileSize %ld, nbPart %lld\n", filesize/4, this->totalNbParticles);
+                fclose(file);
             }
-
-            // in number of floats
-            const FSize startPart = app.getLeft(this->totalNbParticles);
-            const FSize endPart   = app.getRight(this->totalNbParticles);
-            nbParticles = (endPart - startPart);
-            const FSize bufsize = nbParticles * 4;
-            // local number to read
-            particles = new FReal[bufsize];
-
-            fseek(file, long(headDataOffSet + startPart * 4 * sizeof(FReal)), SEEK_SET);
-
-            removeWarning += fread(particles, sizeof(FReal), int(bufsize), file);
-
-            fclose(file);
         }
     }
 
diff --git a/Src/Files/FMpiTreeBuilder.hpp b/Src/Files/FMpiTreeBuilder.hpp
index 443bb9029f39b0e7639a2bf88184ea4a05446d43..3e83773d817dac0b96f5e0f6c5fa76c826a966d9 100644
--- a/Src/Files/FMpiTreeBuilder.hpp
+++ b/Src/Files/FMpiTreeBuilder.hpp
@@ -8,13 +8,21 @@
 #include "../Utils/FMemUtils.hpp"
 #include "../Utils/FTrace.hpp"
 
-/** This class manage the loading of particles
-  * for the mpi version.
-  * It use a BinLoader and then sort the data
-  * with a parallel quick sort.
+/** This class manage the loading of particles for the mpi version.
+  * It use a BinLoader and then sort the data with a parallel quick sort
+  * or bitonic sort.
+  * After it needs to merge the leaves to finally equalize the data.
   */
 template<class ParticleClass>
 class FMpiTreeBuilder{
+public:
+    /** What sorting algorithm to use */
+    enum SortingType{
+        QuickSort,
+        BitonicSort,
+    };
+
+private:
     /** This method has been tacken from the octree
       * it computes a tree coordinate (x or y or z) from real position
       */
@@ -27,83 +35,10 @@ class FMpiTreeBuilder{
             return index;
     }
 
-    /** get current rank */
-    static int MpiGetRank(MPI_Comm comm = MPI_COMM_WORLD){
-        int rank(0);
-        MPI_Comm_rank(comm, &rank);
-        return rank;
-    }
-
-    /** get current nb procs */
-    static int MpiGetNbProcs(MPI_Comm comm = MPI_COMM_WORLD){
-        int nb(0);
-        MPI_Comm_size(comm, &nb);
-        return nb;
-    }
-
-    /** receive data from a tag function */
-    static void receiveDataFromTag(const int inSize, const int inTag, void* const inData, int* const inSource = 0, int* const inFilledSize = 0){
-        MPI_Status status;
-        MPI_Recv(inData, inSize, MPI_CHAR, MPI_ANY_SOURCE, inTag, MPI_COMM_WORLD, &status);
-        if(inSource) *inSource = status.MPI_SOURCE;
-        if(inFilledSize) MPI_Get_count(&status,MPI_CHAR,inFilledSize);
-    }
-
-    template< class T >
-    static T GetLeft(const T inSize) {
-        const double step = (double(inSize) / double(MpiGetNbProcs()));
-        return T(FMath::Ceil(step * double(MpiGetRank())));
-    }
-
-    template< class T >
-    static T GetRight(const T inSize) {
-        const double step = (double(inSize) / double(MpiGetNbProcs()));
-        const T res = T(FMath::Ceil(step * double(MpiGetRank()+1)));
-        if(res > inSize) return inSize;
-        else return res;
-    }
-
-    template< class T >
-    static T GetOtherRight(const T inSize, const int other) {
-        const double step = (double(inSize) / MpiGetNbProcs());
-        const T res = T(FMath::Ceil(step * (other+1)));
-        if(res > inSize) return inSize;
-        else return res;
-    }
-
-    template< class T >
-    static T GetOtherLeft(const T inSize, const int other) {
-        const double step = (double(inSize) / double(MpiGetNbProcs()));
-        return T(FMath::Ceil(step * double(other)));
-    }
-
-    template <class T1, class T2>
-    static T1 Min( const T1 v1, const T2 v2){
-        return T1(v1 < v2 ? v1 : v2);
-    }
-
-    template <class T1, class T2>
-    static T1 Max( const T1 v1, const T2 v2){
-        return T1(v1 > v2 ? v1 : v2);
-    }
-
-
-    /** This struct is used to represent a particles group to
-      * sort them easily
-      */
-    struct ParticlesGroup {
-        int number;
-        int positionInArray;
-        MortonIndex index;
-        ParticlesGroup(const int inNumber = 0 , const int inPositionInArray = 0, const MortonIndex inIndex = 0)
-            : number(inNumber), positionInArray(inPositionInArray), index(inIndex) {
-        }
-    };
-
 
     /** A particle may not have a MortonIndex Method
       * but they are sorted on their morton index
-      * so this struct store a particle and its index
+      * so this struct store a particle + its index
       */
     struct IndexedParticle{
         MortonIndex index;
@@ -114,75 +49,63 @@ class FMpiTreeBuilder{
         }
     };
 
-    char* intervals;
-    FSize nbLeavesInIntervals;
+    //////////////////////////////////////////////////////////////////////////
+    // To sort tha particles we hold
+    //////////////////////////////////////////////////////////////////////////
 
-private:
-    // Forbid copy
-    FMpiTreeBuilder(const FMpiTreeBuilder&){}
-    FMpiTreeBuilder& operator=(const FMpiTreeBuilder&){return *this;}
 
-public:
-    /** Constructor */
-    FMpiTreeBuilder()
-        :  intervals(0), nbLeavesInIntervals(0) {
-    }
+    template <class LoaderClass>
+    static void SortParticles( const FMpi::FComm& communicator, LoaderClass& loader, const SortingType type,
+                        const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){
+        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) );
+
+        // create particles
+        IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()];
+        FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles());
+        F3DPosition boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2));
+        FTreeCoordinate host;
+
+        const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (TreeHeight - 1) );
+        for(long idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+            loader.fillParticle(realParticlesIndexed[idxPart].particle);
+            host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel ));
+            host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel ));
+            host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel ));
+
+            realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1);
+        }
 
-    /** Destructor */
-    virtual ~FMpiTreeBuilder(){
-        delete [] intervals;
+        // sort particles
+        if(type == QuickSort){
+            FQuickSort<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(), *outputArray, *outputSize,communicator);
+            delete [] (realParticlesIndexed);
+        }
+        else {
+            FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, loader.getNumberOfParticles(), communicator );
+            *outputArray = realParticlesIndexed;
+            *outputSize = loader.getNumberOfParticles();
+        }
     }
 
-    /** Split and sort the file */
-    template <class LoaderClass>
-    bool splitAndSortFile(LoaderClass& loader, const int NbLevels){
-        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader" , __FILE__ , __LINE__) );
-        const int rank = MpiGetRank();
-        const int nbProcs = MpiGetNbProcs();
 
-        // First we create the particles that belong to us (our proc)
-        // we compute their morton index to be able to sort them
-        //
+    //////////////////////////////////////////////////////////////////////////
+    // To merge the leaves
+    //////////////////////////////////////////////////////////////////////////
 
-        IndexedParticle* outputArray = 0;
-        FSize outputSize = 0;
-        {
-            FTRACE( FTrace::FRegion regionTrace("Insert Particles", __FUNCTION__ , __FILE__ , __LINE__) );
-            // create particles
-            IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()];
-            FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles());
-            F3DPosition boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2));
-            FTreeCoordinate host;
-
-            const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (NbLevels - 1) );
-            for(long idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
-                loader.fillParticle(realParticlesIndexed[idxPart].particle);
-                host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel ));
-                host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel ));
-                host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel ));
-
-                realParticlesIndexed[idxPart].index = host.getMortonIndex(NbLevels - 1);
-            }
+    static void MergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize& workingSize,
+                            char** leavesArray, FSize* const leavesSize){
+        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) );
+        const int rank = communicator.processId();
+        const int nbProcs = communicator.processCount();
 
-            // sort particles
-            if(false){
-                FQuickSort<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(),outputArray,outputSize);
-                delete [] (realParticlesIndexed);
-            }
-            else {
-                FBitonicSort::sort<IndexedParticle,MortonIndex>( realParticlesIndexed, loader.getNumberOfParticles() );
-                outputArray = realParticlesIndexed;
-                outputSize = loader.getNumberOfParticles();
-            }
-        }
         // be sure there is no splited leaves
         // to do that we exchange the first index with the left proc
         {
             FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) );
 
             MortonIndex otherFirstIndex = -1;
-            if(outputSize != 0 && rank != 0 && rank != nbProcs - 1){
-                MPI_Sendrecv(&outputArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs,
+            if(workingSize != 0 && rank != 0 && rank != nbProcs - 1){
+                MPI_Sendrecv(&workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs,
                              &otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs,
                              MPI_COMM_WORLD, MPI_STATUS_IGNORE);
             }
@@ -190,7 +113,7 @@ public:
                 MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
             }
             else if( rank == nbProcs - 1){
-                MPI_Send( &outputArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, MPI_COMM_WORLD);
+                MPI_Send( &workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, MPI_COMM_WORLD);
             }
             else {
                 MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
@@ -198,20 +121,20 @@ public:
             }
 
             // at this point every one know the first index of his right neighbors
-            const bool needToRecvBeforeSend = (rank != 0 && ((outputSize && otherFirstIndex == outputArray[0].index ) || !outputSize));
+            const bool needToRecvBeforeSend = (rank != 0 && ((workingSize && otherFirstIndex == workingArray[0].index ) || !workingSize));
             MPI_Request requestSendLeaf;
 
             IndexedParticle* sendBuffer = 0;
             if(rank != nbProcs - 1 && needToRecvBeforeSend == false){
-                FSize idxPart = outputSize - 1 ;
-                while(idxPart >= 0 && outputArray[idxPart].index == otherFirstIndex){
+                FSize idxPart = workingSize - 1 ;
+                while(idxPart >= 0 && workingArray[idxPart].index == otherFirstIndex){
                     --idxPart;
                 }
-                const int particlesToSend = int(outputSize - 1 - idxPart);
+                const int particlesToSend = int(workingSize - 1 - idxPart);
                 if(particlesToSend){
-                    outputSize -= particlesToSend;
+                    workingSize -= particlesToSend;
                     sendBuffer = new IndexedParticle[particlesToSend];
-                    memcpy(sendBuffer, &outputArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle));
+                    memcpy(sendBuffer, &workingArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle));
 
                     MPI_Isend( sendBuffer, particlesToSend * sizeof(IndexedParticle), MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, &requestSendLeaf);
                 }
@@ -230,15 +153,15 @@ public:
                 if(sendByOther){
                     sendByOther /= sizeof(IndexedParticle);
 
-                    const IndexedParticle* const reallocOutputArray = outputArray;
-                    const FSize reallocOutputSize = outputSize;
+                    const IndexedParticle* const reallocOutputArray = workingArray;
+                    const FSize reallocOutputSize = workingSize;
 
-                    outputSize += sendByOther;
-                    outputArray = new IndexedParticle[outputSize];
-                    FMemUtils::memcpy(&outputArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle));
+                    workingSize += sendByOther;
+                    workingArray = new IndexedParticle[workingSize];
+                    FMemUtils::memcpy(&workingArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle));
                     delete[] reallocOutputArray;
 
-                    MPI_Recv(outputArray, sizeof(IndexedParticle) * sendByOther, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+                    MPI_Recv(workingArray, sizeof(IndexedParticle) * sendByOther, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
                 }
                 else{
                     MPI_Recv( 0, 0, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
@@ -246,10 +169,10 @@ public:
             }
 
             if(rank != nbProcs - 1 && needToRecvBeforeSend == true){
-                MPI_Send( outputArray, int(outputSize * sizeof(IndexedParticle)), MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD);
-                delete[] outputArray;
-                outputArray = 0;
-                outputSize  = 0;
+                MPI_Send( workingArray, int(workingSize * sizeof(IndexedParticle)), MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, MPI_COMM_WORLD);
+                delete[] workingArray;
+                workingArray = 0;
+                workingSize  = 0;
             }
             else if(rank != nbProcs - 1){
                 MPI_Wait( &requestSendLeaf, MPI_STATUS_IGNORE);
@@ -262,18 +185,20 @@ public:
             FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) );
             // We now copy the data from a sorted type into real particles array + counter
 
-            nbLeavesInIntervals = 0;
-            if(outputSize){
-                intervals = new char[outputSize * (sizeof(ParticleClass) + sizeof(int))];
+            (*leavesSize)  = 0;
+            (*leavesArray) = 0;
+
+            if(workingSize){
+                (*leavesArray) = new char[workingSize * (sizeof(ParticleClass) + sizeof(int))];
 
                 MortonIndex previousIndex = -1;
-                char* writeIndex = intervals;
+                char* writeIndex = (*leavesArray);
                 int* writeCounter = 0;
 
-                for( FSize idxPart = 0; idxPart < outputSize ; ++idxPart){
-                    if( outputArray[idxPart].index != previousIndex ){
-                        previousIndex = outputArray[idxPart].index;
-                        ++nbLeavesInIntervals;
+                for( FSize idxPart = 0; idxPart < workingSize ; ++idxPart){
+                    if( workingArray[idxPart].index != previousIndex ){
+                        previousIndex = workingArray[idxPart].index;
+                        ++(*leavesSize);
 
                         writeCounter = reinterpret_cast<int*>( writeIndex );
                         writeIndex += sizeof(int);
@@ -281,24 +206,30 @@ public:
                         (*writeCounter) = 0;
                     }
 
-                    memcpy(writeIndex, &outputArray[idxPart].particle, sizeof(ParticleClass));
+                    memcpy(writeIndex, &workingArray[idxPart].particle, sizeof(ParticleClass));
 
                     writeIndex += sizeof(ParticleClass);
                     ++(*writeCounter);
                 }
             }
-            delete [] outputArray;
-        }
+            delete [] workingArray;
 
-        return true;
+            workingArray = 0;
+            workingSize = 0;
+        }
     }
 
+    //////////////////////////////////////////////////////////////////////////
+    // To equalize (same number of leaves among the procs)
+    //////////////////////////////////////////////////////////////////////////
+
     /** Put the interval into a tree */
     template <class OctreeClass>
-    bool intervalsToTree(OctreeClass& realTree){
-        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader" , __FILE__ , __LINE__) );
-        const int rank = MpiGetRank();
-        const int nbProcs = MpiGetNbProcs();
+    static void EqualizeAndFillTree(const FMpi::FComm& communicator,  OctreeClass& realTree,
+                             char*& leavesArray, FSize& nbLeavesInIntervals ){
+        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) );
+        const int rank = communicator.processId();
+        const int nbProcs = communicator.processCount();
         const FSize currentNbLeafs = nbLeavesInIntervals;
 
         // We have to know the number of leaves each procs holds
@@ -317,8 +248,8 @@ public:
         // So we can know the number of total leafs and
         // the number of leaves each procs must have at the end
         const FSize totalNbLeaves               = (currentLeafsOnMyLeft + currentNbLeafs + currentLeafsOnMyRight);
-        const FSize correctLeftLeavesNumber     = GetLeft(totalNbLeaves);
-        const FSize correctRightLeavesIndex     = GetRight(totalNbLeaves);
+        const FSize correctLeftLeavesNumber     = communicator.getLeft(totalNbLeaves);
+        const FSize correctRightLeavesIndex     = communicator.getRight(totalNbLeaves);
 
         // This will be used for the all to all
         int leavesToSend[nbProcs];
@@ -342,7 +273,7 @@ public:
             // Find the first proc that need my data
             int idxProc = rank - 1;
             while( idxProc > 0 ){
-                const FSize thisProcRight = GetOtherRight(totalNbLeaves, idxProc - 1);
+                const FSize thisProcRight = communicator.getOtherRight(totalNbLeaves, idxProc - 1);
                 // Go to left until proc-1 has a right index lower than my current left
                 if( thisProcRight < currentLeafsOnMyLeft){
                     break;
@@ -353,12 +284,12 @@ public:
             // Count data for this proc
             leftProcToStartSend = idxProc;
             int ICanGive = int(currentNbLeafs);
-            leavesToSend[idxProc] = int(Min(GetOtherRight(totalNbLeaves, idxProc), totalNbLeaves - currentLeafsOnMyRight)
-                                        - Max( currentLeafsOnMyLeft , GetOtherLeft(totalNbLeaves, idxProc)));
+            leavesToSend[idxProc] = int(FMath::Min(communicator.getOtherRight(totalNbLeaves, idxProc), totalNbLeaves - currentLeafsOnMyRight)
+                                        - FMath::Max( currentLeafsOnMyLeft , communicator.getOtherLeft(totalNbLeaves, idxProc)));
             {
                 bytesOffset[idxProc] = 0;
                 for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){
-                    currentIntervalPosition += ((*(int*)&intervals[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int);
+                    currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int);
                 }
                 bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]);
             }
@@ -367,11 +298,11 @@ public:
 
             // count data to other proc
             while(idxProc < rank && ICanGive){
-                leavesToSend[idxProc] = int(Min( GetOtherRight(totalNbLeaves, idxProc) - GetOtherLeft(totalNbLeaves, idxProc), ICanGive));
+                leavesToSend[idxProc] = int(FMath::Min( communicator.getOtherRight(totalNbLeaves, idxProc) - communicator.getOtherLeft(totalNbLeaves, idxProc), FSize(ICanGive)));
 
                 bytesOffset[idxProc] = int(currentIntervalPosition);
                 for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){
-                    currentIntervalPosition += ((*(int*)&intervals[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int);
+                    currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int);
                 }
                 bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]);
 
@@ -392,8 +323,8 @@ public:
             }
 
             // We have to jump the correct number of leaves
-            for(FSize idxLeaf = Max(iNeedToSendLeftCount,0) ; idxLeaf < endForMe ; ++idxLeaf){
-                const int nbPartInLeaf = (*(int*)&intervals[currentIntervalPosition]);
+            for(FSize idxLeaf = FMath::Max(iNeedToSendLeftCount,FSize(0)) ; idxLeaf < endForMe ; ++idxLeaf){
+                const int nbPartInLeaf = (*(int*)&leavesArray[currentIntervalPosition]);
                 currentIntervalPosition += (nbPartInLeaf * sizeof(ParticleClass)) + sizeof(int);
             }
         }
@@ -404,8 +335,8 @@ public:
             // Find the last proc on the right that need my data
             int idxProc = rank + 1;
             while( idxProc < nbProcs ){
-                const FSize thisProcLeft = GetOtherLeft(totalNbLeaves, idxProc);
-                const FSize thisProcRight = GetOtherRight(totalNbLeaves, idxProc);
+                const FSize thisProcLeft = communicator.getOtherLeft(totalNbLeaves, idxProc);
+                const FSize thisProcRight = communicator.getOtherRight(totalNbLeaves, idxProc);
                 // Progress until the proc+1 has its left index upper to my current right
                 if( thisProcLeft < currentLeafsOnMyLeft || (totalNbLeaves - currentLeafsOnMyRight) < thisProcRight){
                     break;
@@ -415,13 +346,13 @@ public:
 
             // Count the data
             int ICanGive = int(currentLeafsOnMyLeft + currentNbLeafs - correctRightLeavesIndex);
-            leavesToSend[idxProc] = int(Min(GetOtherRight(totalNbLeaves, idxProc) , (totalNbLeaves - currentLeafsOnMyRight))
-                                        - Max(GetOtherLeft(totalNbLeaves, idxProc), currentLeafsOnMyLeft) );
+            leavesToSend[idxProc] = int(FMath::Min(communicator.getOtherRight(totalNbLeaves, idxProc) , (totalNbLeaves - currentLeafsOnMyRight))
+                                        - FMath::Max(communicator.getOtherLeft(totalNbLeaves, idxProc), currentLeafsOnMyLeft) );
 
             {
                 bytesOffset[idxProc] = int(currentIntervalPosition);
                 for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){
-                    currentIntervalPosition += ((*(int*)&intervals[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int);
+                    currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int);
                 }
                 bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]);
             }
@@ -430,11 +361,11 @@ public:
 
             // Now Count the data to other
             while(idxProc < nbProcs && ICanGive){
-                leavesToSend[idxProc] = int(Min( GetOtherRight(totalNbLeaves, idxProc) - GetOtherLeft(totalNbLeaves, idxProc), ICanGive));
+                leavesToSend[idxProc] = int(FMath::Min( communicator.getOtherRight(totalNbLeaves, idxProc) - communicator.getOtherLeft(totalNbLeaves, idxProc), FSize(ICanGive)));
 
                 bytesOffset[idxProc] = int(currentIntervalPosition);
                 for(FSize idxLeaf = 0 ; idxLeaf < leavesToSend[idxProc] ; ++idxLeaf){
-                    currentIntervalPosition += ((*(int*)&intervals[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int);
+                    currentIntervalPosition += ((*(int*)&leavesArray[currentIntervalPosition]) * sizeof(ParticleClass)) + sizeof(int);
                 }
                 bytesToSend[idxProc] = int(currentIntervalPosition - bytesOffset[idxProc]);
 
@@ -465,7 +396,7 @@ public:
 
         // Send alll to  all
         char* const recvbuf = new char[sumBytesToRecv];
-        MPI_Alltoallv(intervals, bytesToSend, bytesOffset, MPI_BYTE,
+        MPI_Alltoallv(leavesArray, bytesToSend, bytesOffset, MPI_BYTE,
                       recvbuf, bytesToRecv, bytesOffsetToRecv, MPI_BYTE,
                       MPI_COMM_WORLD);
 
@@ -496,9 +427,9 @@ public:
                 endForMe -= iNeedToSendRightCount;
             }
 
-            for(FSize idxLeaf = Max(iNeedToSendLeftCount,0) ; idxLeaf < endForMe ; ++idxLeaf){
-                const int nbPartInLeaf = (*(int*)&intervals[currentIntervalPosition]);
-                ParticleClass* const particles = reinterpret_cast<ParticleClass*>(&intervals[currentIntervalPosition] + sizeof(int));
+            for(FSize idxLeaf = FMath::Max(iNeedToSendLeftCount,FSize(0)) ; idxLeaf < endForMe ; ++idxLeaf){
+                const int nbPartInLeaf = (*(int*)&leavesArray[currentIntervalPosition]);
+                ParticleClass* const particles = reinterpret_cast<ParticleClass*>(&leavesArray[currentIntervalPosition] + sizeof(int));
 
                 for(int idxPart = 0 ; idxPart < nbPartInLeaf ; ++idxPart){
                     realTree.insert(particles[idxPart]);
@@ -508,9 +439,33 @@ public:
             }
         }
 
+        delete[] leavesArray;
+        leavesArray = 0;
+        nbLeavesInIntervals = 0;
+    }
+
+public:
 
-        return true;
+    //////////////////////////////////////////////////////////////////////////
+    // The builder function
+    //////////////////////////////////////////////////////////////////////////
+
+    template <class LoaderClass, class OctreeClass>
+    static void LoaderToTree(const FMpi::FComm& communicator, LoaderClass& loader,
+                             OctreeClass& realTree, const SortingType type = QuickSort){
+
+        IndexedParticle* particlesArray = 0;
+        FSize particlesSize = 0;
+        SortParticles(communicator, loader, type, realTree.getHeight(), &particlesArray, &particlesSize);
+
+        char* leavesArray = 0;
+        FSize leavesSize = 0;
+        MergeLeaves(communicator, particlesArray, particlesSize, &leavesArray, &leavesSize);
+
+        EqualizeAndFillTree(communicator, realTree, leavesArray, leavesSize);
     }
+
+
 };
 
 #endif // FMPITREEBUILDER_H
diff --git a/Src/Utils/FBitonicSort.hpp b/Src/Utils/FBitonicSort.hpp
index 30c4328303fa25c3c88df2a502f1baf7a9f19685..3933ef75e7feb9d6504673fcd3684964a2c2e777 100644
--- a/Src/Utils/FBitonicSort.hpp
+++ b/Src/Utils/FBitonicSort.hpp
@@ -10,6 +10,13 @@
 #include "FMpi.hpp"
 #include "FQuickSort.hpp"
 
+
+/** This class is a parallel bitonic sort
+  * it is based on the paper :
+  * Library Support for Parallel Sorting in Scientific Computations
+  * Holger Dachsel1 , Michael Hofmann2, , and Gudula R ̈nger2
+  */
+template <class SortType, class CompareType, class IndexType>
 class FBitonicSort {
 private:
 
@@ -17,22 +24,15 @@ private:
     // Bitonic parallel sort !
     ////////////////////////////////////////////////////////////////
 
-    // Mpi flag
-    static const int FlagMin = 5;
-    static const int FlagMax = 6;
-    static const int FlagMinMess = 4;
-    static const int FlagMaxMess = 3;
-
     // This function exchange data with the other rank,
     // its send the max value and receive min value
-    template <class SortType, class CompareType, class IndexType>
-    static void sendMaxAndGetMin(SortType array[], const IndexType size, const int otherRank){
+    static void SendMaxAndGetMin(SortType array[], const IndexType size, const int otherRank){
         IndexType left  = -1;
         IndexType right = size - 1;
         IndexType pivot = left + (right - left + 1)/2;
         CompareType otherValue = -1;
         CompareType tempCompareValue = CompareType(array[pivot]);
-        MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMin,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMax,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
+        MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMin,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMax,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
 
         while( pivot != left && pivot != right  && array[pivot] != otherValue) {
 
@@ -45,19 +45,19 @@ private:
             pivot = left + (right - left + 1)/2;
             tempCompareValue = CompareType(array[pivot]);
 
-            MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMin,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMax,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
+            MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMin,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMax,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
         }
 
         if( otherValue <= array[pivot] ){
             MPI_Sendrecv_replace(&array[pivot], int((size - pivot) * sizeof(SortType)) , MPI_BYTE,
-                                   otherRank, FlagMinMess, otherRank, FlagMaxMess,
+                                   otherRank, FMpi::TagBitonicMinMess, otherRank, FMpi::TagBitonicMaxMess,
                                    MPI_COMM_WORLD, MPI_STATUS_IGNORE);
 
         }
         else if( array[pivot] < otherValue){
             if(pivot != size - 1){
                 MPI_Sendrecv_replace(&array[pivot + 1], int((size - pivot - 1) * sizeof(SortType)) , MPI_BYTE,
-                                       otherRank, FlagMinMess, otherRank, FlagMaxMess,
+                                       otherRank, FMpi::TagBitonicMinMess, otherRank, FMpi::TagBitonicMaxMess,
                                        MPI_COMM_WORLD, MPI_STATUS_IGNORE);
             }
         }
@@ -66,14 +66,13 @@ private:
 
     // This function exchange data with the other rank,
     // its send the min value and receive max value
-    template <class SortType, class CompareType, class IndexType>
-    static void sendMinAndGetMax(SortType array[], const IndexType size, const int otherRank){
+    static void SendMinAndGetMax(SortType array[], const IndexType size, const int otherRank){
         IndexType left  = 0;
         IndexType right = size ;
         IndexType pivot = left + (right - left)/2;
         CompareType otherValue = -1;
         CompareType tempCompareValue = CompareType(array[pivot]);
-        MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMax,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMin,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
+        MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMax,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMin,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
 
         while(  pivot != left  && array[pivot] != otherValue) {
 
@@ -85,19 +84,19 @@ private:
             }
             pivot = left + (right - left)/2;
             tempCompareValue = CompareType(array[pivot]);
-            MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMax,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FlagMin,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
+            MPI_Sendrecv(&tempCompareValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMax,&otherValue,sizeof(CompareType),MPI_BYTE,otherRank,FMpi::TagBitonicMin,MPI_COMM_WORLD,MPI_STATUS_IGNORE);
         }
 
 
         if( array[pivot] <= otherValue ){
             MPI_Sendrecv_replace(&array[0], int((pivot + 1) * sizeof(SortType)) , MPI_BYTE,
-                                   otherRank, FlagMaxMess, otherRank, FlagMinMess,
+                                   otherRank, FMpi::TagBitonicMaxMess, otherRank, FMpi::TagBitonicMinMess,
                                    MPI_COMM_WORLD, MPI_STATUS_IGNORE);
         }
         else if( otherValue < array[pivot]){
             if(pivot != 0){
                 MPI_Sendrecv_replace(&array[0], int((pivot) * sizeof(SortType)) , MPI_BYTE,
-                                       otherRank, FlagMaxMess, otherRank, FlagMinMess,
+                                       otherRank, FMpi::TagBitonicMaxMess, otherRank, FMpi::TagBitonicMinMess,
                                        MPI_COMM_WORLD, MPI_STATUS_IGNORE);
             }
         }
@@ -127,9 +126,10 @@ public:
         endfor
     endfor
       */
-    template <class SortType, class CompareType, class IndexType>
-    static void sort(SortType array[], const IndexType size, const int np, const int rank){
+    static void Sort(SortType array[], const IndexType size, const FMpi::FComm& comm){
         FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Bitonic" , __FILE__ , __LINE__) );
+        const int np = comm.processCount();
+        const int rank = comm.processId();
 
         FQuickSort<SortType,CompareType,IndexType>::QsOmp(array, size);
 
@@ -146,10 +146,10 @@ public:
                 const int otherRank = rank ^ (1 << otherBit);
 
                 if( diBit != myOtherBit ){
-                    sendMinAndGetMax<SortType,CompareType>(array, size, otherRank);
+                    SendMinAndGetMax(array, size, otherRank);
                 }
                 else{
-                    sendMaxAndGetMin<SortType,CompareType>(array, size, otherRank);
+                    SendMaxAndGetMin(array, size, otherRank);
                 }
                 // A merge sort is possible since the array is composed
                 // by two part already sorted, but we want to do this in space
@@ -157,17 +157,6 @@ public:
             }
         }
     }
-
-    template <class SortType, class CompareType, class IndexType>
-    static void sort(SortType array[], const IndexType size){
-        int rank = 0;
-        int nprocs = 0;
-
-        MPI_Comm_size(MPI_COMM_WORLD,&nprocs);
-        MPI_Comm_rank(MPI_COMM_WORLD,&rank);
-
-        sort<SortType, CompareType>(array, size, nprocs, rank);
-    }
 };
 
 #endif // BITONICSORT_HPP
diff --git a/Src/Utils/FMpi.hpp b/Src/Utils/FMpi.hpp
index 315a2f497736efdbc8d81e2c675ea8069f12df65..16fb396f1656ee08d31671c28d161fa7e99f888d 100644
--- a/Src/Utils/FMpi.hpp
+++ b/Src/Utils/FMpi.hpp
@@ -2,7 +2,6 @@
 #define FMPI_HPP
 // /!\ Please, you must read the license at the bottom of this page
 
-
 #include "FGlobal.hpp"
 #include "FMath.hpp"
 
@@ -15,13 +14,13 @@
 * @class FMpi
 * Please read the license
 *
-* This namespace is to compile with or without mpi
-* It defines a class to access MPI data, if the lib is compiled
-* without mpi support then simulate data.
 */
 
 class FMpi {
 public:
+////////////////////////////////////////////////////////
+// MPI Flag
+////////////////////////////////////////////////////////
     enum FMpiTag {
         // FMpiTreeBuilder
         TagExchangeIndexs,
@@ -37,294 +36,255 @@ public:
         TagFmmL2L,
         TagFmmP2P,
 
+        // Bitonic,
+        TagBitonicMin,
+        TagBitonicMax,
+        TagBitonicMinMess,
+        TagBitonicMaxMess,
+
         // Last defined tag
         TagLast,
     };
 
-#ifdef SCALFMM_USE_MPI
+////////////////////////////////////////////////////////
+// FComm to factorize MPI_Comm work
+////////////////////////////////////////////////////////
+
+    /** This class is used to put all the usual method
+      * related mpi comm
+      */
+    class FComm {
+        int rank;   //< rank related to the comm
+        int nbProc; //< nb proc in this group
+
+        MPI_Comm communicator;  //< current mpi communicator
+        MPI_Group group;        //< current mpi group
+
+        // Forbid copy
+        FComm(const FComm&){}
+        FComm& operator=(const FComm&){return *this;}
+
+        // reset : get rank and nb proc from mpi
+        void reset(){
+            FMpi::Assert( MPI_Comm_rank(communicator,&rank),  __LINE__ );
+            FMpi::Assert( MPI_Comm_size(communicator,&nbProc),  __LINE__ );
+        }
+
+    public:
+        /** Constructor : dup the comm given in parameter */
+        explicit FComm(MPI_Comm inCommunicator ) {
+            FMpi::Assert( MPI_Comm_dup(inCommunicator, &communicator),  __LINE__ , "comm dup");
+            FMpi::Assert( MPI_Comm_group(communicator, &group),  __LINE__ , "comm group");
+
+            reset();
+        }
+
+        /** Free communicator and group */
+        virtual ~FComm(){
+            FMpi::Assert( MPI_Comm_free(&communicator),  __LINE__ );
+            FMpi::Assert( MPI_Group_free(&group),  __LINE__ );
+        }
+
+        /** To get the mpi comm needed for communication */
+        MPI_Comm getComm() const {
+            return communicator;
+        }
+
+        /** The current rank */
+        int processId() const {
+            return rank;
+        }
+
+        /** The current number of procs in the group */
+        int processCount() const {
+            return nbProc;
+        }
+
+        ////////////////////////////////////////////////////////////
+        // Split/Chunk functions
+        ////////////////////////////////////////////////////////////
+
+        /** Get a left index related to a size */
+        template< class T >
+        T getLeft(const T inSize)  const {
+            const double step = (double(inSize) / double(processCount()));
+            return T(FMath::Ceil(step * double(processId())));
+        }
+
+        /** Get a right index related to a size */
+        template< class T >
+        T getRight(const T inSize)  const {
+            const double step = (double(inSize) / double(processCount()));
+            const T res = T(FMath::Ceil(step * double(processId()+1)));
+            if(res > inSize) return inSize;
+            else return res;
+        }
+
+        /** Get a right index related to a size and another id */
+        template< class T >
+        T getOtherRight(const T inSize, const int other)  const {
+            const double step = (double(inSize) / double(processCount()));
+            const T res = T(FMath::Ceil(step * double(other+1)));
+            if(res > inSize) return inSize;
+            else return res;
+        }
+
+        /** Get a left index related to a size and another id */
+        template< class T >
+        T getOtherLeft(const T inSize, const int other) const {
+            const double step = (double(inSize) / double(processCount()));
+            return T(FMath::Ceil(step * double(other)));
+        }
+
+        /** Get a proc id from and index */
+        template< class T >
+        int getProc(const int position, const T inSize) const {
+            const double step = (double(inSize) / processCount());
+            return int(position/step);
+        }
+
+        ////////////////////////////////////////////////////////////
+        // Mpi interface functions
+        ////////////////////////////////////////////////////////////
+
+
+        /** Reduce a value for proc == 0 */
+        template< class T >
+        T reduceSum(T data) const {
+            T result;
+            FMpi::Assert( MPI_Reduce( &data, &result, 1, FMpi::GetType(data), MPI_SUM, 0, communicator ), __LINE__);
+            return result;
+        }
+
+        /** Reduce an average */
+        template< class T >
+        T reduceAverageAll(T data) const {
+            T result[processCount()];
+            FMpi::Assert( MPI_Allgather( &data, 1, FMpi::GetType(data), result, 1, FMpi::GetType(data), getComm()),  __LINE__ );
+
+            T average = 0;
+            for(int idxProc = 0 ; idxProc < processCount() ;++idxProc){
+                average += result[idxProc] / processCount();
+            }
+            return average;
+        }
+
+        /** Change the group size */
+        void groupReduce(const int from , const int to){
+            int procsIdArray[to - from + 1];
+            for(int idxProc = from ;idxProc <= to ; ++idxProc){
+                procsIdArray[idxProc - from] = idxProc;
+            }
+
+            MPI_Group previousGroup = group;
+            FMpi::Assert( MPI_Group_incl(previousGroup, to - from + 1 , procsIdArray, &group),  __LINE__ );
+
+            MPI_Comm previousComm = communicator;
+            FMpi::Assert( MPI_Comm_create(previousComm, group, &communicator),  __LINE__ );
+
+            MPI_Comm_free(&previousComm);
+            MPI_Group_free(&previousGroup);
+
+            reset();
+        }
+    };
 
 ////////////////////////////////////////////////////////
-// Use MPI
+// FMpi methods
 ////////////////////////////////////////////////////////
-    typedef MPI_Request Request;
 
     /*
-[fourmi062:15896] [[13237,0],1]-[[13237,1],1] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
-[fourmi056:04597] [[13237,0],3]-[[13237,1],3] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
-[fourmi053:08571] [[13237,0],5]-[[13237,1],5] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
+    We use init with thread because of an openmpi error:
 
-Erreur pour le proc1
-[[13237,1],1][btl_openib_component.c:3227:handle_wc] from fourmi062 to: fourmi056 error polling LP CQ with status LOCAL LENGTH ERROR status number 1 for wr_id 7134664 opcode 0  vendor error 105 qp_idx 3
-Tous on la meme erreur le 2e 1 est remplacé par le rang.
-*/
-    FMpi(int inArgc, char **  inArgv ) {
+    [fourmi062:15896] [[13237,0],1]-[[13237,1],1] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
+    [fourmi056:04597] [[13237,0],3]-[[13237,1],3] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
+    [fourmi053:08571] [[13237,0],5]-[[13237,1],5] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
+
+    Erreur pour le proc1
+    [[13237,1],1][btl_openib_component.c:3227:handle_wc] from fourmi062 to: fourmi056 error polling LP CQ with status LOCAL LENGTH ERROR status number 1 for wr_id 7134664 opcode 0  vendor error 105 qp_idx 3
+    Tous on la meme erreur le 2e 1 est remplacé par le rang.
+    */
+    FMpi(int inArgc, char **  inArgv ) : communicator(0) {
         int provided = 0;
-        MPI_Init_thread(&inArgc,&inArgv, MPI_THREAD_MULTIPLE, &provided);
+        FMpi::Assert( MPI_Init_thread(&inArgc,&inArgv, MPI_THREAD_MULTIPLE, &provided), __LINE__);
+        communicator = new FComm(MPI_COMM_WORLD);
     }
 
+    /** Delete the communicator and call mpi finalize */
     ~FMpi(){
+        delete communicator;
         MPI_Finalize();
     }
 
-    void allgather(void* const sendbuf, const int sendcount, void* const recvbuf, const int recvcount, MPI_Datatype datatype = MPI_INT){
-        MPI_Allgather( sendbuf, sendcount, datatype, recvbuf, recvcount, datatype, MPI_COMM_WORLD);
+    /** Get the global communicator */
+    const FComm& global() {
+        return (*communicator);
     }
 
-    void sendData(const int inReceiver, const int inSize, void* const inData, const int inTag){
-        MPI_Send(inData, inSize, MPI_CHAR , inReceiver, inTag, MPI_COMM_WORLD);
-    }
+    ////////////////////////////////////////////////////////////
+    // Mpi Types meta function
+    ////////////////////////////////////////////////////////////
 
-    void isendData(const int inReceiver, const int inSize, void* const inData, const int inTag, MPI_Request*const request){
-        MPI_Isend(inData, inSize, MPI_CHAR , inReceiver, inTag, MPI_COMM_WORLD, request);
+    static MPI_Datatype GetType(long long&){
+        return MPI_LONG_LONG;
     }
 
-    void iWait( MPI_Request*const request ){
-        MPI_Status status;
-        MPI_Wait(request, &status);
+    static MPI_Datatype GetType(double&){
+        return MPI_DOUBLE;
     }
 
-    void iWaitall(MPI_Request requests[], const int count){
-        MPI_Status status[count];
-        MPI_Waitall(count, requests, status);
+    static MPI_Datatype GetType(float&){
+        return MPI_FLOAT;
     }
 
-    void waitMessage(int*const sender, int*const tag = 0, const int restrictsender = MPI_ANY_SOURCE, const int restricttag = MPI_ANY_TAG){
-        MPI_Status status;
-        MPI_Probe( restrictsender, restricttag, MPI_COMM_WORLD, &status );
-        if(sender) *sender = status.MPI_SOURCE;
-        if(tag) *tag = status.MPI_TAG;
-    }
-
-    void receiveData(const int inSize, void* const inData, int* const inSource = 0, int* const inTag = 0, int* const inFilledSize = 0){
-        MPI_Status status;
-        MPI_Recv(inData, inSize, MPI_CHAR, MPI_ANY_SOURCE, MPI_ANY_TAG,MPI_COMM_WORLD, &status);
-        if(inSource) *inSource = status.MPI_SOURCE;
-        if(inTag) *inTag = status.MPI_TAG;
-        if(inFilledSize) MPI_Get_count(&status,MPI_CHAR,inFilledSize);
-    }
-
-    void receiveDataFromTag(const int inSize, const int inTag, void* const inData, int* const inSource = 0, int* const inFilledSize = 0){
-        MPI_Status status;
-        MPI_Recv(inData, inSize, MPI_CHAR, MPI_ANY_SOURCE, inTag, MPI_COMM_WORLD, &status);
-        if(inSource) *inSource = status.MPI_SOURCE;
-        if(inFilledSize) MPI_Get_count(&status,MPI_CHAR,inFilledSize);
-    }
-
-    void receiveDataFromTagAndSource(const int inSize, const int inTag, const int inSource, void* const inData, int* const inFilledSize = 0){
-        MPI_Status status;
-        MPI_Recv(inData, inSize, MPI_CHAR, inSource, inTag, MPI_COMM_WORLD, &status);
-        if(inFilledSize) MPI_Get_count(&status,MPI_CHAR,inFilledSize);
-    }
-
-    bool receivedData(){
-        int flag;
-        MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, MPI_STATUS_IGNORE);
-        return flag;
-    }
-
-    bool receivedData(int* const tag){
-        MPI_Status status;
-        int flag;
-        MPI_Iprobe(MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &flag, &status);
-        *tag = status.MPI_TAG;
-        return flag;
-    }
-
-    int processId() {
-        int id;
-        MPI_Comm_rank(MPI_COMM_WORLD,&id);
-        return id;
-    }
-
-    int processCount() {
-        int count;
-        MPI_Comm_size(MPI_COMM_WORLD,&count);
-        return count;
-    }
-
-    void processBarrier() {
-        MPI_Barrier(MPI_COMM_WORLD);
-    }
-
-    void abort(const int inErrorCode = 1) {
-        MPI_Abort(MPI_COMM_WORLD, inErrorCode);
-    }
-
-    template < class T >
-    MPI_Datatype getType(){
+    static MPI_Datatype GetType(int&){
         return MPI_INT;
     }
 
-    template< class T >
-    T reduceSum(T data){
-        T result;
-        MPI_Reduce( &data, &result, 1, getType<T>(), MPI_SUM, 0, MPI_COMM_WORLD );
-        return result;
-    }
-
-    template< class T >
-    T reduceMin(T myMin){
-        T result;
-        MPI_Reduce( &myMin, &result, 1, getType<T>(), MPI_MIN, 0, MPI_COMM_WORLD );
-        return result;
-    }
+    ////////////////////////////////////////////////////////////
+    // Mpi interface functions
+    ////////////////////////////////////////////////////////////
 
-    template< class T >
-    T reduceMax(T myMax){
-        T result;
-        MPI_Reduce( &myMax, &result, 1, getType<T>(), MPI_MAX, 0, MPI_COMM_WORLD );
-        return result;
+    /** generic mpi assert function */
+    static void Assert(const int test, const unsigned line, const char* const message = 0){
+        if(test != MPI_SUCCESS){
+            printf("[ERROR-QS] Test failled at line %d, result is %d", line, test);
+            if(message) printf(", message: %s",message);
+            printf("\n");
+            fflush(stdout);
+            MPI_Abort(MPI_COMM_WORLD, int(line) );
+        }
     }
 
-    template< class T >
-    T broadcast(T value, const int from = 0){
-        MPI_Bcast ( &value, 1, getType<T>(), from, MPI_COMM_WORLD );
-        return value;
+    /** Compute a left index from data */
+    template <class T>
+    static T GetLeft(const T inSize, const int inIdProc, const int inNbProc) {
+        const double step = (double(inSize) / inNbProc);
+        return T(ceil(step * inIdProc));
     }
 
-#else
-////////////////////////////////////////////////////////
-// Without MPI
-////////////////////////////////////////////////////////
-    typedef int Request;
-
-    FMpi(int inArgc, char **  inArgv ) {}
-
-    void allgather(void* const , const int , void* const , const int, MPI_Datatype = 0){
-    }
-
-    void sendData(const int, const int, void* const, const int ){}
-
-    void isendData(const int , const int , void* const , const int , Request*const ){
-    }
-
-    void iWait( Request*const ){
-    }
-
-    void receiveData(const int, void* const, int* const inSource = 0, int* const inTag = 0, int* const inFilledSize = 0){
-        if(inSource) *inSource = 0;
-        if(inTag) *inTag = 0;
-        if(inFilledSize) *inFilledSize = 0;
-    }
-
-    void receiveDataFromTag(const int , const int , void* const , int* const inSource = 0, int* const inFilledSize = 0){
-        if(inSource) *inSource = 0;
-        if(inFilledSize) *inFilledSize = 0;
-    }
-
-    void receiveDataFromTagAndSource(const int , const int , const int , void* const , int* const inFilledSize = 0){
-        if(inFilledSize) *inFilledSize = 0;
-    }
-
-    bool receivedData(){
-        return false;
-    }
-
-    bool receivedData(int* const){
-        return false;
-    }
-
-    int processId() {
-        return 0;
-    }
-
-    int processCount() {
-        return 1;
-    }
-
-    void processBarrier() {}
-
-    void abort(const int inErrorCode = 1) {
-        exit(inErrorCode);
-    }
-
-    template< class T >
-    T reduceSum(T data){
-        return data;
-    }
-
-    template< class T >
-    T reduceMin(T myMin){
-        return myMin;
-    }
-
-    template< class T >
-    T reduceMax(T myMax){
-        return myMax;
-    }
-
-    template< class T >
-    T broadcast(T value, const int = 0){
-        return value;
-    }
-
-#endif
-
-////////////////////////////////////////////////////////
-// To use in any case
-////////////////////////////////////////////////////////
-    bool isMaster() {
-        return !processId();
-    }
-
-    bool isAlone() {
-        return processCount() == 1;
-    }
-
-    bool isSlave() {
-        return processId();
-    }
-
-    template< class T >
-    T getLeft(const T inSize) {
-        const float step = (float(inSize) / float(processCount()));
-        return T(FMath::Ceil(step * float(processId())));
-    }
-
-    template< class T >
-    T getRight(const T inSize) {
-        const float step = (float(inSize) / float(processCount()));
-        const T res = T(FMath::Ceil(step * float(processId()+1)));
+    /** Compute a right index from data */
+    template <class T>
+    static T GetRight(const T inSize, const int inIdProc, const int inNbProc) {
+        const double step = (double(inSize) / inNbProc);
+        const T res = T(ceil(step * (inIdProc+1)));
         if(res > inSize) return inSize;
         else return res;
     }
 
-    template< class T >
-    T getOtherRight(const T inSize, const int other) {
-        const float step = (float(inSize) / float(processCount()));
-        const T res = T(FMath::Ceil(step * float(other+1)));
-        if(res > inSize) return inSize;
-        else return res;
+    /** Compute a proc id from index & data */
+    template <class T>
+    static int GetProc(const T position, const T inSize, const int inNbProc) {
+        const double step = double(inSize) / double(inNbProc);
+        return int(double(position)/step);
     }
 
-    template< class T >
-    int getProc(const int position, const T inSize) {
-        const float step = (float(inSize) / processCount());
-        return int(position/step);
-    }
+
+private:
+    /** The original communicator */
+    FComm* communicator;
 };
 
-#ifdef SCALFMM_USE_MPI
-template <>
-MPI_Datatype FMpi::getType<long long>(){
-    return MPI_LONG_LONG;
-}
-
-template <>
-MPI_Datatype FMpi::getType<double>(){
-    return MPI_DOUBLE;
-}
-
-template <>
-MPI_Datatype FMpi::getType<float>(){
-    return MPI_FLOAT;
-}
-
-template <>
-MPI_Datatype FMpi::getType<int>(){
-    return MPI_INT;
-}
-#endif
 
 
 #endif //FMPI_HPP
diff --git a/Src/Utils/FQuickSort.hpp b/Src/Utils/FQuickSort.hpp
index d61494d827a4b707ed874e2b6ec51fbfddf4b8d0..8a5579eaa8e01756a64ce7eec5514a653d3b4616 100644
--- a/Src/Utils/FQuickSort.hpp
+++ b/Src/Utils/FQuickSort.hpp
@@ -16,25 +16,22 @@
 
 #include "FOmpBarrier.hpp"
 
+/** This class is parallel quick sort
+  * It hold a mpi version
+  * + 2 openmp versions (one on tasks and the other like mpi)
+  * + a sequential version
+  *
+  * The task based algorithm is easy to undestand,
+  * for the mpi/openmp2nd please see
+  * Introduction to parallel computing (Grama Gupta Karypis Kumar)
+  */
+
 template <class SortType, class CompareType, class IndexType>
 class FQuickSort {
     ////////////////////////////////////////////////////////////
     // Miscialenous functions
     ////////////////////////////////////////////////////////////
 
-
-    /** To get max between 2 values */
-    template <class NumType>
-    static NumType Max(const NumType inV1, const NumType inV2){
-        return (inV1 > inV2 ? inV1 : inV2);
-    }
-
-    /** To get min between 2 values */
-    template <class NumType>
-    static NumType Min(const NumType inV1, const NumType inV2){
-        return (inV1 < inV2 ? inV1 : inV2);
-    }
-
     /** swap to value */
     template <class NumType>
     static inline void Swap(NumType& value, NumType& other){
@@ -43,92 +40,11 @@ class FQuickSort {
         other = temp;
     }
 
-
-    ////////////////////////////////////////////////////////////
-    // Split information
-    ////////////////////////////////////////////////////////////
-
-    static IndexType getLeft(const IndexType inSize, const int inIdProc, const int inNbProc) {
-        const double step = (double(inSize) / inNbProc);
-        return IndexType(ceil(step * inIdProc));
-    }
-
-    static IndexType getRight(const IndexType inSize, const int inIdProc, const int inNbProc) {
-        const double step = (double(inSize) / inNbProc);
-        const IndexType res = IndexType(ceil(step * (inIdProc+1)));
-        if(res > inSize) return inSize;
-        else return res;
-    }
-
-    static int getProc(const IndexType position, const IndexType inSize, const int inNbProc) {
-        const double step = double(inSize) / double(inNbProc);
-        return int(double(position)/step);
-    }
-
-
-    ////////////////////////////////////////////////////////////
-    // MPI Function
-    ////////////////////////////////////////////////////////////
-
-    /** generic mpi assert function */
-    static void mpiassert(const int test, const unsigned line, const char* const message = 0){
-        if(test != MPI_SUCCESS){
-            printf("[ERROR-QS] Test failled at line %d, result is %d", line, test);
-            if(message) printf(", message: %s",message);
-            printf("\n");
-            fflush(stdout);
-            MPI_Abort(MPI_COMM_WORLD, int(line) );
-        }
-    }
-
-    /** get current rank */
-    static int MpiGetRank(MPI_Comm comm){
-        int rank(0);
-        mpiassert( MPI_Comm_rank(comm, &rank),  __LINE__ );
-        return rank;
-    }
-
-    /** get current nb procs */
-    static int MpiGetNbProcs(MPI_Comm comm){
-        int nb(0);
-        mpiassert( MPI_Comm_size(comm, &nb), __LINE__);
-        return nb;
-    }
-
-    /** get the pivot from several procs in Comm */
-    static CompareType MpiGetPivot(CompareType myData, MPI_Comm& comm, const int nbProcs){
-        CompareType result[nbProcs];
-        mpiassert( MPI_Allgather( &myData, sizeof(CompareType), MPI_BYTE, result, sizeof(CompareType), MPI_BYTE, comm),  __LINE__ );
-        // We do an average of the first element of each proc array
-        CompareType sum = 0;
-        for(int idxProc = 0 ; idxProc < nbProcs ;++idxProc){
-            sum += result[idxProc] / nbProcs;
-        }
-        return sum ;
-    }
-
-    /** change the group and communicator */
-    static void MpiChangeGroup(MPI_Group& currentGroup, MPI_Comm& currentComm, const int from , const int to){
-        int procsIdArray[to - from + 1];
-        for(int idxProc = from ;idxProc <= to ; ++idxProc){
-            procsIdArray[idxProc - from] = idxProc;
-        }
-
-        MPI_Group previousGroup = currentGroup;
-        mpiassert( MPI_Group_incl(previousGroup, to - from + 1 , procsIdArray, &currentGroup),  __LINE__ );
-
-        MPI_Comm previousComm = currentComm;
-        mpiassert( MPI_Comm_create(previousComm, currentGroup, &currentComm),  __LINE__ );
-
-        MPI_Comm_free(&previousComm);
-        MPI_Group_free(&previousGroup);
-    }
-
     ////////////////////////////////////////////////////////////
     // Quick sort
     ////////////////////////////////////////////////////////////
 
-    /* use in the sequential qs */
+    /* Use in the sequential qs */
     static IndexType QsPartition(SortType array[], IndexType left, IndexType right){
         const IndexType part = right;
         Swap(array[part],array[(right + left ) / 2]);
@@ -153,7 +69,7 @@ class FQuickSort {
         return left;
     }
 
-    /* a local iteration of qs */
+    /* A local iteration of qs */
     static void QsLocal(SortType array[], const CompareType& pivot,
                IndexType myLeft, IndexType myRight,
                IndexType& prefix, IndexType& sufix){
@@ -179,7 +95,7 @@ class FQuickSort {
         sufix = myRight - myLeft - prefix + 1;
     }
 
-    /* a sequential qs */
+    /* The sequential qs */
     static void QsSequentialStep(SortType array[], const IndexType left, const IndexType right){
         if(left < right){
             const IndexType part = QsPartition(array, left, right);
@@ -206,7 +122,7 @@ class FQuickSort {
         }
     }
 
-    /* the openmp qs */
+    /* The openmp qs */
     static void QsOmpNoTask(SortType array[], const IndexType size){
         FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Quicksort" , __FILE__ , __LINE__) );
         struct Fix{
@@ -230,8 +146,8 @@ class FQuickSort {
         {
             const int myThreadId = omp_get_thread_num();
 
-            IndexType myLeft = getLeft(size, myThreadId, omp_get_num_threads());
-            IndexType myRight = getRight(size, myThreadId, omp_get_num_threads()) - 1;
+            IndexType myLeft = FMpi::GetLeft(size, myThreadId, omp_get_num_threads());
+            IndexType myRight = FMpi::GetRight(size, myThreadId, omp_get_num_threads()) - 1;
 
             IndexType startIndex = 0;
             IndexType endIndex = size - 1;
@@ -285,7 +201,7 @@ class FQuickSort {
                 barriers[firstProc].wait();
 
                 // find my next QsLocal part
-                int splitProc = getProc(sufoffset - startIndex, nbElements, lastProc - firstProc + 1) + firstProc;
+                int splitProc = FMpi::GetProc(sufoffset - startIndex, nbElements, lastProc - firstProc + 1) + firstProc;
                 if(splitProc == lastProc){
                     --splitProc;
                 }
@@ -299,8 +215,8 @@ class FQuickSort {
                     firstProc = splitProc + 1;
                 }
 
-                myLeft = getLeft(endIndex - startIndex + 1, myThreadId - firstProc, lastProc - firstProc + 1) + startIndex;
-                myRight = getRight(endIndex - startIndex + 1, myThreadId - firstProc, lastProc - firstProc + 1) + startIndex - 1;
+                myLeft = FMpi::GetLeft(endIndex - startIndex + 1, myThreadId - firstProc, lastProc - firstProc + 1) + startIndex;
+                myRight = FMpi::GetRight(endIndex - startIndex + 1, myThreadId - firstProc, lastProc - firstProc + 1) + startIndex - 1;
             }
 
             QsSequentialStep(array,myLeft,myRight);
@@ -331,7 +247,7 @@ public:
     }
 
     /* the mpi qs */
-    static void QsMpi(const SortType originalArray[], IndexType size, SortType* & outputArray, IndexType& outputSize, MPI_Comm originalComm = MPI_COMM_WORLD){
+    static void QsMpi(const SortType originalArray[], IndexType size, SortType* & outputArray, IndexType& outputSize, const FMpi::FComm& originalComm){
         FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Quicksort" , __FILE__ , __LINE__) );
         // We need a structure see the algorithm detail to know more
         struct Fix{
@@ -345,27 +261,22 @@ public:
         outputSize = size;
 
         // alloc outputArray to store pre/sufixe, maximum needed is nb procs[comm world] + 1
-        Fix fixes[MpiGetNbProcs(MPI_COMM_WORLD) + 1];
-        Fix fixesSum[MpiGetNbProcs(MPI_COMM_WORLD) + 1];
-        memset(fixes,0,sizeof(Fix) * MpiGetNbProcs(MPI_COMM_WORLD));
-        memset(fixesSum,0,sizeof(Fix) * (MpiGetNbProcs(MPI_COMM_WORLD) + 1) );
+        Fix fixes[originalComm.processCount() + 1];
+        Fix fixesSum[originalComm.processCount() + 1];
+        memset(fixes,0,sizeof(Fix) * originalComm.processCount());
+        memset(fixesSum,0,sizeof(Fix) * (originalComm.processCount() + 1) );
 
         // receiving buffer
         IndexType bufferSize = 0;
         SortType* buffer = 0;
 
-        // Create the first group
-        MPI_Group currentGroup;
         // Create the first com
-        MPI_Comm currentComm;
-
-        mpiassert( MPI_Comm_dup(originalComm, &currentComm),  __LINE__ );
-        mpiassert( MPI_Comm_group(currentComm, &currentGroup),  __LINE__ );
+        FMpi::FComm currentComm(originalComm.getComm());
 
         // While I am not working alone on my own data
-        while( MpiGetNbProcs(currentComm) != 1 ){
-            const int currentRank = MpiGetRank(currentComm);
-            const int currentNbProcs = MpiGetNbProcs(currentComm);
+        while( currentComm.processCount() != 1 ){
+            const int currentRank = currentComm.processId();
+            const int currentNbProcs = currentComm.processCount();
 
             MPI_Request requests[currentNbProcs * 2];
             int iterRequest = 0;
@@ -375,12 +286,12 @@ public:
             /////////////////////////////////////////////////
 
             // sort QsLocal part of the outputArray
-            const CompareType pivot = MpiGetPivot(outputArray[size/2], currentComm, currentNbProcs);
+            const CompareType pivot = currentComm.reduceAverageAll( CompareType(outputArray[size/2]) );
             Fix myFix;
             QsLocal(outputArray, pivot, 0, size - 1, myFix.pre, myFix.suf);
 
             // exchange fixes
-            mpiassert( MPI_Allgather( &myFix, sizeof(Fix), MPI_BYTE, fixes, sizeof(Fix), MPI_BYTE, currentComm),  __LINE__ );
+            FMpi::Assert( MPI_Allgather( &myFix, sizeof(Fix), MPI_BYTE, fixes, sizeof(Fix), MPI_BYTE, currentComm.getComm()),  __LINE__ );
 
             // each procs compute the summation
             fixesSum[0].pre = 0;
@@ -391,7 +302,7 @@ public:
             }
 
             // then I need to know which procs will be in the middle
-            int splitProc = getProc(fixesSum[currentNbProcs].pre - 1, fixesSum[currentNbProcs].pre + fixesSum[currentNbProcs].suf, currentNbProcs);
+            int splitProc = FMpi::GetProc(fixesSum[currentNbProcs].pre - 1, fixesSum[currentNbProcs].pre + fixesSum[currentNbProcs].suf, currentNbProcs);
             if(splitProc == currentNbProcs - 1){
                 --splitProc;
             }
@@ -406,19 +317,19 @@ public:
                 const int firstProcInSuf = splitProc + 1;
                 const IndexType elementsInSuf = fixesSum[currentNbProcs].suf;
 
-                const int firstProcToSend = getProc(fixesSum[currentRank].suf, elementsInSuf, procsInSuf) + firstProcInSuf;
-                const int lastProcToSend = getProc(fixesSum[currentRank + 1].suf - 1, elementsInSuf, procsInSuf) + firstProcInSuf;
+                const int firstProcToSend = FMpi::GetProc(fixesSum[currentRank].suf, elementsInSuf, procsInSuf) + firstProcInSuf;
+                const int lastProcToSend = FMpi::GetProc(fixesSum[currentRank + 1].suf - 1, elementsInSuf, procsInSuf) + firstProcInSuf;
 
                 IndexType sent = 0;
                 for(int idxProc = firstProcToSend ; idxProc <= lastProcToSend ; ++idxProc){
-                    const IndexType thisProcRight = getRight(elementsInSuf, idxProc - firstProcInSuf, procsInSuf);
+                    const IndexType thisProcRight = FMpi::GetRight(elementsInSuf, idxProc - firstProcInSuf, procsInSuf);
                     IndexType sendToProc = thisProcRight - fixesSum[currentRank].suf - sent;
 
                     if(sendToProc + sent > fixes[currentRank].suf){
                         sendToProc = fixes[currentRank].suf - sent;
                     }
                     if( sendToProc ){
-                        mpiassert( MPI_Isend(&outputArray[sent + fixes[currentRank].pre], int(sendToProc * sizeof(SortType)), MPI_BYTE , idxProc, FMpi::TagQuickSort, currentComm, &requests[iterRequest++]),  __LINE__ );
+                        FMpi::Assert( MPI_Isend(&outputArray[sent + fixes[currentRank].pre], int(sendToProc * sizeof(SortType)), MPI_BYTE , idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]),  __LINE__ );
                     }
                     sent += sendToProc;
                 }
@@ -429,19 +340,19 @@ public:
                 const int procsInPre = splitProc + 1;
                 const IndexType elementsInPre = fixesSum[currentNbProcs].pre;
 
-                const int firstProcToSend = getProc(fixesSum[currentRank].pre, elementsInPre, procsInPre);
-                const int lastProcToSend = getProc(fixesSum[currentRank + 1].pre - 1, elementsInPre, procsInPre);
+                const int firstProcToSend = FMpi::GetProc(fixesSum[currentRank].pre, elementsInPre, procsInPre);
+                const int lastProcToSend = FMpi::GetProc(fixesSum[currentRank + 1].pre - 1, elementsInPre, procsInPre);
 
                 IndexType sent = 0;
                 for(int idxProc = firstProcToSend ; idxProc <= lastProcToSend ; ++idxProc){
-                    const IndexType thisProcRight = getRight(elementsInPre, idxProc, procsInPre);
+                    const IndexType thisProcRight = FMpi::GetRight(elementsInPre, idxProc, procsInPre);
                     IndexType sendToProc = thisProcRight - fixesSum[currentRank].pre - sent;
 
                     if(sendToProc + sent > fixes[currentRank].pre){
                         sendToProc = fixes[currentRank].pre - sent;
                     }
                     if(sendToProc){
-                        mpiassert( MPI_Isend(&outputArray[sent], int(sendToProc * sizeof(SortType)), MPI_BYTE , idxProc, FMpi::TagQuickSort, currentComm, &requests[iterRequest++]),  __LINE__ );
+                        FMpi::Assert( MPI_Isend(&outputArray[sent], int(sendToProc * sizeof(SortType)), MPI_BYTE , idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]),  __LINE__ );
                     }
                     sent += sendToProc;
                 }
@@ -456,8 +367,8 @@ public:
                 const int procsInPre = splitProc + 1;
                 const IndexType elementsInPre = fixesSum[currentNbProcs].pre;
 
-                IndexType myLeft = getLeft(elementsInPre, currentRank, procsInPre);
-                IndexType myRightLimit = getRight(elementsInPre, currentRank, procsInPre);
+                IndexType myLeft = FMpi::GetLeft(elementsInPre, currentRank, procsInPre);
+                IndexType myRightLimit = FMpi::GetRight(elementsInPre, currentRank, procsInPre);
 
                 size = myRightLimit - myLeft;
                 if(bufferSize < size){
@@ -474,18 +385,18 @@ public:
                 IndexType indexArray = 0;
 
                 while( idxProc < currentNbProcs && indexArray < myRightLimit - myLeft){
-                    const IndexType firstIndex = Max(myLeft , fixesSum[idxProc].pre );
-                    const IndexType endIndex = Min(fixesSum[idxProc + 1].pre,  myRightLimit);
+                    const IndexType firstIndex = FMath::Max(myLeft , fixesSum[idxProc].pre );
+                    const IndexType endIndex = FMath::Min(fixesSum[idxProc + 1].pre,  myRightLimit);
                     if( (endIndex - firstIndex) ){
-                        mpiassert( MPI_Irecv(&buffer[indexArray], int((endIndex - firstIndex) * sizeof(SortType)), MPI_BYTE, idxProc, FMpi::TagQuickSort, currentComm, &requests[iterRequest++]),  __LINE__ );
+                        FMpi::Assert( MPI_Irecv(&buffer[indexArray], int((endIndex - firstIndex) * sizeof(SortType)), MPI_BYTE, idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]),  __LINE__ );
                     }
                     indexArray += endIndex - firstIndex;
                     ++idxProc;
                 }
                 // Proceed all send/receive
-                mpiassert( MPI_Waitall(iterRequest, requests, MPI_STATUSES_IGNORE),  __LINE__ );
+                FMpi::Assert( MPI_Waitall(iterRequest, requests, MPI_STATUSES_IGNORE),  __LINE__ );
 
-                MpiChangeGroup(currentGroup, currentComm, 0, splitProc);
+                currentComm.groupReduce( 0, splitProc);
             }
             else{
                 // I am in L-Part (larger than pivot)
@@ -493,8 +404,8 @@ public:
                 const IndexType elementsInSuf = fixesSum[currentNbProcs].suf;
 
                 const int rankInL = currentRank - splitProc - 1;
-                IndexType myLeft = getLeft(elementsInSuf, rankInL, procsInSuf);
-                IndexType myRightLimit = getRight(elementsInSuf, rankInL, procsInSuf);
+                IndexType myLeft = FMpi::GetLeft(elementsInSuf, rankInL, procsInSuf);
+                IndexType myRightLimit = FMpi::GetRight(elementsInSuf, rankInL, procsInSuf);
 
                 size = myRightLimit - myLeft;
                 if(bufferSize < size){
@@ -511,18 +422,18 @@ public:
                 IndexType indexArray = 0;
 
                 while( idxProc < currentNbProcs && indexArray < myRightLimit - myLeft){
-                    const IndexType firstIndex = Max(myLeft , fixesSum[idxProc].suf );
-                    const IndexType endIndex = Min(fixesSum[idxProc + 1].suf,  myRightLimit);
+                    const IndexType firstIndex = FMath::Max(myLeft , fixesSum[idxProc].suf );
+                    const IndexType endIndex = FMath::Min(fixesSum[idxProc + 1].suf,  myRightLimit);
                     if( (endIndex - firstIndex) ){
-                        mpiassert( MPI_Irecv(&buffer[indexArray], int((endIndex - firstIndex) * sizeof(SortType)), MPI_BYTE, idxProc, FMpi::TagQuickSort, currentComm, &requests[iterRequest++]),  __LINE__ );
+                        FMpi::Assert( MPI_Irecv(&buffer[indexArray], int((endIndex - firstIndex) * sizeof(SortType)), MPI_BYTE, idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]),  __LINE__ );
                     }
                     indexArray += endIndex - firstIndex;
                     ++idxProc;
                 }
                 // Proceed all send/receive
-                mpiassert( MPI_Waitall(iterRequest, requests, MPI_STATUSES_IGNORE),  __LINE__ );
+                FMpi::Assert( MPI_Waitall(iterRequest, requests, MPI_STATUSES_IGNORE),  __LINE__ );
 
-                MpiChangeGroup(currentGroup, currentComm, splitProc + 1, currentNbProcs - 1);
+                currentComm.groupReduce( splitProc + 1, currentNbProcs - 1);
             }
 
 
@@ -543,8 +454,6 @@ public:
 
         // Clean
         delete[] buffer;
-        MPI_Comm_free(&currentComm);
-        MPI_Group_free(&currentGroup);
 
         // Normal Quick sort
         QsOmp(outputArray, size);
diff --git a/Tests/testFmbAlgorithmProc.cpp b/Tests/testFmbAlgorithmProc.cpp
index 2c2d4ea4be0df3d8a9c9a4b1a58e2f145e7a078d..8f99af1a30db79ce3c9af2cc6a0757e0dd33906c 100644
--- a/Tests/testFmbAlgorithmProc.cpp
+++ b/Tests/testFmbAlgorithmProc.cpp
@@ -246,7 +246,7 @@ int main(int argc, char ** argv){
         std::cout << "Opening : " << filename << "\n";
     }
 
-    FMpiFmaLoader<ParticleClass> loader(filename, app);
+    FMpiFmaLoader<ParticleClass> loader(filename, app.global());
     if(!loader.isOpen()){
         std::cout << "Loader Error, " << filename << " is missing\n";
         return 1;
@@ -262,27 +262,14 @@ int main(int argc, char ** argv){
     std::cout << "\tHeight : " << NbLevels << " \t sub-height : " << SizeSubLevels << std::endl;
     counter.tic();
 
-    if( app.processCount() != 1){
+    if( app.global().processCount() != 1){
         //////////////////////////////////////////////////////////////////////////////////
-        // We sort our particles
+        // Build tree from mpi loader
         //////////////////////////////////////////////////////////////////////////////////
-        std::cout << "Create intervals ..." << std::endl;
+        std::cout << "Build Tree ..." << std::endl;
         counter.tic();
 
-        FMpiTreeBuilder<ParticleClass> builder;
-
-        builder.splitAndSortFile(loader, NbLevels);
-
-        counter.tac();
-        std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
-
-        //////////////////////////////////////////////////////////////////////////////////
-        // Now we can build the real tree
-        //////////////////////////////////////////////////////////////////////////////////
-        std::cout << "Create real tree ..." << std::endl;
-        counter.tic();
-
-        builder.intervalsToTree(tree);
+        FMpiTreeBuilder<ParticleClass>::LoaderToTree(app.global(), loader, tree);
 
         counter.tac();
         std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
@@ -290,10 +277,9 @@ int main(int argc, char ** argv){
         //////////////////////////////////////////////////////////////////////////////////
     }
     else{
-        FFmaBinLoader<ParticleClass> loaderSeq(filename);
         ParticleClass partToInsert;
-        for(FSize idxPart = 0 ; idxPart < loaderSeq.getNumberOfParticles() ; ++idxPart){
-            loaderSeq.fillParticle(partToInsert);
+        for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+            loader.fillParticle(partToInsert);
             tree.insert(partToInsert);
         }
     }
@@ -307,7 +293,7 @@ int main(int argc, char ** argv){
     counter.tic();
 
     KernelClass kernels(NbLevels,loader.getBoxWidth());
-    FmmClass algo(app,&tree,&kernels);
+    FmmClass algo(app.global(),&tree,&kernels);
     algo.execute();
 
     counter.tac();
@@ -333,13 +319,13 @@ int main(int argc, char ** argv){
 
         std::cout << "My potential is " << potential << std::endl;
 
-        potential = app.reduceSum(potential);
-        forces.setX(app.reduceSum(forces.getX()));
-        forces.setY(app.reduceSum(forces.getY()));
-        forces.setZ(app.reduceSum(forces.getZ()));
+        potential = app.global().reduceSum(potential);
+        forces.setX(app.global().reduceSum(forces.getX()));
+        forces.setY(app.global().reduceSum(forces.getY()));
+        forces.setZ(app.global().reduceSum(forces.getZ()));
 
 
-        if(app.isMaster()){
+        if(app.global().processId() == 0){
             std::cout << "Foces Sum  x = " << forces.getX() << " y = " << forces.getY() << " z = " << forces.getZ() << std::endl;
             std::cout << "Potential Sum = " << potential << std::endl;
         }
diff --git a/Tests/testFmmAlgorithmProc.cpp b/Tests/testFmmAlgorithmProc.cpp
index 23e0d1966a69689e5339c5680fb9a5422be08081..a35cc8bfb734954cbccdd0bbc58a83f5157d140c 100644
--- a/Tests/testFmmAlgorithmProc.cpp
+++ b/Tests/testFmmAlgorithmProc.cpp
@@ -48,7 +48,7 @@
 // Check if tree is built correctly
 template<class OctreeClass>
 void ValidateTree(OctreeClass& realTree,
-                        OctreeClass& treeValide, FMpi& app){
+                        OctreeClass& treeValide, const FMpi::FComm& comm){
     FSize totalNbLeafs = 0;
     {
 
@@ -59,10 +59,10 @@ void ValidateTree(OctreeClass& realTree,
         } while(octreeIterator.moveRight());
     }
 
-    const FSize myLeftLeaf = app.getLeft(totalNbLeafs);
-    const FSize myRightLeaf = app.getRight(totalNbLeafs);
+    const FSize myLeftLeaf = comm.getLeft(totalNbLeafs);
+    const FSize myRightLeaf = comm.getRight(totalNbLeafs);
 
-    //printf("%d should go from %d to %d leaf (on %d total leafs)\n", app.processId(), myLeftLeaf, myRightLeaf, totalNbLeafs);
+    //printf("%d should go from %d to %d leaf (on %d total leafs)\n", comm.processId(), myLeftLeaf, myRightLeaf, totalNbLeafs);
 
     typename OctreeClass::Iterator octreeIteratorValide(&treeValide);
     octreeIteratorValide.gotoBottomLeft();
@@ -335,7 +335,7 @@ int main(int argc, char ** argv){
         std::cout << "Opening : " << filename << "\n";
     }
 
-    FMpiFmaLoader<ParticleClass> loader(filename,app);
+    FMpiFmaLoader<ParticleClass> loader(filename,app.global());
     if(!loader.isOpen()){
         std::cout << "Loader Error, " << filename << " is missing\n";
         return 1;
@@ -344,27 +344,14 @@ int main(int argc, char ** argv){
     // The real tree to work on
     OctreeClass realTree(NbLevels, SizeSubLevels,loader.getBoxWidth(),loader.getCenterOfBox());
 
-    if( app.processCount() != 1){
+    if( app.global().processCount() != 1){
         //////////////////////////////////////////////////////////////////////////////////
-        // We sort our particles
+        // Build tree from mpi loader
         //////////////////////////////////////////////////////////////////////////////////
-        std::cout << "Create intervals ..." << std::endl;
+        std::cout << "Build Tree ..." << std::endl;
         counter.tic();
 
-        FMpiTreeBuilder<ParticleClass> builder;
-
-        builder.splitAndSortFile(loader, NbLevels);
-
-        counter.tac();
-        std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
-
-        //////////////////////////////////////////////////////////////////////////////////
-        // Now we can build the real tree
-        //////////////////////////////////////////////////////////////////////////////////
-        std::cout << "Create real tree ..." << std::endl;
-        counter.tic();
-
-        builder.intervalsToTree(realTree);
+        FMpiTreeBuilder<ParticleClass>::LoaderToTree(app.global(), loader, realTree);
 
         counter.tac();
         std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
@@ -372,10 +359,9 @@ int main(int argc, char ** argv){
         //////////////////////////////////////////////////////////////////////////////////
     }    
     else{
-        FFmaBinLoader<ParticleClass> loaderSeq(filename);
         ParticleClass partToInsert;
-        for(FSize idxPart = 0 ; idxPart < loaderSeq.getNumberOfParticles() ; ++idxPart){
-            loaderSeq.fillParticle(partToInsert);
+        for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+            loader.fillParticle(partToInsert);
             realTree.insert(partToInsert);
         }
     }
@@ -400,7 +386,7 @@ int main(int argc, char ** argv){
     std::cout << "Validate tree ..." << std::endl;
     counter.tic();
 
-    ValidateTree(realTree, treeValide, app);
+    ValidateTree(realTree, treeValide, app.global());
 
     counter.tac();
     std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
@@ -412,7 +398,7 @@ int main(int argc, char ** argv){
 
     KernelClass kernels;
 
-    FmmClassProc algo(app,&realTree,&kernels);
+    FmmClassProc algo(app.global(),&realTree,&kernels);
     algo.execute();
 
     counter.tac();