diff --git a/Src/Files/FMpiTreeBuilder.hpp b/Src/Files/FMpiTreeBuilder.hpp
index 6b72423ba0232a14c485466aab86f7e78a4c70ac..31b4ab9564efd8ad96cd38ee0eff9aab66a158d9 100755
--- a/Src/Files/FMpiTreeBuilder.hpp
+++ b/Src/Files/FMpiTreeBuilder.hpp
@@ -34,278 +34,278 @@
 template<class ParticleClass>
 class FMpiTreeBuilder{
 public:
-  /** What sorting algorithm to use */
-  enum SortingType{
-    QuickSort,
-    BitonicSort,
-  };
+    /** What sorting algorithm to use */
+    enum SortingType{
+        QuickSort,
+        BitonicSort,
+    };
 
 
-  /** A particle may not have a MortonIndex Method
+    /** A particle may not have a MortonIndex Method
    * but they are sorted on their morton index
    * so this struct store a particle + its index
    */
-  struct IndexedParticle{
-  public:
-    MortonIndex index;
-    ParticleClass particle;
+    struct IndexedParticle{
+    public:
+        MortonIndex index;
+        ParticleClass particle;
+
+        operator MortonIndex(){
+            return this->index;
+        }
+    };
 
-    operator MortonIndex(){
-      return this->index;
-    }
-  };
 
-  
 private:
-  /** This method has been tacken from the octree
+    /** This method has been tacken from the octree
    * it computes a tree coordinate (x or y or z) from real position
    */
-  static int getTreeCoordinate(const FReal inRelativePosition, const FReal boxWidthAtLeafLevel) {
-    const FReal indexFReal = inRelativePosition / boxWidthAtLeafLevel;
-    const int index = int(FMath::dfloor(indexFReal));
-    if( index && FMath::LookEqual(inRelativePosition, boxWidthAtLeafLevel * FReal(index) ) ){
-      return index - 1;
+    static int getTreeCoordinate(const FReal inRelativePosition, const FReal boxWidthAtLeafLevel) {
+        const FReal indexFReal = inRelativePosition / boxWidthAtLeafLevel;
+        const int index = int(FMath::dfloor(indexFReal));
+        if( index && FMath::LookEqual(inRelativePosition, boxWidthAtLeafLevel * FReal(index) ) ){
+            return index - 1;
+        }
+        return index;
     }
-    return index;
-  }
 
 
-  //////////////////////////////////////////////////////////////////////////
-  // To sort tha particles we hold
-  //////////////////////////////////////////////////////////////////////////
+    //////////////////////////////////////////////////////////////////////////
+    // To sort tha particles we hold
+    //////////////////////////////////////////////////////////////////////////
 
 
-  template <class LoaderClass>
-  static void SortParticles( const FMpi::FComm& communicator, LoaderClass& loader, const SortingType type,
-			     const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){
-    FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) );
+    template <class LoaderClass>
+    static void SortParticles( const FMpi::FComm& communicator, LoaderClass& loader, const SortingType type,
+                               const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){
+        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) );
 
-    // create particles
-    IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()];
-    FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles());
-    FPoint boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2));
-    FTreeCoordinate host;
+        // create particles
+        IndexedParticle*const realParticlesIndexed = new IndexedParticle[loader.getNumberOfParticles()];
+        FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * loader.getNumberOfParticles());
+        FPoint boxCorner(loader.getCenterOfBox() - (loader.getBoxWidth()/2));
+        FTreeCoordinate host;
 
-    const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (TreeHeight - 1) );
-    for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
-      loader.fillParticle(realParticlesIndexed[idxPart].particle);
-      host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel ));
-      host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel ));
-      host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel ));
+        const FReal boxWidthAtLeafLevel = loader.getBoxWidth() / FReal(1 << (TreeHeight - 1) );
+        for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+            loader.fillParticle(realParticlesIndexed[idxPart].particle);
+            host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel ));
+            host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel ));
+            host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel ));
 
-      realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1);
-    }
+            realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1);
+        }
 
-    // sort particles
-    if(type == QuickSort){
-      FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(), *outputArray, *outputSize,communicator);
-      delete [] (realParticlesIndexed);
-    }
-    else {
-      FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, loader.getNumberOfParticles(), communicator );
-      *outputArray = realParticlesIndexed;
-      *outputSize = loader.getNumberOfParticles();
+        // sort particles
+        if(type == QuickSort){
+            FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(), *outputArray, *outputSize,communicator);
+            delete [] (realParticlesIndexed);
+        }
+        else {
+            FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, loader.getNumberOfParticles(), communicator );
+            *outputArray = realParticlesIndexed;
+            *outputSize = loader.getNumberOfParticles();
+        }
     }
-  }
 
-  static void SortParticlesFromArray( const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, const SortingType type,
-				      const FPoint& centerOfBox, const FReal boxWidth,
-				      const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){
-    FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) );
+    static void SortParticlesFromArray( const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, const SortingType type,
+                                        const FPoint& centerOfBox, const FReal boxWidth,
+                                        const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){
+        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__ , "Loader to Tree" , __FILE__ , __LINE__) );
 
-    // create particles
-    IndexedParticle*const realParticlesIndexed = new IndexedParticle[size];
-    FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * size);
+        // create particles
+        IndexedParticle*const realParticlesIndexed = new IndexedParticle[size];
+        FMemUtils::memset(realParticlesIndexed, 0, sizeof(IndexedParticle) * size);
 
-    FPoint boxCorner(centerOfBox - (boxWidth/2));
-    FTreeCoordinate host;
+        FPoint boxCorner(centerOfBox - (boxWidth/2));
+        FTreeCoordinate host;
 
-    const FReal boxWidthAtLeafLevel = boxWidth / FReal(1 << (TreeHeight - 1) );
+        const FReal boxWidthAtLeafLevel = boxWidth / FReal(1 << (TreeHeight - 1) );
 
-    for(int idxPart = 0 ; idxPart < size ; ++idxPart){
-      realParticlesIndexed[idxPart].particle = array[idxPart];
-      host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel ));
-      host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel ));
-      host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel ));
+        for(int idxPart = 0 ; idxPart < size ; ++idxPart){
+            realParticlesIndexed[idxPart].particle = array[idxPart];
+            host.setX( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getX() - boxCorner.getX(), boxWidthAtLeafLevel ));
+            host.setY( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getY() - boxCorner.getY(), boxWidthAtLeafLevel ));
+            host.setZ( getTreeCoordinate( realParticlesIndexed[idxPart].particle.getPosition().getZ() - boxCorner.getZ(), boxWidthAtLeafLevel ));
 
-      realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1);
-    }
+            realParticlesIndexed[idxPart].index = host.getMortonIndex(TreeHeight - 1);
+        }
 
-    // sort particles
-    if(type == QuickSort){
-      FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, size, *outputArray, *outputSize,communicator);
-      delete [] (realParticlesIndexed);
-    }
-    else {
-      FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, size, communicator );
-      *outputArray = realParticlesIndexed;
-      *outputSize = size;
+        // sort particles
+        if(type == QuickSort){
+            FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, size, outputArray, outputSize,communicator);
+            delete [] (realParticlesIndexed);
+        }
+        else {
+            FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, size, communicator );
+            *outputArray = realParticlesIndexed;
+            *outputSize = size;
+        }
     }
-  }
 
 
-  //////////////////////////////////////////////////////////////////////////
-  // To merge the leaves
-  //////////////////////////////////////////////////////////////////////////
+    //////////////////////////////////////////////////////////////////////////
+    // To merge the leaves
+    //////////////////////////////////////////////////////////////////////////
 
-  static void MergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize* workingSize,
-			  FSize ** leavesIndices, ParticleClass** leavesArray, FSize* const leavesSize){
-    FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) );
-    const int rank = communicator.processId();
-    const int nbProcs = communicator.processCount();
-    if(nbProcs == 1){
-      //Nothing to do there : there is no need to verify if leaves are split, if there is one process...
-    }
-    else{
-      // be sure there is no splited leaves
-      // to do that we exchange the first index with the left proc
-      {
-	FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) );
-
-	MortonIndex otherFirstIndex = -1;
-	if((*workingSize) != 0 && rank != 0 && rank != nbProcs - 1){
-	  MPI_Sendrecv(&workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs,
-		       &otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs,
-		       communicator.getComm(), MPI_STATUS_IGNORE);
-	}
-	else if( rank == 0){
-	  MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE);
-	}
-	else if( rank == nbProcs - 1){
-	  MPI_Send( &workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm());
-	}
-	else {
-	  MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE);
-	  MPI_Send(&otherFirstIndex, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm());
-	}
-
-	// at this point every one know the first index of his right neighbors
-	const bool needToRecvBeforeSend = (rank != 0 && (((*workingSize) && otherFirstIndex == workingArray[0].index ) || !(*workingSize)));
-	MPI_Request requestSendLeaf;
-
-	IndexedParticle* sendBuffer = nullptr;
-	if(rank != nbProcs - 1 && needToRecvBeforeSend == false){
-	  FSize idxPart = (*workingSize) - 1 ;
-	  while(idxPart >= 0 && workingArray[idxPart].index == otherFirstIndex){
-	    --idxPart;
-	  }
-	  const int particlesToSend = int((*workingSize) - 1 - idxPart);
-	  if(particlesToSend){
-	    (*workingSize) -= particlesToSend;
-	    sendBuffer = new IndexedParticle[particlesToSend];
-	    memcpy(sendBuffer, &workingArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle));
-
-	    MPI_Isend( sendBuffer, particlesToSend * int(sizeof(IndexedParticle)), MPI_BYTE,
-		       rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf);
-	  }
-	  else{
-	    MPI_Isend( nullptr, 0, MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf);
-	  }
-	}
-
-	if( rank != 0 ){
-	  int sendByOther = 0;
-
-	  MPI_Status probStatus;
-	  MPI_Probe(rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), &probStatus);
-	  MPI_Get_count( &probStatus,  MPI_BYTE, &sendByOther);
-
-	  if(sendByOther){
-	    sendByOther /= int(sizeof(IndexedParticle));
-
-	    const IndexedParticle* const reallocOutputArray = workingArray;
-	    const FSize reallocOutputSize = (*workingSize);
-
-	    (*workingSize) += sendByOther;
-	    workingArray = new IndexedParticle[(*workingSize)];
-	    FMemUtils::memcpy(&workingArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle));
-	    delete[] reallocOutputArray;
-
-	    MPI_Recv(workingArray, int(sizeof(IndexedParticle)) * sendByOther, MPI_BYTE,
-		     rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE);
-	  }
-	  else{
-	    MPI_Recv( nullptr, 0, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE);
-	  }
-	}
-
-	if(rank != nbProcs - 1 && needToRecvBeforeSend == true){
-	  MPI_Send( workingArray, int((*workingSize) * sizeof(IndexedParticle)), MPI_BYTE,
-		    rank + 1, FMpi::TagSplittedLeaf, communicator.getComm());
-	  delete[] workingArray;
-	  workingArray = nullptr;
-	  (*workingSize)  = 0;
-	}
-	else if(rank != nbProcs - 1){
-	  MPI_Wait( &requestSendLeaf, MPI_STATUS_IGNORE);
-	  delete[] sendBuffer;
-	  sendBuffer = nullptr;
-	}
-      }
-    }
-    {//Filling the Array with leaves and parts //// COULD BE MOVED IN AN OTHER FUCTION
-	  
-      (*leavesSize)    = 0; //init ptr
-      (*leavesArray)   = nullptr; //init ptr
-      (*leavesIndices) = nullptr; //init ptr
-	  
-      if((*workingSize)){
-	    
-	//Array of particles
-	*leavesArray = new ParticleClass[(*workingSize)];
-	    
-	//Temporary array, we don't know yet how many leaves there will be 
-	FSize * tempIndicesArray = new FSize[(*workingSize)];
-	memset(tempIndicesArray,0,sizeof(FSize)*(*workingSize));
-	    
-	FSize idxInIndices = 0;
-	MortonIndex previousIndex = -1;
-	for(FSize idxPart = 0 ; idxPart < (*workingSize) ; ++idxPart){
-	  if(workingArray[idxPart].index != previousIndex){
-	    previousIndex = workingArray[idxPart];
-	    tempIndicesArray[idxInIndices] = idxPart;
-	    idxInIndices++;
-	  }
-	  memcpy(&(*leavesArray)[idxPart],&workingArray[idxPart].particle,sizeof(ParticleClass));
-	}
-	*leavesSize = idxInIndices;
-	    
-	*leavesIndices = new FSize[idxInIndices];
-	memcpy(*leavesIndices,tempIndicesArray,(*leavesSize)*sizeof(FSize));
-
-      }
-	  
-      delete []  workingArray;
-	  
-      workingArray = nullptr;
-	
-    }
-  }
-
-
-  //////////////////////////////////////////////////////////////////////////
-  // To equalize (same number of leaves among the procs)
-  //////////////////////////////////////////////////////////////////////////
-
-  /** Put the interval into a tree */
-  template <class ContainerClass>
-  static void EqualizeAndFillTree(const FMpi::FComm& communicator,  ContainerClass* particlesSaver,
-				  FSize * leavesIndices, ParticleClass* leavesArray, FSize& nbLeavesInIntervals, FSize nbParts, 
-				  FAbstractBalanceAlgorithm * balancer){
-
-
-    FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) );
-    const int myRank = communicator.processId();
-    const int nbProcs = communicator.processCount();
-    const FSize myCurrentsParts = nbParts;
-	
-    if(nbProcs == 1){ //I'm the only one !
-      //Just copy each part into the Particle Saver
-      for(FSize idxPart =0 ; idxPart < myCurrentsParts ; ++idxPart){
-	particlesSaver->push(leavesArray[idxPart]);
-      }
+    static void MergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize* workingSize,
+                            FSize ** leavesIndices, ParticleClass** leavesArray, FSize* const leavesSize){
+        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) );
+        const int rank = communicator.processId();
+        const int nbProcs = communicator.processCount();
+        if(nbProcs == 1){
+            //Nothing to do there : there is no need to verify if leaves are split, if there is one process...
+        }
+        else{
+            // be sure there is no splited leaves
+            // to do that we exchange the first index with the left proc
+            {
+                FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) );
+
+                MortonIndex otherFirstIndex = -1;
+                if((*workingSize) != 0 && rank != 0 && rank != nbProcs - 1){
+                    MPI_Sendrecv(&workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs,
+                            &otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs,
+                            communicator.getComm(), MPI_STATUS_IGNORE);
+                }
+                else if( rank == 0){
+                    MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE);
+                }
+                else if( rank == nbProcs - 1){
+                    MPI_Send( &workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm());
+                }
+                else {
+                    MPI_Recv(&otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs, communicator.getComm(), MPI_STATUS_IGNORE);
+                    MPI_Send(&otherFirstIndex, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs, communicator.getComm());
+                }
+
+                // at this point every one know the first index of his right neighbors
+                const bool needToRecvBeforeSend = (rank != 0 && (((*workingSize) && otherFirstIndex == workingArray[0].index ) || !(*workingSize)));
+                MPI_Request requestSendLeaf;
+
+                IndexedParticle* sendBuffer = nullptr;
+                if(rank != nbProcs - 1 && needToRecvBeforeSend == false){
+                    FSize idxPart = (*workingSize) - 1 ;
+                    while(idxPart >= 0 && workingArray[idxPart].index == otherFirstIndex){
+                        --idxPart;
+                    }
+                    const int particlesToSend = int((*workingSize) - 1 - idxPart);
+                    if(particlesToSend){
+                        (*workingSize) -= particlesToSend;
+                        sendBuffer = new IndexedParticle[particlesToSend];
+                        memcpy(sendBuffer, &workingArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle));
+
+                        MPI_Isend( sendBuffer, particlesToSend * int(sizeof(IndexedParticle)), MPI_BYTE,
+                                   rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf);
+                    }
+                    else{
+                        MPI_Isend( nullptr, 0, MPI_BYTE, rank + 1, FMpi::TagSplittedLeaf, communicator.getComm(), &requestSendLeaf);
+                    }
+                }
+
+                if( rank != 0 ){
+                    int sendByOther = 0;
+
+                    MPI_Status probStatus;
+                    MPI_Probe(rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), &probStatus);
+                    MPI_Get_count( &probStatus,  MPI_BYTE, &sendByOther);
+
+                    if(sendByOther){
+                        sendByOther /= int(sizeof(IndexedParticle));
+
+                        const IndexedParticle* const reallocOutputArray = workingArray;
+                        const FSize reallocOutputSize = (*workingSize);
+
+                        (*workingSize) += sendByOther;
+                        workingArray = new IndexedParticle[(*workingSize)];
+                        FMemUtils::memcpy(&workingArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle));
+                        delete[] reallocOutputArray;
+
+                        MPI_Recv(workingArray, int(sizeof(IndexedParticle)) * sendByOther, MPI_BYTE,
+                                 rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE);
+                    }
+                    else{
+                        MPI_Recv( nullptr, 0, MPI_BYTE, rank - 1, FMpi::TagSplittedLeaf, communicator.getComm(), MPI_STATUS_IGNORE);
+                    }
+                }
+
+                if(rank != nbProcs - 1 && needToRecvBeforeSend == true){
+                    MPI_Send( workingArray, int((*workingSize) * sizeof(IndexedParticle)), MPI_BYTE,
+                              rank + 1, FMpi::TagSplittedLeaf, communicator.getComm());
+                    delete[] workingArray;
+                    workingArray = nullptr;
+                    (*workingSize)  = 0;
+                }
+                else if(rank != nbProcs - 1){
+                    MPI_Wait( &requestSendLeaf, MPI_STATUS_IGNORE);
+                    delete[] sendBuffer;
+                    sendBuffer = nullptr;
+                }
+            }
+        }
+        {//Filling the Array with leaves and parts //// COULD BE MOVED IN AN OTHER FUCTION
+
+            (*leavesSize)    = 0; //init ptr
+            (*leavesArray)   = nullptr; //init ptr
+            (*leavesIndices) = nullptr; //init ptr
+
+            if((*workingSize)){
+
+                //Array of particles
+                *leavesArray = new ParticleClass[(*workingSize)];
+
+                //Temporary array, we don't know yet how many leaves there will be
+                FSize * tempIndicesArray = new FSize[(*workingSize)];
+                memset(tempIndicesArray,0,sizeof(FSize)*(*workingSize));
+
+                FSize idxInIndices = 0;
+                MortonIndex previousIndex = -1;
+                for(FSize idxPart = 0 ; idxPart < (*workingSize) ; ++idxPart){
+                    if(workingArray[idxPart].index != previousIndex){
+                        previousIndex = workingArray[idxPart];
+                        tempIndicesArray[idxInIndices] = idxPart;
+                        idxInIndices++;
+                    }
+                    memcpy(&(*leavesArray)[idxPart],&workingArray[idxPart].particle,sizeof(ParticleClass));
+                }
+                *leavesSize = idxInIndices;
+
+                *leavesIndices = new FSize[idxInIndices];
+                memcpy(*leavesIndices,tempIndicesArray,(*leavesSize)*sizeof(FSize));
+
+            }
+
+            delete []  workingArray;
+
+            workingArray = nullptr;
+
+        }
     }
-    else{
+
+
+    //////////////////////////////////////////////////////////////////////////
+    // To equalize (same number of leaves among the procs)
+    //////////////////////////////////////////////////////////////////////////
+
+    /** Put the interval into a tree */
+    template <class ContainerClass>
+    static void EqualizeAndFillTree(const FMpi::FComm& communicator,  ContainerClass* particlesSaver,
+                                    FSize * leavesIndices, ParticleClass* leavesArray, FSize& nbLeavesInIntervals, FSize nbParts,
+                                    FAbstractBalanceAlgorithm * balancer){
+
+
+        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) );
+        const int myRank = communicator.processId();
+        const int nbProcs = communicator.processCount();
+        const FSize myCurrentsParts = nbParts;
+
+        if(nbProcs == 1){ //I'm the only one !
+            //Just copy each part into the Particle Saver
+            for(FSize idxPart =0 ; idxPart < myCurrentsParts ; ++idxPart){
+                particlesSaver->push(leavesArray[idxPart]);
+            }
+        }
+        else{
             // We have to know the number of leaves each procs holds
             FSize*const numberOfLeavesPerProc = new FSize[nbProcs];
             memset(numberOfLeavesPerProc, 0, sizeof(FSize) * nbProcs);
@@ -343,7 +343,7 @@ private:
                 printf("%d] to %d from %llu to %llu\n", myRank, pack.idProc, pack.elementFrom, pack.elementTo);
                 idxToSend[pack.idProc] = pack.elementFrom;
                 toSend[pack.idProc]    = (pack.elementTo ? leavesIndices[pack.elementTo-1] : 0)
-                                        - (pack.elementFrom ? leavesIndices[pack.elementFrom-1] : 0);
+                        - (pack.elementFrom ? leavesIndices[pack.elementFrom-1] : 0);
             }
 
             //Then, we exchange the datas to send
@@ -424,87 +424,87 @@ private:
             delete[] toSend;
             delete[] numberOfLeavesPerProc;
         }
-    delete[] leavesArray;
-    delete[] leavesIndices;
-  }
+        delete[] leavesArray;
+        delete[] leavesIndices;
+    }
+
 
- 
 public:
-  
-  /**
+
+    /**
    * Those three function get through pubilc/private member issues for testing purpose
    */
-  static void testSortParticlesFromArray( const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, const SortingType type,
-					  const FPoint& centerOfBox, const FReal boxWidth,
-					  const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){
-    SortParticlesFromArray(communicator, array, size, type, centerOfBox, boxWidth, TreeHeight,&outputArray, &outputSize);
-  }
-
-  
-  static void testMergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize* workingSize,
-			      FSize ** leavesIndices, ParticleClass** leavesArray, FSize* const leavesSize){
-    MergeLeaves(communicator,workingArray,workingSize,leavesIndices,leavesArray,leavesSize);
-  }
-
-  template<class ContainerClass>
-  static void testEqualizeAndFillTree(const FMpi::FComm& communicator,  ContainerClass* particlesSaver,
-				      FSize * leavesIndices, ParticleClass* leavesArray, FSize& nbLeavesInIntervals, FSize nbParts, 
-				      FAbstractBalanceAlgorithm * balancer){
-    EqualizeAndFillTree(communicator,particlesSaver,leavesIndices, leavesArray, nbLeavesInIntervals, nbParts, balancer);
-  }
-
-  //////////////////////////////////////////////////////////////////////////
-  // The builder function
-  //////////////////////////////////////////////////////////////////////////
-  
-  template <class ContainerClass>
-  static void ArrayToTree(const FMpi::FComm& communicator, const ParticleClass array[], const FSize size,
-			  const FPoint& boxCenter, const FReal boxWidth, const int treeHeight,
-			  ContainerClass* particleSaver, FAbstractBalanceAlgorithm* balancer,const SortingType type = QuickSort){
-      
-    IndexedParticle* particlesArray = nullptr;
-    FSize particlesSize = 0;
-    SortParticlesFromArray(communicator, array, size, type, boxCenter, boxWidth, treeHeight,
-			   &particlesArray, &particlesSize);
-
-    ParticleClass* leavesArray = nullptr;
-    FSize leavesSize = 0;
-    FSize * leavesIndices = nullptr;
-    
-    MergeLeaves(communicator, particlesArray, &particlesSize, &leavesIndices, &leavesArray, &leavesSize);
-
-    EqualizeAndFillTree(communicator, particleSaver, leavesIndices, leavesArray, leavesSize, particlesSize, balancer);
-
-    /** To produce stats after the Equalize phase
-     */
-    int* nbPartsPerProc;
-    int myParts = particleSaver->getSize();
-    int nbProc = communicator.processCount();
-	
-    if(communicator.processId()==0){
-      nbPartsPerProc = new int[nbProc];
+    static void testSortParticlesFromArray( const FMpi::FComm& communicator, const ParticleClass array[], const FSize size, const SortingType type,
+                                            const FPoint& centerOfBox, const FReal boxWidth,
+                                            const int TreeHeight, IndexedParticle**const outputArray, FSize* const outputSize){
+        SortParticlesFromArray(communicator, array, size, type, centerOfBox, boxWidth, TreeHeight,&outputArray, &outputSize);
+    }
+
+
+    static void testMergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize* workingSize,
+                                FSize ** leavesIndices, ParticleClass** leavesArray, FSize* const leavesSize){
+        MergeLeaves(communicator,workingArray,workingSize,leavesIndices,leavesArray,leavesSize);
+    }
+
+    template<class ContainerClass>
+    static void testEqualizeAndFillTree(const FMpi::FComm& communicator,  ContainerClass* particlesSaver,
+                                        FSize * leavesIndices, ParticleClass* leavesArray, FSize& nbLeavesInIntervals, FSize nbParts,
+                                        FAbstractBalanceAlgorithm * balancer){
+        EqualizeAndFillTree(communicator,particlesSaver,leavesIndices, leavesArray, nbLeavesInIntervals, nbParts, balancer);
     }
-	
-    MPI_Gather(&myParts,1,MPI_INT,nbPartsPerProc,1,MPI_INT,0,communicator.getComm());
-
-    if(communicator.processId()==0){
-      float av=0;
-      int min=myParts, max=myParts;
-      for(int id=0; id<nbProc ; id++){
-	if(nbPartsPerProc[id] > max){
-	  max=nbPartsPerProc[id];
-	}
-	if(nbPartsPerProc[id] < min){
-	  min=nbPartsPerProc[id];
-	}
-	av += float(nbPartsPerProc[id]);
-      }
-      av /= float(nbProc);
-      printf("End of Equalize Phase : \n \t Min number of parts : %d \n \t Max number of parts : %d \n \t Average number of parts : %e \n",
-	     min,max,av);
-      delete[] nbPartsPerProc;
+
+    //////////////////////////////////////////////////////////////////////////
+    // The builder function
+    //////////////////////////////////////////////////////////////////////////
+
+    template <class ContainerClass>
+    static void ArrayToTree(const FMpi::FComm& communicator, const ParticleClass array[], const FSize size,
+                            const FPoint& boxCenter, const FReal boxWidth, const int treeHeight,
+                            ContainerClass* particleSaver, FAbstractBalanceAlgorithm* balancer,const SortingType type = QuickSort){
+
+        IndexedParticle* particlesArray = nullptr;
+        FSize particlesSize = 0;
+        SortParticlesFromArray(communicator, array, size, type, boxCenter, boxWidth, treeHeight,
+                               &particlesArray, &particlesSize);
+
+        ParticleClass* leavesArray = nullptr;
+        FSize leavesSize = 0;
+        FSize * leavesIndices = nullptr;
+
+        MergeLeaves(communicator, particlesArray, &particlesSize, &leavesIndices, &leavesArray, &leavesSize);
+
+        EqualizeAndFillTree(communicator, particleSaver, leavesIndices, leavesArray, leavesSize, particlesSize, balancer);
+
+        /** To produce stats after the Equalize phase
+     */
+        int* nbPartsPerProc;
+        int myParts = particleSaver->getSize();
+        int nbProc = communicator.processCount();
+
+        if(communicator.processId()==0){
+            nbPartsPerProc = new int[nbProc];
+        }
+
+        MPI_Gather(&myParts,1,MPI_INT,nbPartsPerProc,1,MPI_INT,0,communicator.getComm());
+
+        if(communicator.processId()==0){
+            float av=0;
+            int min=myParts, max=myParts;
+            for(int id=0; id<nbProc ; id++){
+                if(nbPartsPerProc[id] > max){
+                    max=nbPartsPerProc[id];
+                }
+                if(nbPartsPerProc[id] < min){
+                    min=nbPartsPerProc[id];
+                }
+                av += float(nbPartsPerProc[id]);
+            }
+            av /= float(nbProc);
+            printf("End of Equalize Phase : \n \t Min number of parts : %d \n \t Max number of parts : %d \n \t Average number of parts : %e \n",
+                   min,max,av);
+            delete[] nbPartsPerProc;
+        }
     }
-  }
 
 
 };
diff --git a/Src/Utils/FMpi.hpp b/Src/Utils/FMpi.hpp
index 190c41de54b12c878ff75e44810843a3d280e7ce..b0ef166dccaff80ba4b157458371f353bfc82610 100755
--- a/Src/Utils/FMpi.hpp
+++ b/Src/Utils/FMpi.hpp
@@ -53,298 +53,298 @@
 
 class FMpi {
 public:
-  ////////////////////////////////////////////////////////
-  // MPI Flag
-  ////////////////////////////////////////////////////////
-  enum FMpiTag {
-    // FMpiTreeBuilder
-    TagExchangeIndexs,
-    TagSplittedLeaf,
-    TagExchangeNbLeafs,
-    TagSandSettling,
-
-    // FQuickSort
-    TagQuickSort,
-
-    // FMM
-    TagFmmM2M,
-    TagFmmL2L,
-    TagFmmP2P,
-
-    // Bitonic,
-    TagBitonicMin,
-    TagBitonicMax,
-    TagBitonicMinMess,
-    TagBitonicMaxMess,
-
-    // Last defined tag
-    TagLast,
-  };
-
-  ////////////////////////////////////////////////////////
-  // FComm to factorize MPI_Comm work
-  ////////////////////////////////////////////////////////
-
-  /** This class is used to put all the usual method
+    ////////////////////////////////////////////////////////
+    // MPI Flag
+    ////////////////////////////////////////////////////////
+    enum FMpiTag {
+        // FMpiTreeBuilder
+        TagExchangeIndexs,
+        TagSplittedLeaf,
+        TagExchangeNbLeafs,
+        TagSandSettling,
+
+        // FQuickSort
+        TagQuickSort,
+
+        // FMM
+        TagFmmM2M,
+        TagFmmL2L,
+        TagFmmP2P,
+
+        // Bitonic,
+        TagBitonicMin,
+        TagBitonicMax,
+        TagBitonicMinMess,
+        TagBitonicMaxMess,
+
+        // Last defined tag
+        TagLast,
+    };
+
+    ////////////////////////////////////////////////////////
+    // FComm to factorize MPI_Comm work
+    ////////////////////////////////////////////////////////
+
+    /** This class is used to put all the usual method
    * related mpi comm
    */
-  class FComm : public FNoCopyable {
-    int rank;   //< rank related to the comm
-    int nbProc; //< nb proc in this group
-
-    MPI_Comm communicator;  //< current mpi communicator
-    MPI_Group group;        //< current mpi group
+    class FComm : public FNoCopyable {
+        int rank;   //< rank related to the comm
+        int nbProc; //< nb proc in this group
+
+        MPI_Comm communicator;  //< current mpi communicator
+        MPI_Group group;        //< current mpi group
+
+
+        // reset : get rank and nb proc from mpi
+        void reset(){
+            FMpi::Assert( MPI_Comm_rank(communicator,&rank),  __LINE__ );
+            FMpi::Assert( MPI_Comm_size(communicator,&nbProc),  __LINE__ );
+        }
+
+    public:
+        /** Constructor : dup the comm given in parameter */
+        explicit FComm(MPI_Comm inCommunicator ) {
+            FMpi::Assert( MPI_Comm_dup(inCommunicator, &communicator),  __LINE__ , "comm dup");
+            FMpi::Assert( MPI_Comm_group(communicator, &group),  __LINE__ , "comm group");
+
+            reset();
+        }
+
+        /** Free communicator and group */
+        virtual ~FComm(){
+            FMpi::Assert( MPI_Comm_free(&communicator),  __LINE__ );
+            FMpi::Assert( MPI_Group_free(&group),  __LINE__ );
+        }
+
+        /** To get the mpi comm needed for communication */
+        MPI_Comm getComm() const {
+            return communicator;
+        }
+
+        /** The current rank */
+        int processId() const {
+            return rank;
+        }
+
+        /** The current number of procs in the group */
+        int processCount() const {
+            return nbProc;
+        }
+
+        ////////////////////////////////////////////////////////////
+        // Split/Chunk functions
+        ////////////////////////////////////////////////////////////
+
+        /** Get a left index related to a size */
+        template< class T >
+        T getLeft(const T inSize)  const {
+            const double step = (double(inSize) / double(processCount()));
+            return T(FMath::Ceil(step * double(processId())));
+        }
+
+        /** Get a right index related to a size */
+        template< class T >
+        T getRight(const T inSize)  const {
+            const double step = (double(inSize) / double(processCount()));
+            const T res = T(FMath::Ceil(step * double(processId()+1)));
+            if(res > inSize) return inSize;
+            else return res;
+        }
+
+        /** Get a right index related to a size and another id */
+        template< class T >
+        T getOtherRight(const T inSize, const int other)  const {
+            const double step = (double(inSize) / double(processCount()));
+            const T res = T(FMath::Ceil(step * double(other+1)));
+            if(res > inSize) return inSize;
+            else return res;
+        }
+
+        /** Get a left index related to a size and another id */
+        template< class T >
+        T getOtherLeft(const T inSize, const int other) const {
+            const double step = (double(inSize) / double(processCount()));
+            return T(FMath::Ceil(step * double(other)));
+        }
+
+        /** Get a proc id from and index */
+        template< class T >
+        int getProc(const int position, const T inSize) const {
+            const double step = (double(inSize) / processCount());
+            return int(position/step);
+        }
+
+        ////////////////////////////////////////////////////////////
+        // Mpi interface functions
+        ////////////////////////////////////////////////////////////
+
+
+        /** Reduce a value for proc == 0 */
+        template< class T >
+        T reduceSum(T data) const {
+            T result(0);
+            FMpi::Assert( MPI_Reduce( &data, &result, 1, FMpi::GetType(data), MPI_SUM, 0, communicator ), __LINE__);
+            return result;
+        }
+
+        /** Reduce an average */
+        template< class T >
+        T reduceAverageAll(T data) const {
+            T result[processCount()];
+            FMpi::Assert( MPI_Allgather( &data, 1, FMpi::GetType(data), result, 1, FMpi::GetType(data), getComm()),  __LINE__ );
+
+            T average = 0;
+            for(int idxProc = 0 ; idxProc < processCount() ;++idxProc){
+                average += result[idxProc] / processCount();
+            }
+            return average;
+        }
+
+        /** Change the group size */
+        void groupReduce(const int from , const int to){
+            int * procsIdArray = new int [to - from + 1];
+            for(int idxProc = from ;idxProc <= to ; ++idxProc){
+                procsIdArray[idxProc - from] = idxProc;
+            }
+
+            MPI_Group previousGroup = group;
+            FMpi::Assert( MPI_Group_incl(previousGroup, to - from + 1 , procsIdArray, &group),  __LINE__ );
+
+            MPI_Comm previousComm = communicator;
+            FMpi::Assert( MPI_Comm_create(previousComm, group, &communicator),  __LINE__ );
+
+            MPI_Comm_free(&previousComm);
+            MPI_Group_free(&previousGroup);
+
+            reset();
+            delete[]  procsIdArray ;
+        }
+    };
+
+    ////////////////////////////////////////////////////////
+    // FMpi methods
+    ////////////////////////////////////////////////////////
+
+    /*
+    We use init with thread because of an openmpi error:
 
+    [fourmi062:15896] [[13237,0],1]-[[13237,1],1] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
+    [fourmi056:04597] [[13237,0],3]-[[13237,1],3] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
+    [fourmi053:08571] [[13237,0],5]-[[13237,1],5] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
 
-    // reset : get rank and nb proc from mpi
-    void reset(){
-      FMpi::Assert( MPI_Comm_rank(communicator,&rank),  __LINE__ );
-      FMpi::Assert( MPI_Comm_size(communicator,&nbProc),  __LINE__ );
+    Erreur pour le proc1
+    [[13237,1],1][btl_openib_component.c:3227:handle_wc] from fourmi062 to: fourmi056 error polling LP CQ with status LOCAL LENGTH ERROR status number 1 for wr_id 7134664 opcode 0  vendor error 105 qp_idx 3
+    Tous on la meme erreur le 2e 1 est remplace par le rang.
+  */
+    FMpi() : communicator(nullptr) {
+        int provided = 0;
+        FMpi::Assert( MPI_Init_thread(NULL,NULL, MPI_THREAD_SERIALIZED, &provided), __LINE__);
+        communicator = new FComm(MPI_COMM_WORLD);
     }
-
-  public:
-    /** Constructor : dup the comm given in parameter */
-    explicit FComm(MPI_Comm inCommunicator ) {
-      FMpi::Assert( MPI_Comm_dup(inCommunicator, &communicator),  __LINE__ , "comm dup");
-      FMpi::Assert( MPI_Comm_group(communicator, &group),  __LINE__ , "comm group");
-
-      reset();
+    FMpi(int inArgc, char **  inArgv ) : communicator(nullptr) {
+        int provided = 0;
+        FMpi::Assert( MPI_Init_thread(&inArgc,&inArgv, MPI_THREAD_SERIALIZED, &provided), __LINE__);
+        communicator = new FComm(MPI_COMM_WORLD);
     }
 
-    /** Free communicator and group */
-    virtual ~FComm(){
-      FMpi::Assert( MPI_Comm_free(&communicator),  __LINE__ );
-      FMpi::Assert( MPI_Group_free(&group),  __LINE__ );
+    /** Delete the communicator and call mpi finalize */
+    ~FMpi(){
+        delete communicator;
+        MPI_Finalize();
     }
 
-    /** To get the mpi comm needed for communication */
-    MPI_Comm getComm() const {
-      return communicator;
+    /** Get the global communicator */
+    const FComm& global() {
+        return (*communicator);
     }
 
-    /** The current rank */
-    int processId() const {
-      return rank;
-    }
+    ////////////////////////////////////////////////////////////
+    // Mpi Types meta function
+    ////////////////////////////////////////////////////////////
 
-    /** The current number of procs in the group */
-    int processCount() const {
-      return nbProc;
+    static const MPI_Datatype GetType(const long long&){
+        return MPI_LONG_LONG;
     }
 
-    ////////////////////////////////////////////////////////////
-    // Split/Chunk functions
-    ////////////////////////////////////////////////////////////
+    static const MPI_Datatype GetType(const long int&){
+        return MPI_LONG;
+    }
 
-    /** Get a left index related to a size */
-    template< class T >
-    T getLeft(const T inSize)  const {
-      const double step = (double(inSize) / double(processCount()));
-      return T(FMath::Ceil(step * double(processId())));
+    static const MPI_Datatype GetType(const double&){
+        return MPI_DOUBLE;
     }
 
-    /** Get a right index related to a size */
-    template< class T >
-    T getRight(const T inSize)  const {
-      const double step = (double(inSize) / double(processCount()));
-      const T res = T(FMath::Ceil(step * double(processId()+1)));
-      if(res > inSize) return inSize;
-      else return res;
+    static const MPI_Datatype GetType(const float&){
+        return MPI_FLOAT;
     }
 
-    /** Get a right index related to a size and another id */
-    template< class T >
-    T getOtherRight(const T inSize, const int other)  const {
-      const double step = (double(inSize) / double(processCount()));
-      const T res = T(FMath::Ceil(step * double(other+1)));
-      if(res > inSize) return inSize;
-      else return res;
+    static const MPI_Datatype GetType(const int&){
+        return MPI_INT;
     }
 
-    /** Get a left index related to a size and another id */
-    template< class T >
-    T getOtherLeft(const T inSize, const int other) const {
-      const double step = (double(inSize) / double(processCount()));
-      return T(FMath::Ceil(step * double(other)));
+    static const MPI_Datatype GetType(const char&){
+        return MPI_CHAR;
     }
 
-    /** Get a proc id from and index */
-    template< class T >
-    int getProc(const int position, const T inSize) const {
-      const double step = (double(inSize) / processCount());
-      return int(position/step);
+    static const MPI_Datatype GetType(const FComplexe& a){
+        MPI_Datatype FMpiComplexe;
+        MPI_Type_contiguous(2, GetType(a.getReal()) , &FMpiComplexe);
+        return FMpiComplexe;
     }
 
     ////////////////////////////////////////////////////////////
     // Mpi interface functions
     ////////////////////////////////////////////////////////////
 
-
-    /** Reduce a value for proc == 0 */
-    template< class T >
-    T reduceSum(T data) const {
-      T result(0);
-      FMpi::Assert( MPI_Reduce( &data, &result, 1, FMpi::GetType(data), MPI_SUM, 0, communicator ), __LINE__);
-      return result;
+    /** generic mpi assert function */
+    static void Assert(const int test, const unsigned line, const char* const message = nullptr){
+        if(test != MPI_SUCCESS){
+            printf("[ERROR-QS] Test failled at line %d, result is %d", line, test);
+            if(message) printf(", message: %s",message);
+            printf("\n");
+            fflush(stdout);
+            MPI_Abort(MPI_COMM_WORLD, int(line) );
+        }
     }
 
-    /** Reduce an average */
-    template< class T >
-    T reduceAverageAll(T data) const {
-      T result[processCount()];
-      FMpi::Assert( MPI_Allgather( &data, 1, FMpi::GetType(data), result, 1, FMpi::GetType(data), getComm()),  __LINE__ );
-
-      T average = 0;
-      for(int idxProc = 0 ; idxProc < processCount() ;++idxProc){
-	average += result[idxProc] / processCount();
-      }
-      return average;
+    /** Compute a left index from data */
+    template <class T>
+    static T GetLeft(const T inSize, const int inIdProc, const int inNbProc) {
+        const double step = (double(inSize) / inNbProc);
+        return T(ceil(step * inIdProc));
     }
 
-    /** Change the group size */
-    void groupReduce(const int from , const int to){
-      int * procsIdArray = new int [to - from + 1];
-      for(int idxProc = from ;idxProc <= to ; ++idxProc){
-	procsIdArray[idxProc - from] = idxProc;
-      }
-
-      MPI_Group previousGroup = group;
-      FMpi::Assert( MPI_Group_incl(previousGroup, to - from + 1 , procsIdArray, &group),  __LINE__ );
-
-      MPI_Comm previousComm = communicator;
-      FMpi::Assert( MPI_Comm_create(previousComm, group, &communicator),  __LINE__ );
-
-      MPI_Comm_free(&previousComm);
-      MPI_Group_free(&previousGroup);
-
-      reset();
-      delete  procsIdArray ;
+    /** Compute a right index from data */
+    template <class T>
+    static T GetRight(const T inSize, const int inIdProc, const int inNbProc) {
+        const double step = (double(inSize) / inNbProc);
+        const T res = T(ceil(step * (inIdProc+1)));
+        if(res > inSize) return inSize;
+        else return res;
     }
-  };
 
-  ////////////////////////////////////////////////////////
-  // FMpi methods
-  ////////////////////////////////////////////////////////
-
-  /*
-    We use init with thread because of an openmpi error:
-
-    [fourmi062:15896] [[13237,0],1]-[[13237,1],1] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
-    [fourmi056:04597] [[13237,0],3]-[[13237,1],3] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
-    [fourmi053:08571] [[13237,0],5]-[[13237,1],5] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
-
-    Erreur pour le proc1
-    [[13237,1],1][btl_openib_component.c:3227:handle_wc] from fourmi062 to: fourmi056 error polling LP CQ with status LOCAL LENGTH ERROR status number 1 for wr_id 7134664 opcode 0  vendor error 105 qp_idx 3
-    Tous on la meme erreur le 2e 1 est remplace par le rang.
-  */
-  FMpi() : communicator(nullptr) {
-    int provided = 0;
-    FMpi::Assert( MPI_Init_thread(NULL,NULL, MPI_THREAD_SERIALIZED, &provided), __LINE__);
-    communicator = new FComm(MPI_COMM_WORLD);
-  }
-  FMpi(int inArgc, char **  inArgv ) : communicator(nullptr) {
-    int provided = 0;
-    FMpi::Assert( MPI_Init_thread(&inArgc,&inArgv, MPI_THREAD_SERIALIZED, &provided), __LINE__);
-    communicator = new FComm(MPI_COMM_WORLD);
-  }
-
-  /** Delete the communicator and call mpi finalize */
-  ~FMpi(){
-    delete communicator;
-    MPI_Finalize();
-  }
-
-  /** Get the global communicator */
-  const FComm& global() {
-    return (*communicator);
-  }
-
-  ////////////////////////////////////////////////////////////
-  // Mpi Types meta function
-  ////////////////////////////////////////////////////////////
-
-  static const MPI_Datatype GetType(const long long&){
-    return MPI_LONG_LONG;
-  }
-
-  static const MPI_Datatype GetType(const long int&){
-    return MPI_LONG;
-  }
-
-  static const MPI_Datatype GetType(const double&){
-    return MPI_DOUBLE;
-  }
-
-  static const MPI_Datatype GetType(const float&){
-    return MPI_FLOAT;
-  }
-
-  static const MPI_Datatype GetType(const int&){
-    return MPI_INT;
-  }
-
-  static const MPI_Datatype GetType(const char&){
-    return MPI_CHAR;
-  }
-
-  static const MPI_Datatype GetType(const FComplexe& a){
-    MPI_Datatype FMpiComplexe;
-    MPI_Type_contiguous(2, GetType(a.getReal()) , &FMpiComplexe);
-    return FMpiComplexe;
-  }
-
-  ////////////////////////////////////////////////////////////
-  // Mpi interface functions
-  ////////////////////////////////////////////////////////////
-
-  /** generic mpi assert function */
-  static void Assert(const int test, const unsigned line, const char* const message = nullptr){
-    if(test != MPI_SUCCESS){
-      printf("[ERROR-QS] Test failled at line %d, result is %d", line, test);
-      if(message) printf(", message: %s",message);
-      printf("\n");
-      fflush(stdout);
-      MPI_Abort(MPI_COMM_WORLD, int(line) );
+    /** Compute a proc id from index & data */
+    template <class T>
+    static int GetProc(const T position, const T inSize, const int inNbProc) {
+        const double step = double(inSize) / double(inNbProc);
+        return int(double(position)/step);
     }
-  }
-
-  /** Compute a left index from data */
-  template <class T>
-  static T GetLeft(const T inSize, const int inIdProc, const int inNbProc) {
-    const double step = (double(inSize) / inNbProc);
-    return T(ceil(step * inIdProc));
-  }
-
-  /** Compute a right index from data */
-  template <class T>
-  static T GetRight(const T inSize, const int inIdProc, const int inNbProc) {
-    const double step = (double(inSize) / inNbProc);
-    const T res = T(ceil(step * (inIdProc+1)));
-    if(res > inSize) return inSize;
-    else return res;
-  }
-
-  /** Compute a proc id from index & data */
-  template <class T>
-  static int GetProc(const T position, const T inSize, const int inNbProc) {
-    const double step = double(inSize) / double(inNbProc);
-    return int(double(position)/step);
-  }
-
-  /** assert if mpi error */
-  static void MpiAssert(const int test, const unsigned line, const char* const message = nullptr){
-    if(test != MPI_SUCCESS){
-      printf("[ERROR] Test failed at line %d, result is %d", line, test);
-      if(message) printf(", message: %s",message);
-      printf("\n");
-      fflush(stdout);
-      MPI_Abort(MPI_COMM_WORLD, int(line) );
+
+    /** assert if mpi error */
+    static void MpiAssert(const int test, const unsigned line, const char* const message = nullptr){
+        if(test != MPI_SUCCESS){
+            printf("[ERROR] Test failed at line %d, result is %d", line, test);
+            if(message) printf(", message: %s",message);
+            printf("\n");
+            fflush(stdout);
+            MPI_Abort(MPI_COMM_WORLD, int(line) );
+        }
     }
-  }
 
 private:
-  /** The original communicator */
-  FComm* communicator;
+    /** The original communicator */
+    FComm* communicator;
 };
 
 
diff --git a/Src/Utils/FQuickSort.hpp b/Src/Utils/FQuickSort.hpp
index 7084d7f9d4eafe5f33b5d4895bfa823b53c6bbb7..b4d10b1ddec9da965d1e342dca157df527c8e2c1 100755
--- a/Src/Utils/FQuickSort.hpp
+++ b/Src/Utils/FQuickSort.hpp
@@ -76,31 +76,6 @@ protected:
         return left;
     }
 
-  /* A local iteration of qs */
-  static void QsLocal(SortType array[], const CompareType& pivot,
-		      IndexType myLeft, IndexType myRight,
-		      IndexType& prefix, IndexType& sufix){
-    
-    IndexType leftIter = myLeft;
-    IndexType rightIter = myRight;
-    
-    while(true){
-      while(CompareType(array[leftIter]) <= pivot && leftIter < rightIter){
-	++leftIter;
-      }
-      while(leftIter <= rightIter && pivot < CompareType(array[rightIter])){
-	--rightIter;
-      }
-      if(rightIter < leftIter) break;
-
-      Swap(array[leftIter],array[rightIter]);
-      ++leftIter;
-      --rightIter;
-    }
- 
-    prefix = leftIter - myLeft;
-    sufix = myRight - myLeft - prefix + 1;
-  }
 
     /* The sequential qs */
     static void QsSequentialStep(SortType array[], const IndexType left, const IndexType right){
diff --git a/Src/Utils/FQuickSortMpi.hpp b/Src/Utils/FQuickSortMpi.hpp
index 642938d799b1b49e846342bf861e7c5fbbcde487..91b849fdb4cb5151f1a933d9440cc68b80b0874a 100755
--- a/Src/Utils/FQuickSortMpi.hpp
+++ b/Src/Utils/FQuickSortMpi.hpp
@@ -18,225 +18,367 @@
 
 #include "FQuickSort.hpp"
 #include "FMpi.hpp"
+#include "FLog.hpp"
 
+#include <memory>
 
 template <class SortType, class CompareType, class IndexType>
 class FQuickSortMpi : public FQuickSort< SortType, CompareType, IndexType> {
-public:
-    /* the mpi qs */
-    static void QsMpi(const SortType originalArray[], IndexType size, SortType* & outputArray, IndexType& outputSize, const FMpi::FComm& originalComm){
-        FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Quicksort" , __FILE__ , __LINE__) );
-        // We need a structure see the algorithm detail to know more
-        struct Fix{
-            IndexType pre;
-            IndexType suf;
-        };
-
-        // first we copy data into our working buffer : outputArray
-        outputArray = new SortType[size];
-        FMemUtils::memcpy(outputArray, originalArray, sizeof(SortType) * size);
-        outputSize = size;
-
-        // alloc outputArray to store pre/sufixe, maximum needed is nb procs[comm world] + 1
-        Fix fixes[originalComm.processCount() + 1];
-        Fix fixesSum[originalComm.processCount() + 1];
-        memset(fixes,0,sizeof(Fix) * originalComm.processCount());
-        memset(fixesSum,0,sizeof(Fix) * (originalComm.processCount() + 1) );
-
-        // receiving buffer
-        IndexType bufferSize = 0;
-        SortType* buffer = nullptr;
-
-        // Create the first com
-        FMpi::FComm currentComm(originalComm.getComm());
+    // We need a structure see the algorithm detail to know more
+    struct Partition{
+        IndexType lowerPart;
+        IndexType greaterPart;
+    };
+
+    struct PackData {
+        int idProc;
+        IndexType fromElement;
+        IndexType toElement;
+    };
+
+
+    static void Swap(SortType& value, SortType& other){
+        SortType temp = value;
+        value = other;
+        other = temp;
+    }
 
-        // While I am not working alone on my own data
-        while( currentComm.processCount() != 1 ){
-            const int currentRank = currentComm.processId();
-            const int currentNbProcs = currentComm.processCount();
+    /* A local iteration of qs */
+    static IndexType QsPartition(SortType array[], IndexType left, IndexType right, const CompareType& pivot){
+        IndexType idx = left;
+        while( idx <= right && CompareType(array[idx]) <= pivot){
+            idx += 1;
+        }
+        left = idx;
 
-            MPI_Request requests[currentNbProcs * 2];
-            int iterRequest = 0;
+        for( ; idx <= right ; ++idx){
+            if( CompareType(array[idx]) <= pivot ){
+                Swap(array[idx],array[left]);
+                left += 1;
+            }
+        }
 
-            /////////////////////////////////////////////////
-            // Local sort
-            /////////////////////////////////////////////////
+        return left;
+    }
 
-            // sort QsLocal part of the outputArray
-            const CompareType pivot = currentComm.reduceAverageAll( CompareType(outputArray[size/2]) );
-            Fix myFix;
-            FQuickSort< SortType, CompareType, IndexType>::QsLocal(outputArray, pivot, 0, size - 1, myFix.pre, myFix.suf);
+    static std::vector<PackData> Distribute(const int currentRank, const int currentNbProcs ,
+                                            const Partition globalElementBalance[], const Partition globalElementBalanceSum[],
+                                            const int procInTheMiddle, const bool inFromRightToLeft){
+        // First agree on who send and who recv
+        const int firstProcToSend = (inFromRightToLeft ? procInTheMiddle+1 : 0);
+        const int lastProcToSend  = (inFromRightToLeft ? currentNbProcs : procInTheMiddle+1);
+        const int firstProcToRecv = (inFromRightToLeft ? 0 : procInTheMiddle+1);
+        const int lastProcToRecv  = (inFromRightToLeft ? procInTheMiddle+1 : currentNbProcs);
+        // Get the number of element depending on the lower or greater send/recv
+        const IndexType totalElementToProceed = (inFromRightToLeft ?
+                                                     globalElementBalanceSum[lastProcToSend].lowerPart - globalElementBalanceSum[firstProcToSend].lowerPart :
+                                                     globalElementBalanceSum[lastProcToSend].greaterPart - globalElementBalanceSum[firstProcToSend].greaterPart);
+        const IndexType totalElementAlreadyOwned = (inFromRightToLeft ?
+                                                        globalElementBalanceSum[lastProcToRecv].lowerPart - globalElementBalanceSum[firstProcToRecv].lowerPart :
+                                                        globalElementBalanceSum[lastProcToRecv].greaterPart - globalElementBalanceSum[firstProcToRecv].greaterPart);
+
+        const int nbProcsToRecv = (lastProcToRecv-firstProcToRecv);
+        const int nbProcsToSend = (lastProcToSend-firstProcToSend);
+        const IndexType totalElements = (totalElementToProceed+totalElementAlreadyOwned);
+
+        std::vector<IndexType> nbElementsToRecvPerProc;
+        nbElementsToRecvPerProc.resize(nbProcsToRecv);
+        {
+            // Get the number of elements each proc should recv
+            IndexType totalRemainingElements = totalElements;
+
+            for(int idxProc = firstProcToRecv; idxProc < lastProcToRecv ; ++idxProc){
+                const IndexType nbElementsAlreadyOwned = (inFromRightToLeft ? globalElementBalance[idxProc].lowerPart : globalElementBalance[idxProc].greaterPart);
+                const IndexType averageNbElementForRemainingProc = (totalRemainingElements)/(lastProcToRecv-idxProc);
+                totalRemainingElements -= nbElementsAlreadyOwned;
+                if(nbElementsAlreadyOwned < averageNbElementForRemainingProc){
+                    nbElementsToRecvPerProc[idxProc - firstProcToRecv] = (averageNbElementForRemainingProc - nbElementsAlreadyOwned);
+                    totalRemainingElements -= nbElementsToRecvPerProc[idxProc - firstProcToRecv];
+                }
+                else{
+                    nbElementsToRecvPerProc[idxProc - firstProcToRecv] = 0;
+                }
+                ////FLOG( FLog::Controller << currentRank << "] nbElementsToRecvPerProc[" << idxProc << "] = " << nbElementsToRecvPerProc[idxProc - firstProcToRecv] << "\n"; )
+            }
+        }
 
-            // exchange fixes
-            FMpi::Assert( MPI_Allgather( &myFix, sizeof(Fix), MPI_BYTE, fixes, sizeof(Fix), MPI_BYTE, currentComm.getComm()),  __LINE__ );
+        // Store in an array the number of element to send
+        std::vector<IndexType> nbElementsToSendPerProc;
+        nbElementsToSendPerProc.resize(nbProcsToSend);
+        for(int idxProc = firstProcToSend; idxProc < lastProcToSend ; ++idxProc){
+            const IndexType nbElementsAlreadyOwned = (inFromRightToLeft ? globalElementBalance[idxProc].lowerPart : globalElementBalance[idxProc].greaterPart);
+            nbElementsToSendPerProc[idxProc-firstProcToSend] = nbElementsAlreadyOwned;
+            ////FLOG( FLog::Controller << currentRank << "] nbElementsToSendPerProc[" << idxProc << "] = " << nbElementsToSendPerProc[idxProc-firstProcToSend] << "\n"; )
+        }
 
-            // each procs compute the summation
-            fixesSum[0].pre = 0;
-            fixesSum[0].suf = 0;
-            for(int idxProc = 0 ; idxProc < currentNbProcs ; ++idxProc){
-                fixesSum[idxProc + 1].pre = fixesSum[idxProc].pre + fixes[idxProc].pre;
-                fixesSum[idxProc + 1].suf = fixesSum[idxProc].suf + fixes[idxProc].suf;
+        // Compute all the send recv but keep only the ones related to currentRank
+        std::vector<PackData> packs;
+        int idxProcSend = 0;
+        IndexType positionElementsSend = 0;
+        int idxProcRecv = 0;
+        IndexType positionElementsRecv = 0;
+        while(idxProcSend != nbProcsToSend && idxProcRecv != nbProcsToRecv){
+            if(nbElementsToSendPerProc[idxProcSend] == 0){
+                idxProcSend += 1;
+                positionElementsSend = 0;
             }
-
-            // then I need to know which procs will be in the middle
-            int splitProc = FMpi::GetProc(fixesSum[currentNbProcs].pre - 1, fixesSum[currentNbProcs].pre + fixesSum[currentNbProcs].suf, currentNbProcs);
-            if(splitProc == currentNbProcs - 1){
-                --splitProc;
+            else if(nbElementsToRecvPerProc[idxProcRecv] == 0){
+                idxProcRecv += 1;
+                positionElementsRecv = 0;
             }
-
-            /////////////////////////////////////////////////
-            // Send my data
-            /////////////////////////////////////////////////
-
-            // above pivot (right part)
-            if( fixes[currentRank].suf ){
-                const int procsInSuf = currentNbProcs - 1 - splitProc;
-                const int firstProcInSuf = splitProc + 1;
-                const IndexType elementsInSuf = fixesSum[currentNbProcs].suf;
-
-                const int firstProcToSend = FMpi::GetProc(fixesSum[currentRank].suf, elementsInSuf, procsInSuf) + firstProcInSuf;
-                const int lastProcToSend = FMpi::GetProc(fixesSum[currentRank + 1].suf - 1, elementsInSuf, procsInSuf) + firstProcInSuf;
-
-                IndexType sent = 0;
-                for(int idxProc = firstProcToSend ; idxProc <= lastProcToSend ; ++idxProc){
-                    const IndexType thisProcRight = FMpi::GetRight(elementsInSuf, idxProc - firstProcInSuf, procsInSuf);
-                    IndexType sendToProc = thisProcRight - fixesSum[currentRank].suf - sent;
-
-                    if(sendToProc + sent > fixes[currentRank].suf){
-                        sendToProc = fixes[currentRank].suf - sent;
-                    }
-                    if( sendToProc ){
-                        FMpi::Assert( MPI_Isend(&outputArray[sent + fixes[currentRank].pre], int(sendToProc * sizeof(SortType)), MPI_BYTE , idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]),  __LINE__ );
-                    }
-                    sent += sendToProc;
+            else {
+                const IndexType nbElementsInPack = FMath::Min(nbElementsToSendPerProc[idxProcSend], nbElementsToRecvPerProc[idxProcRecv]);
+                if(idxProcSend + firstProcToSend == currentRank){
+                    PackData pack;
+                    pack.idProc      = idxProcRecv + firstProcToRecv;
+                    pack.fromElement = positionElementsSend;
+                    pack.toElement   = pack.fromElement + nbElementsInPack;
+                    packs.push_back(pack);
                 }
-            }
-
-            // under pivot (left part)
-            if( fixes[currentRank].pre ){
-                const int procsInPre = splitProc + 1;
-                const IndexType elementsInPre = fixesSum[currentNbProcs].pre;
-
-                const int firstProcToSend = FMpi::GetProc(fixesSum[currentRank].pre, elementsInPre, procsInPre);
-                const int lastProcToSend = FMpi::GetProc(fixesSum[currentRank + 1].pre - 1, elementsInPre, procsInPre);
-
-                IndexType sent = 0;
-                for(int idxProc = firstProcToSend ; idxProc <= lastProcToSend ; ++idxProc){
-                    const IndexType thisProcRight = FMpi::GetRight(elementsInPre, idxProc, procsInPre);
-                    IndexType sendToProc = thisProcRight - fixesSum[currentRank].pre - sent;
-
-                    if(sendToProc + sent > fixes[currentRank].pre){
-                        sendToProc = fixes[currentRank].pre - sent;
-                    }
-                    if(sendToProc){
-                        FMpi::Assert( MPI_Isend(&outputArray[sent], int(sendToProc * sizeof(SortType)), MPI_BYTE , idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]),  __LINE__ );
-                    }
-                    sent += sendToProc;
+                else if(idxProcRecv + firstProcToRecv == currentRank){
+                    PackData pack;
+                    pack.idProc      = idxProcSend + firstProcToSend;
+                    pack.fromElement = positionElementsRecv;
+                    pack.toElement   = pack.fromElement + nbElementsInPack;
+                    packs.push_back(pack);
                 }
+                nbElementsToSendPerProc[idxProcSend] -= nbElementsInPack;
+                nbElementsToRecvPerProc[idxProcRecv] -= nbElementsInPack;
+                positionElementsSend += nbElementsInPack;
+                positionElementsRecv += nbElementsInPack;
             }
+        }
+
+        return packs;
+    }
+
+    static void RecvDistribution(SortType ** inPartRecv, IndexType* inNbElementsRecv,
+                                 const Partition globalElementBalance[], const Partition globalElementBalanceSum[],
+                                 const int procInTheMiddle, const FMpi::FComm& currentComm, const bool inFromRightToLeft){
+        // Compute to know what to recv
+        const std::vector<PackData> whatToRecvFromWho = Distribute(currentComm.processId(), currentComm.processCount(),
+                                                                   globalElementBalance, globalElementBalanceSum,
+                                                                   procInTheMiddle, inFromRightToLeft);
+        // Count the total number of elements to recv
+        IndexType totalToRecv = 0;
+        for(const PackData& pack : whatToRecvFromWho){
+            totalToRecv += pack.toElement - pack.fromElement;
+        }
+        // Alloc buffer
+        SortType* recvBuffer = new SortType[totalToRecv];
+
+        // Recv all data
+        MPI_Request requests[whatToRecvFromWho.size()];
+        for(int idxPack = 0 ; idxPack < int(whatToRecvFromWho.size()) ; ++idxPack){
+            const PackData& pack = whatToRecvFromWho[idxPack];
+            ////FLOG( FLog::Controller << currentComm.processId() << "] Recv from " << pack.idProc << " from " << pack.fromElement << " to " << pack.toElement << "\n"; )
+            FMpi::Assert( MPI_Irecv((SortType*)&recvBuffer[pack.fromElement], int((pack.toElement - pack.fromElement) * sizeof(SortType)), MPI_BYTE, pack.idProc,
+                          FMpi::TagQuickSort, currentComm.getComm(), &requests[idxPack]) , __LINE__);
+        }
+        // Wait to complete
+        FMpi::Assert( MPI_Waitall(whatToRecvFromWho.size(), requests, MPI_STATUSES_IGNORE),  __LINE__ );
+        // Copy to ouput variables
+        (*inPartRecv) = recvBuffer;
+        (*inNbElementsRecv) = totalToRecv;
+    }
 
-            /////////////////////////////////////////////////
-            // Receive data that belong to me
-            /////////////////////////////////////////////////
+    static void SendDistribution(const SortType * inPartToSend, const IndexType inNbElementsToSend,
+                                 const Partition globalElementBalance[], const Partition globalElementBalanceSum[],
+                                 const int procInTheMiddle, const FMpi::FComm& currentComm, const bool inFromRightToLeft){
+        // Compute to know what to send
+        const std::vector<PackData> whatToSendToWho = Distribute(currentComm.processId(), currentComm.processCount(),
+                                                                 globalElementBalance, globalElementBalanceSum,
+                                                                 procInTheMiddle, inFromRightToLeft);
+
+        // Post send messages
+        MPI_Request requests[whatToSendToWho.size()];
+        for(int idxPack = 0 ; idxPack < int(whatToSendToWho.size()) ; ++idxPack){
+            const PackData& pack = whatToSendToWho[idxPack];
+            ////FLOG( FLog::Controller << currentComm.processId() << "] Send to " << pack.idProc << " from " << pack.fromElement << " to " << pack.toElement << "\n"; )
+            FMpi::Assert( MPI_Isend((SortType*)&inPartToSend[pack.fromElement], int((pack.toElement - pack.fromElement) * sizeof(SortType)), MPI_BYTE , pack.idProc,
+                          FMpi::TagQuickSort, currentComm.getComm(), &requests[idxPack]) , __LINE__);
+        }
+        // Wait to complete
+        FMpi::Assert( MPI_Waitall(whatToSendToWho.size(), requests, MPI_STATUSES_IGNORE),  __LINE__ );
+    }
 
-            if( currentRank <= splitProc ){
-                // I am in S-Part (smaller than pivot)
-                const int procsInPre = splitProc + 1;
-                const IndexType elementsInPre = fixesSum[currentNbProcs].pre;
+    static CompareType SelectPivot(const SortType workingArray[], const IndexType currentSize, const FMpi::FComm& currentComm, bool* shouldStop){
+        enum ValuesState{
+            ALL_THE_SAME,
+            NO_VALUES,
+            AVERAGE_2
+        };
+        // Check if all the same
+        bool allTheSame = true;
+        for(int idx = 1 ; idx < currentSize && allTheSame; ++idx){
+            if(workingArray[0] != workingArray[idx]){
+                allTheSame = false;
+            }
+        }
+        // Check if empty
+        const bool noValues = (currentSize == 0);
+        // Get the local pivot if not empty
+        CompareType localPivot = CompareType(0);
+        if(!noValues){
+            localPivot = (CompareType(workingArray[currentSize/3])+CompareType(workingArray[(2*currentSize)/3]))/2;
+        }
 
-                IndexType myLeft = FMpi::GetLeft(elementsInPre, currentRank, procsInPre);
-                IndexType myRightLimit = FMpi::GetRight(elementsInPre, currentRank, procsInPre);
+        ////FLOG( FLog::Controller << currentComm.processId() << "] localPivot = " << localPivot << "\n" );
+
+        const int myRank = currentComm.processId();
+        const int nbProcs = currentComm.processCount();
+        // Exchange the pivos and the state
+        std::unique_ptr<int[]> allProcStates(new int[nbProcs]);
+        std::unique_ptr<CompareType[]> allProcPivots(new CompareType[nbProcs]);
+        {
+            int myState = (noValues?NO_VALUES:(allTheSame?ALL_THE_SAME:AVERAGE_2));
+            FMpi::Assert( MPI_Allgather( &myState, 1, MPI_INT, allProcStates.get(),
+                                         1, MPI_INT, currentComm.getComm()),  __LINE__ );
+            FMpi::Assert( MPI_Allgather( &localPivot, sizeof(CompareType), MPI_BYTE, allProcPivots.get(),
+                                         sizeof(CompareType), MPI_BYTE, currentComm.getComm()),  __LINE__ );
+        }
+        // Test if all the procs have ALL_THE_SAME and the same value
+        bool allProcsAreSame = true;
+        for(int idxProc = 0 ; idxProc < nbProcs && allProcsAreSame; ++idxProc){
+            if(allProcStates[idxProc] != NO_VALUES && (allProcStates[idxProc] != ALL_THE_SAME || allProcPivots[0] != allProcPivots[idxProc])){
+                allProcsAreSame = false;
+            }
+        }
 
-                size = myRightLimit - myLeft;
-                if(bufferSize < size){
-                    bufferSize = size;
-                    delete[] buffer;
-                    buffer = new SortType[bufferSize];
+        if(allProcsAreSame){
+            // All the procs are the same so we need to stop
+            (*shouldStop) = true;
+            return CompareType(0);
+        }
+        else{
+            CompareType globalPivot = 0;
+            CompareType counterValuesInPivot = 0;
+            // Compute the pivos
+            for(int idxProc = 0 ; idxProc < nbProcs; ++idxProc){
+                if(allProcStates[idxProc] != NO_VALUES){
+                    globalPivot += allProcPivots[idxProc];
+                    counterValuesInPivot += 1;
                 }
+            }
+            (*shouldStop) = false;
+            return globalPivot/counterValuesInPivot;
+        }
+    }
 
-                int idxProc = 0;
-                while( idxProc < currentNbProcs && fixesSum[idxProc + 1].pre <= myLeft ){
-                    ++idxProc;
-                }
+public:
 
-                IndexType indexArray = 0;
+    static void QsMpi(const SortType originalArray[], const IndexType originalSize,
+                      SortType** outputArray, IndexType* outputSize, const FMpi::FComm& originalComm){
+        // We do not work in place, so create a new array
+        IndexType currentSize = originalSize;
+        SortType* workingArray = new SortType[currentSize];
+        FMemUtils::memcpy(workingArray, originalArray, sizeof(SortType) * currentSize);
 
-                while( idxProc < currentNbProcs && indexArray < myRightLimit - myLeft){
-                    const IndexType firstIndex = FMath::Max(myLeft , fixesSum[idxProc].pre );
-                    const IndexType endIndex = FMath::Min(fixesSum[idxProc + 1].pre,  myRightLimit);
-                    if( (endIndex - firstIndex) ){
-                        FMpi::Assert( MPI_Irecv(&buffer[indexArray], int((endIndex - firstIndex) * sizeof(SortType)), MPI_BYTE, idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]),  __LINE__ );
-                    }
-                    indexArray += endIndex - firstIndex;
-                    ++idxProc;
-                }
-                // Proceed all send/receive
-                FMpi::Assert( MPI_Waitall(iterRequest, requests, MPI_STATUSES_IGNORE),  __LINE__ );
+        // Clone the MPI group because we will reduce it after each partition
+        FMpi::FComm currentComm(originalComm.getComm());
 
-                currentComm.groupReduce( 0, splitProc);
+        // Parallel sharing until I am alone on the data
+        while( currentComm.processCount() != 1 ){
+            // Agree on the Pivot
+            bool shouldStop;
+            const CompareType globalPivot = SelectPivot(workingArray, currentSize, currentComm, &shouldStop);
+            if(shouldStop){
+                ////FLOG( FLog::Controller << currentComm.processId() << "] shouldStop = " << shouldStop << "\n" );
+                break;
             }
-            else{
-                // I am in L-Part (larger than pivot)
-                const int procsInSuf = currentNbProcs - 1 - splitProc;
-                const IndexType elementsInSuf = fixesSum[currentNbProcs].suf;
-
-                const int rankInL = currentRank - splitProc - 1;
-                IndexType myLeft = FMpi::GetLeft(elementsInSuf, rankInL, procsInSuf);
-                IndexType myRightLimit = FMpi::GetRight(elementsInSuf, rankInL, procsInSuf);
-
-                size = myRightLimit - myLeft;
-                if(bufferSize < size){
-                    bufferSize = size;
-                    delete[] buffer;
-                    buffer = new SortType[bufferSize];
-                }
 
-                int idxProc = 0;
-                while( idxProc < currentNbProcs && fixesSum[idxProc + 1].suf <= myLeft ){
-                    ++idxProc;
-                }
+            ////FLOG( FLog::Controller << currentComm.processId() << "] globalPivot = " << globalPivot << "\n" );
 
-                IndexType indexArray = 0;
+            // Split the array in two parts lower equal to pivot and greater than pivot
+            const IndexType nbLowerElements = QsPartition(workingArray, 0, currentSize-1, globalPivot);
+            const IndexType nbGreaterElements = currentSize - nbLowerElements;
 
-                while( idxProc < currentNbProcs && indexArray < myRightLimit - myLeft){
-                    const IndexType firstIndex = FMath::Max(myLeft , fixesSum[idxProc].suf );
-                    const IndexType endIndex = FMath::Min(fixesSum[idxProc + 1].suf,  myRightLimit);
-                    if( (endIndex - firstIndex) ){
-                        FMpi::Assert( MPI_Irecv(&buffer[indexArray], int((endIndex - firstIndex) * sizeof(SortType)), MPI_BYTE, idxProc, FMpi::TagQuickSort, currentComm.getComm(), &requests[iterRequest++]),  __LINE__ );
-                    }
-                    indexArray += endIndex - firstIndex;
-                    ++idxProc;
-                }
-                // Proceed all send/receive
-                FMpi::Assert( MPI_Waitall(iterRequest, requests, MPI_STATUSES_IGNORE),  __LINE__ );
+            ////FLOG( FLog::Controller << currentComm.processId() << "] After Partition: lower = " << nbLowerElements << " greater = " << nbGreaterElements << "\n"; )
 
-                currentComm.groupReduce( splitProc + 1, currentNbProcs - 1);
-            }
+            const int currentRank = currentComm.processId();
+            const int currentNbProcs = currentComm.processCount();
 
+            // We need to know what each process is holding
+            Partition currentElementsBalance = { nbLowerElements, nbGreaterElements};
+            Partition globalElementBalance[currentNbProcs];
 
+            // Every one in the group need to know
+            FMpi::Assert( MPI_Allgather( &currentElementsBalance, sizeof(Partition), MPI_BYTE, globalElementBalance,
+                                         sizeof(Partition), MPI_BYTE, currentComm.getComm()),  __LINE__ );
 
-            // Copy res into outputArray
-            if(outputSize < size){
-                delete[] outputArray;
-                outputArray = new SortType[size];
-                outputSize = size;
+            // Find the number of elements lower or greater
+            IndexType globalNumberOfElementsGreater = 0;
+            IndexType globalNumberOfElementsLower = 0;
+            Partition globalElementBalanceSum[currentNbProcs + 1];
+            globalElementBalanceSum[0].lowerPart = 0;
+            globalElementBalanceSum[0].greaterPart = 0;
+            for(int idxProc = 0 ; idxProc < currentNbProcs ; ++idxProc){
+                globalElementBalanceSum[idxProc + 1].lowerPart = globalElementBalanceSum[idxProc].lowerPart + globalElementBalance[idxProc].lowerPart;
+                globalElementBalanceSum[idxProc + 1].greaterPart = globalElementBalanceSum[idxProc].greaterPart + globalElementBalance[idxProc].greaterPart;
+                globalNumberOfElementsGreater += globalElementBalance[idxProc].greaterPart;
+                globalNumberOfElementsLower   += globalElementBalance[idxProc].lowerPart;
             }
 
-            FMemUtils::memcpy(outputArray, buffer, sizeof(SortType) * size);
+            ////FLOG( FLog::Controller << currentComm.processId() << "] globalNumberOfElementsGreater = " << globalNumberOfElementsGreater << "\n"; )
+            ////FLOG( FLog::Controller << currentComm.processId() << "] globalNumberOfElementsLower   = " << globalNumberOfElementsLower << "\n"; )
+
+            // The proc rank in the middle from the percentage
+            int procInTheMiddle;
+            if(globalNumberOfElementsLower == 0)        procInTheMiddle = -1;
+            else if(globalNumberOfElementsGreater == 0) procInTheMiddle = currentNbProcs-1;
+            else procInTheMiddle = FMath::Min(currentNbProcs-2, (currentNbProcs*globalNumberOfElementsLower)
+                                              /(globalNumberOfElementsGreater + globalNumberOfElementsLower));
+
+            ////FLOG( FLog::Controller << currentComm.processId() << "] procInTheMiddle = " << procInTheMiddle << "\n"; )
+
+            // Send or receive depending on the state
+            if(currentRank <= procInTheMiddle){
+                // I am in the group of the lower elements
+                SendDistribution(workingArray + nbLowerElements, nbGreaterElements,
+                                 globalElementBalance, globalElementBalanceSum, procInTheMiddle, currentComm, false);
+                SortType* lowerPartRecv = nullptr;
+                IndexType nbLowerElementsRecv = 0;
+                RecvDistribution(&lowerPartRecv, &nbLowerElementsRecv,
+                                 globalElementBalance, globalElementBalanceSum, procInTheMiddle, currentComm, true);
+                // Merge previous part and just received elements
+                const IndexType fullNbLowerElementsRecv = nbLowerElementsRecv + nbLowerElements;
+                SortType* fullLowerPart = new SortType[fullNbLowerElementsRecv];
+                memcpy(fullLowerPart, workingArray, sizeof(SortType)* nbLowerElements);
+                memcpy(fullLowerPart + nbLowerElements, lowerPartRecv, sizeof(SortType)* nbLowerElementsRecv);
+                delete[] workingArray;
+                delete[] lowerPartRecv;
+                workingArray = fullLowerPart;
+                currentSize = fullNbLowerElementsRecv;
+                // Reduce working group
+                currentComm.groupReduce( 0, procInTheMiddle);
+            }
+            else {
+                // I am in the group of the greater elements
+                SortType* greaterPartRecv = nullptr;
+                IndexType nbGreaterElementsRecv = 0;
+                RecvDistribution(&greaterPartRecv, &nbGreaterElementsRecv,
+                                 globalElementBalance, globalElementBalanceSum, procInTheMiddle, currentComm, false);
+                SendDistribution(workingArray, nbLowerElements,
+                                 globalElementBalance, globalElementBalanceSum, procInTheMiddle, currentComm, true);
+                // Merge previous part and just received elements
+                const IndexType fullNbGreaterElementsRecv = nbGreaterElementsRecv + nbGreaterElements;
+                SortType* fullGreaterPart = new SortType[fullNbGreaterElementsRecv];
+                memcpy(fullGreaterPart, workingArray + nbLowerElements, sizeof(SortType)* nbGreaterElements);
+                memcpy(fullGreaterPart + nbGreaterElements, greaterPartRecv, sizeof(SortType)* nbGreaterElementsRecv);
+                delete[] workingArray;
+                delete[] greaterPartRecv;
+                workingArray = fullGreaterPart;
+                currentSize = fullNbGreaterElementsRecv;
+                // Reduce working group
+                currentComm.groupReduce( procInTheMiddle + 1, currentNbProcs - 1);
+            }
         }
 
-        /////////////////////////////////////////////////
-        // End QsMpi sort
-        /////////////////////////////////////////////////
-
-        // Clean
-        delete[] buffer;
-
-        // Normal Quick sort
-        FQuickSort< SortType, CompareType, IndexType>::QsOmp(outputArray, size);
-        outputSize = size;
+        // Finish by a local sort
+        FQuickSort< SortType, CompareType, IndexType>::QsOmp(workingArray, currentSize);
+        (*outputSize)  = currentSize;
+        (*outputArray) = workingArray;
     }
-
 };
 
 #endif // FQUICKSORTMPI_HPP
diff --git a/UTests/FUTester.hpp b/UTests/FUTester.hpp
index 6a80cba4f7e030b3fd74e5786b89b448ecfc7a56..e3d5c1f4c7c0de1713a7ad506f573c4ea518ad86 100755
--- a/UTests/FUTester.hpp
+++ b/UTests/FUTester.hpp
@@ -16,6 +16,7 @@
 #ifndef UTESTER_HPP
 #define UTESTER_HPP
 
+#include "ScalFmmConfig.h"
 
 #include <iostream>
 #include <list>
@@ -24,10 +25,10 @@
 
 
 #define TestClass(X)\
-int main(void){\
-	X Controller;\
-        return Controller.Run();\
-}\
+    int main(void){\
+    X Controller;\
+    return Controller.Run();\
+    }\
 
 
 /**
@@ -44,164 +45,164 @@ int main(void){\
 */
 template <class TestClass>
 class FUTester{
-	// Test function pointer
-	typedef void (TestClass::*TestFunc)(void);
-	
-	/** Test descriptor */
-	struct TestFuncDescriptor{
-		TestFunc func;		//< Test adress
-		std::string name;	//< Test name
-	};
-	
-
-	std::list<TestFuncDescriptor> tests;	//< all tests
-	
-	int totalTests;				//< number of tests
-	
-	int currentTest;			//< current processing test in the run
-	int currentStep;			//< current processing step in the run
-
-	int failedSteps;			//< number of failed step in the current test
-	int failedTests;			//< number of failed tests
-	
+    // Test function pointer
+    typedef void (TestClass::*TestFunc)(void);
+
+    /** Test descriptor */
+    struct TestFuncDescriptor{
+        TestFunc func;		//< Test adress
+        std::string name;	//< Test name
+    };
+
+
+    std::list<TestFuncDescriptor> tests;	//< all tests
+
+    int totalTests;				//< number of tests
+
+    int currentTest;			//< current processing test in the run
+    int currentStep;			//< current processing step in the run
+
+    int failedSteps;			//< number of failed step in the current test
+    int failedTests;			//< number of failed tests
+
 protected:
-	/** Constructor */
-	FUTester(){
-		totalTests = 0;
-	}
-
-	/** Callback before processing test */
-	virtual void Before(){}
-	
-	/** Callback after processing test */
-	virtual void After(){}
-	
-	/** Callback before each unit test */
-	virtual void PreTest(){}
-	
-	/** Callback after each unit test */
-	virtual void PostTest(){}
-	
-	/**
-	* This function has to add tests
+    /** Constructor */
+    FUTester(){
+        totalTests = 0;
+    }
+
+    /** Callback before processing test */
+    virtual void Before(){}
+
+    /** Callback after processing test */
+    virtual void After(){}
+
+    /** Callback before each unit test */
+    virtual void PreTest(){}
+
+    /** Callback after each unit test */
+    virtual void PostTest(){}
+
+    /**
+    * This function has to add tests
         * <code> AddTest(&MyTest::TestOne); </code>
-	*/
-        virtual void SetTests() = 0;
-
-	/**
-	* Add a test without giving a name
-	* @param inFunc test function address
-	*/
-        void AddTest(TestFunc inFunc){
-		char buff[256];
-		sprintf(buff,"Unnamed Test number %d",totalTests+1);
-		AddTest(inFunc,buff);
-	}
-	
-	/**
-	* Add a test with a name
-	* @param inFunc test function address
-	* @param inFuncName function name
-	*/
-        void AddTest(TestFunc inFunc, const std::string& inFuncName){
-		++totalTests;
-		TestFuncDescriptor desc;
-		desc.func = inFunc;
-                desc.name = inFuncName;
-		tests.push_back(desc);
-	}
-	
-	/**
-	* To print a message manually in the test
-	* @param value a object that ostream can work on
-	*/
-        template <class Output>
-	void Print(const Output& value){
-		std::cout<< "--- Output from program : " << value << "\n";
-	}
-
-		/**
-	* To test
-	* @param result the test result
-	* if result is false test failed
-	*/
-        void uassert(const bool result){
-		++currentStep;
-		if(!result){
-			std::cout << ">> Step " << currentStep << " Failed\n";
-			++failedSteps;
-		}
-	}
-	
-	/**
-	* To test equality
-	* @param v1 value one
-	* @param v2 value 2
-	* if v1 is not equal v2 test failed
-	*/
-	template <class T>
-        void equal(const T& v1, const T& v2){
-                uassert(v1 == v2);
-	}
-	
-	/**
-	* To test equality
-	* @param v1 value one
-	* @param v2 value 2
-	* if v1 is equal v2 test failed
-	*/
-	template <class T>
-        void different(const T& v1, const T& v2){
-                uassert(v1 != v2);
-	}
-	
+    */
+    virtual void SetTests() = 0;
+
+    /**
+    * Add a test without giving a name
+    * @param inFunc test function address
+    */
+    void AddTest(TestFunc inFunc){
+        char buff[256];
+        sprintf(buff,"Unnamed Test number %d",totalTests+1);
+        AddTest(inFunc,buff);
+    }
+
+    /**
+    * Add a test with a name
+    * @param inFunc test function address
+    * @param inFuncName function name
+    */
+    void AddTest(TestFunc inFunc, const std::string& inFuncName){
+        ++totalTests;
+        TestFuncDescriptor desc;
+        desc.func = inFunc;
+        desc.name = inFuncName;
+        tests.push_back(desc);
+    }
+
+    /**
+    * To print a message manually in the test
+    * @param value a object that ostream can work on
+    */
+    template <class Output>
+    void Print(const Output& value){
+        std::cout<< "--- Output from program : " << value << "\n";
+    }
+
+    /**
+    * To test
+    * @param result the test result
+    * if result is false test failed
+    */
+    void uassert(const bool result){
+        ++currentStep;
+        if(!result){
+            std::cout << ">> Step " << currentStep << " Failed\n";
+            ++failedSteps;
+        }
+    }
+
+    /**
+    * To test equality
+    * @param v1 value one
+    * @param v2 value 2
+    * if v1 is not equal v2 test failed
+    */
+    template <class T>
+    void equal(const T& v1, const T& v2){
+        uassert(v1 == v2);
+    }
+
+    /**
+    * To test equality
+    * @param v1 value one
+    * @param v2 value 2
+    * if v1 is equal v2 test failed
+    */
+    template <class T>
+    void different(const T& v1, const T& v2){
+        uassert(v1 != v2);
+    }
+
 public :
-	/**
-	* Processing the test
+    /**
+    * Processing the test
         * return application exit code (= nb of errors)
-	*/
-        int Run(){
-		tests.clear();
-		// register tests
-		SetTests();
-	
-		TestClass* const toTest = static_cast<TestClass*>(this);
-		currentTest = 0;
-		failedTests = 0;
-
-		Before();
-		
-		// for each tests
-                const typename std::list<TestFuncDescriptor>::const_iterator end = tests.end();
-                for(typename std::list<TestFuncDescriptor>::iterator iter = tests.begin() ; iter != end ; ++iter){
-			currentStep = 0;
-			failedSteps = 0;
-			
-			std::cout << "[Start] " << (*iter).name << "\n";
-			
-			PreTest();
-			TestFunc ff = (*iter).func;
-			(toTest->*ff)();
-			PostTest();
-			
-			if(failedSteps){
-				std::cout << "[Finished] FAILED (" << failedSteps << "/" << currentStep<< " steps failed)\n";
-				++failedTests;
-			}
-                        else{
-				std::cout << "[Finished] PASSED (" << currentStep << " steps)\n";
-			}
-			
-			++currentTest;
-		}
-		
-		
-		After();
-
-		std::cout <<"Test is over, " << (totalTests-failedTests) << " Passed, " << failedTests << " Failed\n";
-
-                return failedTests;
-	}
+    */
+    int Run(){
+        tests.clear();
+        // register tests
+        SetTests();
+
+        TestClass* const toTest = static_cast<TestClass*>(this);
+        currentTest = 0;
+        failedTests = 0;
+
+        Before();
+
+        // for each tests
+        const typename std::list<TestFuncDescriptor>::const_iterator end = tests.end();
+        for(typename std::list<TestFuncDescriptor>::iterator iter = tests.begin() ; iter != end ; ++iter){
+            currentStep = 0;
+            failedSteps = 0;
+
+            std::cout << "[Start] " << (*iter).name << "\n";
+
+            PreTest();
+            TestFunc ff = (*iter).func;
+            (toTest->*ff)();
+            PostTest();
+
+            if(failedSteps){
+                std::cout << "[Finished] FAILED (" << failedSteps << "/" << currentStep<< " steps failed)\n";
+                ++failedTests;
+            }
+            else{
+                std::cout << "[Finished] PASSED (" << currentStep << " steps)\n";
+            }
+
+            ++currentTest;
+        }
+
+
+        After();
+
+        std::cout <<"Test is over, " << (totalTests-failedTests) << " Passed, " << failedTests << " Failed\n";
+
+        return failedTests;
+    }
 
 };
 
@@ -210,33 +211,33 @@ public :
 #include "../Src/Utils/FMpi.hpp"
 
 #define TestClassMpi(X)						\
-  int main(int argc, char** argv){				\
+    int main(int argc, char** argv){				\
     X Controller(argc,argv);					\
     return Controller.Run();					\
-  }								\
-  
+    }								\
+
 template <class TestClass>
 class FUTesterMpi : public FUTester<TestClass>{
 protected:
-  FMpi app;
-  
-  //Constructor with params to initialize FMpi
-  FUTesterMpi(int argc, char ** argv) : app(argc,argv){
-  }
+    FMpi app;
+
+    //Constructor with params to initialize FMpi
+    FUTesterMpi(int argc, char ** argv) : app(argc,argv){
+    }
 
-  /**
+    /**
    * To print a message manually in the test
    * @param value a object that ostream can work on
    */
-  template <class Output>
-  void Print(const Output& value){
-    if(app.global().processId()==0){
-      std::cout<< "--- Output from program : " << value << "\n";
+    template <class Output>
+    void Print(const Output& value){
+        if(app.global().processId()==0){
+            std::cout<< "--- Output from program : " << value << "\n";
+        }
     }
-  }
 
-  
+
 };
- 
+
 #endif  
 #endif
diff --git a/UTests/utestMpiQs.cpp b/UTests/utestMpiQs.cpp
index aed9241dd51e7f594ea534d46908063e3d8dd05d..bd66483e9d133de4404bf873ef14ba72ba77c50a 100644
--- a/UTests/utestMpiQs.cpp
+++ b/UTests/utestMpiQs.cpp
@@ -1,3 +1,18 @@
+// ===================================================================================
+// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
+// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
+// This software is a computer program whose purpose is to compute the FMM.
+//
+// This software is governed by the CeCILL-C and LGPL licenses and
+// abiding by the rules of distribution of free software.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public and CeCILL-C Licenses for more details.
+// "http://www.cecill.info".
+// "http://www.gnu.org/licenses".
+// ===================================================================================
 #ifndef UTESTMPIQS_HPP
 #define UTESTMPIQS_HPP