diff --git a/Src/Files/FMpiTreeBuilder.hpp b/Src/Files/FMpiTreeBuilder.hpp
index 423659f56e63b69234e96a44520776fb1a4c3b57..8c5d482d841b1a1edad1f65e8a9bb4e1dc67f76d 100755
--- a/Src/Files/FMpiTreeBuilder.hpp
+++ b/Src/Files/FMpiTreeBuilder.hpp
@@ -95,7 +95,7 @@ private:
         // sort particles
         if(type == QuickSort){
             FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, loader.getNumberOfParticles(), *outputArray, *outputSize,communicator);
-            delete [] (realParticlesIndexed);
+	    delete [] (realParticlesIndexed);
         }
         else {
             FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, loader.getNumberOfParticles(), communicator );
@@ -130,7 +130,8 @@ private:
         // sort particles
         if(type == QuickSort){
             FQuickSortMpi<IndexedParticle,MortonIndex, FSize>::QsMpi(realParticlesIndexed, size, *outputArray, *outputSize,communicator);
-            delete [] (realParticlesIndexed);
+            printf("Proc : %d, nb of parts : %lld\n",communicator.processId(),*outputSize);
+	    delete [] (realParticlesIndexed);
         }
         else {
             FBitonicSort<IndexedParticle,MortonIndex, FSize>::Sort( realParticlesIndexed, size, communicator );
@@ -144,8 +145,8 @@ private:
     // To merge the leaves
     //////////////////////////////////////////////////////////////////////////
 
-    static void MergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize& workingSize,
-                            char** leavesArray, FSize* const leavesSize){
+    static void MergeLeaves(const FMpi::FComm& communicator, IndexedParticle*& workingArray, FSize* workingSize,
+                            FSize ** leavesIndices, ParticleClass** leavesArray, FSize* const leavesSize){
         FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) );
         const int rank = communicator.processId();
         const int nbProcs = communicator.processCount();
@@ -159,7 +160,7 @@ private:
             FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) );
 
             MortonIndex otherFirstIndex = -1;
-            if(workingSize != 0 && rank != 0 && rank != nbProcs - 1){
+            if((*workingSize) != 0 && rank != 0 && rank != nbProcs - 1){
 	      MPI_Sendrecv(&workingArray[0].index, 1, MPI_LONG_LONG, rank - 1, FMpi::TagExchangeIndexs,
 			   &otherFirstIndex, 1, MPI_LONG_LONG, rank + 1, FMpi::TagExchangeIndexs,
 			   communicator.getComm(), MPI_STATUS_IGNORE);
@@ -176,18 +177,18 @@ private:
             }
 
             // at this point every one know the first index of his right neighbors
-            const bool needToRecvBeforeSend = (rank != 0 && ((workingSize && otherFirstIndex == workingArray[0].index ) || !workingSize));
+            const bool needToRecvBeforeSend = (rank != 0 && (((*workingSize) && otherFirstIndex == workingArray[0].index ) || !(*workingSize)));
             MPI_Request requestSendLeaf;
 
             IndexedParticle* sendBuffer = 0;
             if(rank != nbProcs - 1 && needToRecvBeforeSend == false){
-	      FSize idxPart = workingSize - 1 ;
+	      FSize idxPart = (*workingSize) - 1 ;
 	      while(idxPart >= 0 && workingArray[idxPart].index == otherFirstIndex){
 		--idxPart;
 	      }
-	      const int particlesToSend = int(workingSize - 1 - idxPart);
+	      const int particlesToSend = int((*workingSize) - 1 - idxPart);
 	      if(particlesToSend){
-		workingSize -= particlesToSend;
+		(*workingSize) -= particlesToSend;
 		sendBuffer = new IndexedParticle[particlesToSend];
 		memcpy(sendBuffer, &workingArray[idxPart + 1], particlesToSend * sizeof(IndexedParticle));
 
@@ -210,10 +211,10 @@ private:
 		sendByOther /= int(sizeof(IndexedParticle));
 
 		const IndexedParticle* const reallocOutputArray = workingArray;
-		const FSize reallocOutputSize = workingSize;
+		const FSize reallocOutputSize = (*workingSize);
 
-		workingSize += sendByOther;
-		workingArray = new IndexedParticle[workingSize];
+		(*workingSize) += sendByOther;
+		workingArray = new IndexedParticle[(*workingSize)];
 		FMemUtils::memcpy(&workingArray[sendByOther], reallocOutputArray, reallocOutputSize * sizeof(IndexedParticle));
 		delete[] reallocOutputArray;
 
@@ -226,11 +227,11 @@ private:
             }
 
             if(rank != nbProcs - 1 && needToRecvBeforeSend == true){
-	      MPI_Send( workingArray, int(workingSize * sizeof(IndexedParticle)), MPI_BYTE,
+	      MPI_Send( workingArray, int((*workingSize) * sizeof(IndexedParticle)), MPI_BYTE,
 			rank + 1, FMpi::TagSplittedLeaf, communicator.getComm());
 	      delete[] workingArray;
 	      workingArray = 0;
-	      workingSize  = 0;
+	      (*workingSize)  = 0;
             }
             else if(rank != nbProcs - 1){
 	      MPI_Wait( &requestSendLeaf, MPI_STATUS_IGNORE);
@@ -239,42 +240,42 @@ private:
             }
 	  }
 	}
-	{
-	  FTRACE( FTrace::FRegion regionTrace("Remove Splited leaves", __FUNCTION__ , __FILE__ , __LINE__) );
-	  // We now copy the data from a sorted type into real particles array + counter
+	{//Filling the Array with leaves and parts //// COULD BE MOVED IN AN OTHER FUCTION
 	  
-	  (*leavesSize)  = 0;
-	  (*leavesArray) = 0;
+	  (*leavesSize)    = 0; //init ptr
+	  (*leavesArray)   = 0; //init ptr
+	  (*leavesIndices) = 0; //init ptr
 	  
-	  if(workingSize){
-	    (*leavesArray) = new char[workingSize * (sizeof(ParticleClass) + sizeof(int))];
+	  if((*workingSize)){
 	    
-	    MortonIndex previousIndex = -1;
-	    char* writeIndex = (*leavesArray);
-	    int* writeCounter = 0;
+	    //Array of particles
+	    *leavesArray = new ParticleClass[(*workingSize)];
+	    
+	    //Temporary array, we don't know yet how many leaves there will be 
+	    FSize * tempIndicesArray = new FSize[(*workingSize)];
+	    memset(tempIndicesArray,0,sizeof(FSize)*(*workingSize));
 	    
-	    for( FSize idxPart = 0; idxPart < workingSize ; ++idxPart){
-	      if( workingArray[idxPart].index != previousIndex ){
-		previousIndex = workingArray[idxPart].index;
-		++(*leavesSize);
-		
-		writeCounter = reinterpret_cast<int*>( writeIndex );
-		writeIndex += sizeof(int);
-		
-		(*writeCounter) = 0;
+	    FSize idxInIndices = 0;
+	    MortonIndex previousIndex = -1;
+	    for(FSize idxPart = 0 ; idxPart < (*workingSize) ; ++idxPart){
+	      if(workingArray[idxPart].index != previousIndex){
+		previousIndex = workingArray[idxPart];
+		tempIndicesArray[idxInIndices] = idxPart;
+		idxInIndices++;
 	      }
-	      
-	      memcpy(writeIndex, &workingArray[idxPart].particle, sizeof(ParticleClass));
-	      
-	      writeIndex += sizeof(ParticleClass);
-	      ++(*writeCounter);
+	      memcpy(&(*leavesArray)[idxPart],&workingArray[idxPart].particle,sizeof(ParticleClass));
 	    }
+	    *leavesSize = idxInIndices;
+	    
+	    *leavesIndices = new FSize[idxInIndices];
+	    memcpy(*leavesIndices,tempIndicesArray,(*leavesSize)*sizeof(FSize));
+
 	  }
 	  
-	  delete [] workingArray;
+	  delete []  workingArray;
 	  
 	  workingArray = 0;
-	  workingSize = 0;
+	
 	}
     }
 
@@ -286,40 +287,32 @@ private:
     /** Put the interval into a tree */
     template <class ContainerClass>
     static void EqualizeAndFillTree(const FMpi::FComm& communicator,  ContainerClass* particlesSaver,
-                                    char*& leavesArray, FSize& nbLeavesInIntervals, FAbstractBalanceAlgorithm * balancer){
+                                    FSize * leavesIndices, ParticleClass* leavesArray, FSize& nbLeavesInIntervals, FSize nbParts, 
+				    FAbstractBalanceAlgorithm * balancer){
 
 
         FTRACE( FTrace::FFunction functionTrace(__FUNCTION__, "Loader to Tree" , __FILE__ , __LINE__) );
         const int myRank = communicator.processId();
         const int nbProcs = communicator.processCount();
-        const FSize myNumberOfLeaves = nbLeavesInIntervals;
-	//Test if -np 1
-	if(nbProcs == 1){
-	  //there is no need to equalize tree between if there is one process
-	  //Saving directly the particles
-	  FSize idxInLeafArray = 0;
-	  for(FSize idxLeaf = 0 ; idxLeaf < nbLeavesInIntervals ; idxLeaf++){
-	    int nbPartInLeaf = *(reinterpret_cast<int*>(&leavesArray[idxInLeafArray]));
-	    idxInLeafArray += sizeof(int);
-	    for(FSize idxPartInLeaf = 0 ; idxPartInLeaf<nbPartInLeaf ; idxPartInLeaf++){
-	      particlesSaver->push(*(reinterpret_cast<ParticleClass*>(&leavesArray[idxInLeafArray+idxPartInLeaf*sizeof(ParticleClass)])));
-	    }
-	    idxInLeafArray += sizeof(ParticleClass)*nbPartInLeaf;
+	const FSize myCurrentsParts = nbParts;
+	
+	if(nbProcs == 1){ //I'm the only one !
+	  //Just copy each part into the Particle Saver
+	  for(FSize idxPart =0 ; idxPart < myCurrentsParts ; ++idxPart){
+	    particlesSaver->push(leavesArray[idxPart]);
 	  }
 	}
 	else{
-	  
 	  // We have to know the number of leaves each procs holds
-	  int*const numberOfLeavesPerProc = new int[nbProcs];
-	  memset(numberOfLeavesPerProc, 0, sizeof(int) * nbProcs);
-	  int intNbLeavesInIntervals = int(nbLeavesInIntervals);
-	  MPI_Allgather(&intNbLeavesInIntervals, 1, MPI_INT, numberOfLeavesPerProc, 1, MPI_INT, communicator.getComm());
-	  printf("Proc : %d : Currently have %lld leaves \n", myRank, myNumberOfLeaves);
-
-	  //Start working THERE
-
+	  FSize*const numberOfLeavesPerProc = new FSize[nbProcs];
+	  memset(numberOfLeavesPerProc, 0, sizeof(FSize) * nbProcs);
+	  FSize intNbLeavesInIntervals = nbLeavesInIntervals;
+	  MPI_Allgather(&intNbLeavesInIntervals, 1, MPI_LONG_LONG_INT, numberOfLeavesPerProc, 1, MPI_LONG_LONG_INT, communicator.getComm());
+	  //For debug, print the input datas for each proc
+	  //printf("Proc : %d : Currently have %lld parts in %lld leaves \n", myRank, myCurrentsParts, myNumberOfLeaves);
+	  
 	  //We need the max number of leafs over th procs
-	  int*const leavesOffsetPerProc = new int[nbProcs + 1];
+	  FSize*const leavesOffsetPerProc = new FSize[nbProcs + 1];
 	  FSize totalNumberOfLeaves  = 0;
 
 	  leavesOffsetPerProc[0] = 0;
@@ -328,213 +321,180 @@ private:
             leavesOffsetPerProc[idxProc] = leavesOffsetPerProc[idxProc-1] + numberOfLeavesPerProc[idxProc-1];
             totalNumberOfLeaves += numberOfLeavesPerProc[idxProc];
 	  }
-	  leavesOffsetPerProc[nbProcs] = int(totalNumberOfLeaves);
-
-	  const FSize currentLeafsOnMyLeft  = leavesOffsetPerProc[myRank];
-	  const FSize currentRightLeafIdx   = leavesOffsetPerProc[myRank+1];
-
-	  //Creation of an array to store how many parts are in each leaf
-	  int*const numberOfParticlesPerLeaf = new int[totalNumberOfLeaves];
-	  int*const myParticlesCounterArray  = &numberOfParticlesPerLeaf[leavesOffsetPerProc[myRank]];
-
-	  memset(numberOfParticlesPerLeaf, 0, sizeof(int)*totalNumberOfLeaves);
-
-	  //Loop over leafArray to fill myParts
-	  size_t idxOfParticlesNumber = 0;
-	  for(int idxLeaf = 0 ; idxLeaf < nbLeavesInIntervals ; ++idxLeaf){
-            const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&leavesArray[idxOfParticlesNumber]));
-            myParticlesCounterArray[idxLeaf] += numberOfParticlesInThisLeaf;
-            idxOfParticlesNumber += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int));
-	  }
-
-	  MPI_Allgatherv(myParticlesCounterArray,numberOfLeavesPerProc[myRank], MPI_INT,
-			 numberOfParticlesPerLeaf, numberOfLeavesPerProc, leavesOffsetPerProc, MPI_INT, communicator.getComm());
-
-	  FSize totalNumberOfParticles = 0;
-	  for(int idxLeaf = 0 ; idxLeaf < totalNumberOfLeaves ; ++idxLeaf){
-            totalNumberOfParticles += numberOfParticlesPerLeaf[idxLeaf];
-	  }
-
-	  const FSize correctLeftLeavesNumber     = balancer->getLeft( totalNumberOfLeaves,numberOfParticlesPerLeaf,totalNumberOfParticles,
-								       NULL,nbProcs,myRank);
-	  const FSize correctRightLeavesIndex     = balancer->getRight(totalNumberOfLeaves,numberOfParticlesPerLeaf,totalNumberOfParticles,
-								       NULL,nbProcs,myRank);
-
-	  //// TODO REMOVE WHEN DEBUG printf("Proc [%d] :: will work from leaf %lld \t to leaf %lld \n",myRank,correctLeftLeavesNumber,correctRightLeavesIndex);
-
-	  MPI_Request* requests = new MPI_Request[nbProcs * 2];
-	  int counterRequest = 0;
-
-	  if(currentLeafsOnMyLeft < correctLeftLeavesNumber || correctRightLeavesIndex < currentRightLeafIdx){
-            size_t offsetLeafToSend = 0;
-            int counterLeafToSend = 0;
-            int idxProcToProceed  = 0;
-
-            while( idxProcToProceed < nbProcs && (balancer->getLeft(totalNumberOfLeaves,numberOfParticlesPerLeaf,totalNumberOfParticles,NULL,nbProcs,idxProcToProceed) < currentRightLeafIdx)){
-	      const FSize procToProceedRightIdx = balancer->getRight(totalNumberOfLeaves,numberOfParticlesPerLeaf,totalNumberOfParticles,NULL,nbProcs,idxProcToProceed);
-	      const FSize procToProceedLeftIdx  = balancer->getLeft(totalNumberOfLeaves,numberOfParticlesPerLeaf,totalNumberOfParticles,NULL,nbProcs,idxProcToProceed);
-	      const bool procToProceedHasLeftInMyInterval  = (currentLeafsOnMyLeft <= procToProceedLeftIdx && procToProceedLeftIdx < currentRightLeafIdx);
-	      const bool procToProceedHasRightInMyInterval = (currentLeafsOnMyLeft <= procToProceedRightIdx && procToProceedRightIdx < currentRightLeafIdx);
-	      const bool procIncludeMyInterval = (procToProceedLeftIdx <= currentLeafsOnMyLeft  && currentRightLeafIdx <= procToProceedRightIdx);
-	      //// TODO REMOVE WHEN DEBUG printf("%d] idxProcToProceed %d procToProceedRightIdx %llu procToProceedLeftIdx %llu procToProceedHasLeftInMyInterval %d procToProceedHasRightInMyInterval %d\n",
-	      //// TODO REMOVE WHEN DEBUG        myRank, idxProcToProceed, procToProceedRightIdx, procToProceedLeftIdx, procToProceedHasLeftInMyInterval, procToProceedHasRightInMyInterval);
-
-	      if(idxProcToProceed != myRank && (procToProceedHasLeftInMyInterval || procToProceedHasRightInMyInterval || procIncludeMyInterval) ){
-		const int firstLeafToSend = FMath::Max(int(procToProceedLeftIdx - currentLeafsOnMyLeft), 0);
-		const int lastLeafToSend  = int(FMath::Min(procToProceedRightIdx - currentLeafsOnMyLeft, myNumberOfLeaves ));
-
-		//// TODO REMOVE WHEN DEBUG printf("Proc :: %d (from leaf %d to %d)\n", myRank, firstLeafToSend, lastLeafToSend);
+	  leavesOffsetPerProc[nbProcs] = totalNumberOfLeaves;
+	  const FSize currentLeafOnL = leavesOffsetPerProc[myRank];   //current leaf on my left
+	  const FSize currentLeafOnR = leavesOffsetPerProc[myRank+1]; //current leaf on my left + my number of particles
+	  
+	  //Building of counter send buffer
+	  FSize * toSend = new FSize[nbProcs];
+	  memset(toSend,0,sizeof(FSize)*nbProcs);
 
-		while(counterLeafToSend != firstLeafToSend){
-		  const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&leavesArray[offsetLeafToSend]));
-		  offsetLeafToSend  += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int));
-		  counterLeafToSend += 1;
-		}
-		const size_t offetSetToSend = offsetLeafToSend;
-		while(counterLeafToSend != lastLeafToSend){
-		  const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&leavesArray[offsetLeafToSend]));
-		  offsetLeafToSend  += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int));
-		  counterLeafToSend += 1;
-		}
+	  //Building of array of indices to send
+	  FSize * idxToSend = new FSize[nbProcs];
+	  memset(idxToSend,0,sizeof(FSize)*nbProcs);
+	  
 
-		//// TODO REMOVE WHEN DEBUG printf("Proc :: %d send %d bytes to %d (from leaf %d to %d)\n",
-		//// TODO REMOVE WHEN DEBUG        myRank, int(offsetLeafToSend - offetSetToSend), idxProcToProceed, firstLeafToSend, lastLeafToSend);
-		MPI_Isend(&leavesArray[offetSetToSend], int(offsetLeafToSend - offetSetToSend), MPI_BYTE,
-			  idxProcToProceed, firstLeafToSend + int(currentLeafsOnMyLeft), communicator.getComm(), &requests[counterRequest++]);
-	      }
-	      idxProcToProceed += 1;
-            }
-	  }
+	  for(int idxProc = 0 ; idxProc < nbProcs ; ++idxProc){
+	    if(idxProc != myRank){
+	      const FSize correctLeftLeafNumber  = balancer->getLeft(totalNumberOfLeaves,NULL,0,0,nbProcs,idxProc);
+	      const FSize correctRightLeafNumber = balancer->getRight(totalNumberOfLeaves,NULL,0,0,nbProcs,idxProc);
 
-	  struct RecvBlockInfo{
-            char* buffer;
-            int nbLeaves;
-	  };
-	  RecvBlockInfo* recvBlockInfo = new RecvBlockInfo[nbProcs];
-	  int nbBlocksToRecv = 0;
-
-	  if(correctLeftLeavesNumber < currentLeafsOnMyLeft || currentRightLeafIdx < correctRightLeavesIndex){
-            FSize iterCorrectLeafIdx = correctLeftLeavesNumber;
-            int idxProcToProceed = 0;
-            while(iterCorrectLeafIdx < correctRightLeavesIndex){
-	      if(currentLeafsOnMyLeft <= iterCorrectLeafIdx && iterCorrectLeafIdx < currentRightLeafIdx){
-		//// TODO REMOVE WHEN DEBUG printf("%d] currentLeafsOnMyLeft %llu iterCorrectLeafIdx %llu iterCorrectLeafIdx %llu currentRightLeafIdx %llu\n",
-		//// TODO REMOVE WHEN DEBUG        myRank, currentLeafsOnMyLeft, iterCorrectLeafIdx, iterCorrectLeafIdx, currentRightLeafIdx);
-		iterCorrectLeafIdx = currentRightLeafIdx;
-		idxProcToProceed   = myRank + 1;
+	      //5 cases : Refer to ParalleleDetails.pdf to know more
+	      
+	      //First and Last : there are no particles in my current interval that belongs to this proc
+	      if((correctRightLeafNumber < currentLeafOnL) || (correctLeftLeafNumber > currentLeafOnR)){
+		toSend[idxProc] = 0;
 	      }
 	      else{
-		//// TODO REMOVE WHEN DEBUG printf("%d] currentLeafsOnMyLeft %llu iterCorrectLeafIdx %llu iterCorrectLeafIdx %llu currentRightLeafIdx %llu correctRightLeavesIndex %llu\n",
-		//// TODO REMOVE WHEN DEBUG        myRank, currentLeafsOnMyLeft, iterCorrectLeafIdx, iterCorrectLeafIdx, currentRightLeafIdx, correctRightLeavesIndex);
-		while(leavesOffsetPerProc[idxProcToProceed+1] <= iterCorrectLeafIdx){
-		  //// TODO REMOVE WHEN DEBUG     printf("%d] leavesOffsetPerProc[%d+1] %llu iterCorrectLeafIdx %lld\n",
-		  //// TODO REMOVE WHEN DEBUG            myRank, idxProcToProceed, leavesOffsetPerProc[idxProcToProceed+1], iterCorrectLeafIdx);
-		  idxProcToProceed += 1;
+		//Second : the first part of my current interval belongs to idxProc
+		if((correctLeftLeafNumber <= currentLeafOnL) && (correctRightLeafNumber < currentLeafOnR)){
+		  idxToSend[idxProc] = 0;
+		  FSize maxToSendLeft = leavesIndices[correctRightLeafNumber-currentLeafOnL+1];
+		  toSend[idxProc] = maxToSendLeft - leavesIndices[idxToSend[idxProc]];
 		}
-		const int nbLeafToReceive  = FMath::Min(leavesOffsetPerProc[idxProcToProceed+1], int(correctRightLeavesIndex)) - int(iterCorrectLeafIdx);
-		FSize nbParticlesToReceive = 0;
-		for(int idxLeaf = 0 ; idxLeaf < nbLeafToReceive ; ++idxLeaf){
-		  nbParticlesToReceive += numberOfParticlesPerLeaf[idxLeaf + iterCorrectLeafIdx];
+		else{//Third : all i have belongs to idxProc
+		  if((correctLeftLeafNumber <= currentLeafOnL) && (correctRightLeafNumber >= currentLeafOnR)){
+		    toSend[idxProc] = myCurrentsParts;
+		    idxToSend[idxProc] = 0;
+		  }
+		  else{
+		    //Forth : In my interval, there is currently all the datas belonging by idxProc
+		    if((correctLeftLeafNumber >= currentLeafOnL) && (correctRightLeafNumber <= currentLeafOnR)){
+		      idxToSend[idxProc] = (correctLeftLeafNumber - currentLeafOnL+1);
+		      FSize maxToSend  = (correctRightLeafNumber == currentLeafOnR)? 
+			(myCurrentsParts) : leavesIndices[correctRightLeafNumber-currentLeafOnL +1];
+		      toSend[idxProc] = maxToSend - leavesIndices[idxToSend[idxProc]];
+		    }
+		    else{
+		      //Fifth The right part of my current interval belongs to idxProc 
+		      if((correctLeftLeafNumber >= currentLeafOnL) && (correctRightLeafNumber > currentLeafOnR)){
+			idxToSend[idxProc] = correctLeftLeafNumber-currentLeafOnL+1;
+			FSize sizeToSend = (correctLeftLeafNumber == currentLeafOnR)?
+			  myCurrentsParts : myCurrentsParts-leavesIndices[correctLeftLeafNumber-currentLeafOnL+1];
+			toSend[idxProc] = sizeToSend;
+		      }
+		    }
+		  }
 		}
-
-		FSize bytesToRecv     = (sizeof(ParticleClass)*nbParticlesToReceive) + sizeof(int)*nbLeafToReceive;
-		char* bufferToReceive = new char[bytesToRecv];
-
-		//// TODO REMOVE WHEN DEBUG printf("Proc :: %d recv %d bytes to %d (from leaf %d to %d)\n",
-		//// TODO REMOVE WHEN DEBUG        myRank, bytesToRecv, idxProcToProceed, iterCorrectLeafIdx, iterCorrectLeafIdx + nbLeafToReceive);
-
-		MPI_Irecv(bufferToReceive, int(bytesToRecv), MPI_BYTE, idxProcToProceed, int(iterCorrectLeafIdx),
-			  communicator.getComm(), &requests[counterRequest++]);
-
-		recvBlockInfo[nbBlocksToRecv].buffer   = bufferToReceive;
-		recvBlockInfo[nbBlocksToRecv].nbLeaves = nbLeafToReceive;
-		nbBlocksToRecv += 1;
-
-		iterCorrectLeafIdx += nbLeafToReceive;
 	      }
-            }
+	    }
 	  }
-
-	  //// TODO REMOVE WHEN DEBUG printf("%d Wait!\n", myRank);
-	  MPI_Waitall(counterRequest, requests, MPI_STATUSES_IGNORE);
-	  //// TODO REMOVE WHEN DEBUG printf("%d Done!\n", myRank);
-
-	  int idxBlockRecvInLeft = 0;
-	  if(correctLeftLeavesNumber < currentLeafsOnMyLeft){
-            const int nbLeavesRecv = int(FMath::Min(currentLeafsOnMyLeft, correctRightLeavesIndex) - correctLeftLeavesNumber);
-            //// TODO REMOVE WHEN DEBUG printf("%d] has receive %d from left\n", myRank, nbLeavesRecv);
-            int idxLeaf  = 0;
-            while(idxLeaf < nbLeavesRecv){
-	      //// TODO REMOVE WHEN DEBUG printf("%d] block %d has %d leaves\n", myRank, idxBlockRecvInLeft, recvBlockInfo[idxBlockRecvInLeft].nbLeaves);
-	      size_t offsetBuffer = 0;
-	      for(int idxLeafInBlock = 0 ; idxLeafInBlock < recvBlockInfo[idxBlockRecvInLeft].nbLeaves ; ++idxLeafInBlock){
-		const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&recvBlockInfo[idxBlockRecvInLeft].buffer[offsetBuffer]));
-		const ParticleClass*const particles   = reinterpret_cast<ParticleClass*>(&recvBlockInfo[idxBlockRecvInLeft].buffer[offsetBuffer] + sizeof(int));
-		//// TODO REMOVE WHEN DEBUG printf("%d] block %d leaf %d has %d part\n", myRank, idxBlockRecvInLeft, idxLeafInBlock, numberOfParticlesInThisLeaf);
-		for(int idxParticle = 0 ; idxParticle < numberOfParticlesInThisLeaf ; ++idxParticle){
-		  particlesSaver->push(particles[idxParticle]);
-		}
-		offsetBuffer  += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int));
+	  //Then, we exchange the datas to send
+	  FSize * globalSendRecvMap = new FSize[nbProcs*nbProcs];
+	  memset(globalSendRecvMap,0,sizeof(FSize)*nbProcs*nbProcs);
+	  //This could be replace by an array toRecv buildt in the same way as toSend
+	  MPI_Allgather(toSend,nbProcs,MPI_LONG_LONG,globalSendRecvMap,nbProcs,MPI_LONG_LONG,communicator.getComm());
+	  
+	  // { // ----------------For debug---------------------------------
+	  //   FSize totRemaining = 0;
+	  //   { //This is a sum of all parts to know if we forgot some of them
+	  //     FSize totToSend = 0;
+	  //     for(int t=0;t<nbProcs;++t){
+	  // 	if(t==myRank){
+	  // 	  for(int k=0;k<nbProcs ; ++k){
+	  // 	    totToSend += toSend[k];
+	  // 	    printf("Proc : %d will send %lld parts to %d starting (leavesIndices[%lld] = %lld)\n",
+	  // 		   myRank,toSend[k],k,idxToSend[k],leavesIndices[idxToSend[k]]);
+	  // 	  }
+	  // 	}
+	  // 	MPI_Barrier(MPI_COMM_WORLD);
+	  //     }
+	  //     totRemaining = myCurrentsParts-totToSend;
+	  //   }
+	  //   if(myRank == 0){
+	  //     for(int k=0;k<nbProcs ; ++k){
+	  // 	for(int t=0 ; t<nbProcs ; ++t){
+	  // 	  printf("%lld\t",globalSendRecvMap[k*nbProcs+t]);
+	  // 	}
+	  // 	printf("\n");
+	  //     }
+	  //   }
+	  //   MPI_Barrier(MPI_COMM_WORLD);
+	  //   for(int k=0;k<nbProcs ; ++k){
+	  //     totRemaining += globalSendRecvMap[k*nbProcs+myRank];
+	  //   }
+	  //   printf("Proc : %d, will have %lld parts \n",myRank,totRemaining);
+	  //   FSize totfor0 = communicator.reduceSum(totRemaining);
+	  //   if(myRank==0){
+	  //     printf("================ %lld ================\n",totfor0);
+	  //   }
+	  // }
+	  
+	  //Then, we have our global recv map.
+	  //We just need to send and recv for real.
+	  
+	  //Finally, store the remaining parts, recv the parts, send my parts
+	  ParticleClass * finalPartBuffer;
+	  FSize finalTotParts;
+	  {
+	    finalTotParts = myCurrentsParts;     //We need to know how many particles we will have
+	    FSize finalCurrentParts = myCurrentsParts; //Parts that I had and that belongs to me
+	    
+	    for(int idxProc=0 ; idxProc<nbProcs ; ++idxProc){
+	      finalCurrentParts -= toSend[idxProc]; //substract the parts sent
+	      finalTotParts -= toSend[idxProc];     //substract the parts sent
+	      finalTotParts += globalSendRecvMap[idxProc*nbProcs+myRank]; //add the parts received
+	    }
+	    finalPartBuffer = new ParticleClass[finalTotParts];
+	    memset(finalPartBuffer,0,sizeof(ParticleClass)*finalTotParts);
+	    
+	    //Copy of the parts we already hold
+	    FSize finalIdxToStart = 0; //idx of the start of my parts inside leavesArray
+	    FSize idxToWrite = 0;      //idx to write my parts 
+	    {//we go from idxProc==0 to idxProc==myRank to increment the self starter
+	      for(int idxProc=0 ; idxProc<myRank ; ++idxProc){
+		idxToWrite += globalSendRecvMap[idxProc*nbProcs+myRank];
+		finalIdxToStart += toSend[idxProc];
 	      }
-	      idxLeaf += recvBlockInfo[idxBlockRecvInLeft].nbLeaves;
-	      delete[] recvBlockInfo[idxBlockRecvInLeft].buffer;
-	      idxBlockRecvInLeft += 1;
-            }
-	  }
-	  //// TODO REMOVE WHEN DEBUG printf("currentLeafsOnMyLeft %lld correctLeftLeavesNumber %lld currentRightLeafIdx %lld correctRightLeavesIndex %lld \n",
-	  //// TODO REMOVE WHEN DEBUG        currentLeafsOnMyLeft, correctLeftLeavesNumber, currentRightLeafIdx, correctRightLeavesIndex);
-	  if((currentLeafsOnMyLeft <= correctLeftLeavesNumber && correctLeftLeavesNumber < currentRightLeafIdx)
-	     || (currentLeafsOnMyLeft < correctRightLeavesIndex && correctRightLeavesIndex <= currentRightLeafIdx)){
-	    const int nbLeavesToSkip = int(correctLeftLeavesNumber-currentLeafsOnMyLeft);
-            size_t offsetBuffer = 0;
-            //// TODO REMOVE WHEN DEBUG printf("%d] skip %d leaves\n", myRank, nbLeavesToSkip);
-            for(int idxToSkip = 0 ; idxToSkip < nbLeavesToSkip ; ++idxToSkip){
-	      const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&leavesArray[offsetBuffer]));
-	      offsetBuffer += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int));
-	      //// TODO REMOVE WHEN DEBUG printf("%d] leaf %d had %d part\n", myRank, idxToSkip, numberOfParticlesInThisLeaf);
-            }
-            const int nbLeafToCopy = int(FMath::Min(currentRightLeafIdx, correctRightLeavesIndex) - FMath::Max(currentLeafsOnMyLeft, correctLeftLeavesNumber));
-            //// TODO REMOVE WHEN DEBUG printf("%d] Need to copy %d leaves\n", myRank, nbLeafToCopy);
-            for(int idxToProcess = 0 ; idxToProcess < nbLeafToCopy ; ++idxToProcess){
-	      const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&leavesArray[offsetBuffer]));
-	      //// TODO REMOVE WHEN DEBUG printf("%d] leaf %d had %d part\n", myRank, idxToProcess, numberOfParticlesInThisLeaf);
-	      const ParticleClass*const particles   = reinterpret_cast<ParticleClass*>(&leavesArray[offsetBuffer] + sizeof(int));
-	      for(int idxParticle = 0 ; idxParticle < numberOfParticlesInThisLeaf ; ++idxParticle){
-		particlesSaver->push(particles[idxParticle]);
+	      memcpy(&finalPartBuffer[idxToWrite],&leavesArray[finalIdxToStart],sizeof(ParticleClass)*finalCurrentParts);
+	    }
+	    
+	  
+	    //Second, receive in place: 
+	    
+	    MPI_Request* requests = new MPI_Request[nbProcs * 2];
+	    int counterRequest = 0;
+	    int tag = 99;
+	    FSize idxToWriteRecvedDatas = 0;
+	    //While I received from left, i write the datas at the start of the buffer
+	    for(int idxProc=0 ; idxProc<nbProcs ; ++idxProc){
+	      if(idxProc == myRank){ //When I myRank==idxProc, I increment the idxToWrite of What I kept to avoid erasing my parts with received parts
+		idxToWriteRecvedDatas += finalCurrentParts;
 	      }
-
-	      offsetBuffer += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int));
-            }
-	  }
-	  if(currentRightLeafIdx < correctRightLeavesIndex){
-            const int nbLeavesRecv = int(correctRightLeavesIndex - FMath::Max(currentRightLeafIdx, correctLeftLeavesNumber));
-            //// TODO REMOVE WHEN DEBUG printf("%d] has receive %d from right\n", myRank, nbLeavesRecv);
-            int idxLeaf  = 0;
-            while(idxLeaf < nbLeavesRecv){
-	      //// TODO REMOVE WHEN DEBUG printf("%d] block %d has %d leaves\n", myRank, idxBlockRecvInLeft, recvBlockInfo[idxBlockRecvInLeft].nbLeaves);
-	      size_t offsetBuffer = 0;
-	      for(int idxLeafInBlock = 0 ; idxLeafInBlock < recvBlockInfo[idxBlockRecvInLeft].nbLeaves ; ++idxLeafInBlock){
-		const int numberOfParticlesInThisLeaf = (*reinterpret_cast<int*>(&recvBlockInfo[idxBlockRecvInLeft].buffer[offsetBuffer]));
-		const ParticleClass*const particles   = reinterpret_cast<ParticleClass*>(&recvBlockInfo[idxBlockRecvInLeft].buffer[offsetBuffer] + sizeof(int));
-		//// TODO REMOVE WHEN DEBUG printf("%d] block %d leaf %d has %d part\n", myRank, idxBlockRecvInLeft, idxLeafInBlock, numberOfParticlesInThisLeaf);
-		for(int idxParticle = 0 ; idxParticle < numberOfParticlesInThisLeaf ; ++idxParticle){
-		  particlesSaver->push(particles[idxParticle]);
+	      else{ //I received and inc the write index of what I get
+		if(globalSendRecvMap[idxProc*nbProcs+myRank]){//If i expect something from idxProc
+		  MPI_Irecv(&finalPartBuffer[idxToWriteRecvedDatas],int(sizeof(ParticleClass))*int(globalSendRecvMap[idxProc*nbProcs+myRank]),MPI_BYTE,
+			    idxProc,tag,communicator.getComm(),&requests[counterRequest++]);
+		  idxToWriteRecvedDatas += globalSendRecvMap[idxProc*nbProcs+myRank];
 		}
-		offsetBuffer  += (sizeof(ParticleClass)*numberOfParticlesInThisLeaf+sizeof(int));
 	      }
-	      idxLeaf += recvBlockInfo[idxBlockRecvInLeft].nbLeaves;
-	      delete[] recvBlockInfo[idxBlockRecvInLeft].buffer;
-	      idxBlockRecvInLeft += 1;
-            }
+	    }
+	    
+	    //Third, send
+	    for(int idxProc=0 ; idxProc<nbProcs ; ++idxProc){
+	      if(toSend[idxProc]){ //If i have something for idxProc
+		MPI_Isend(&leavesArray[leavesIndices[idxToSend[idxProc]]],int(sizeof(ParticleClass))*int(globalSendRecvMap[myRank*nbProcs+idxProc]),MPI_BYTE,
+			  idxProc,tag,communicator.getComm(),&requests[counterRequest++]);
+	      }
+	    }
+	    //Wait for the comm :
+	    MPI_Waitall(counterRequest,requests,MPI_STATUSES_IGNORE);
+	    delete[] requests;
 	  }
-
-	  delete[] leavesArray;
+	  
+	  for(FSize idPartsToStore=0; idPartsToStore<finalTotParts ; ++idPartsToStore){
+	    particlesSaver->push(finalPartBuffer[idPartsToStore]);
+	  }
+	  
+	  delete[] finalPartBuffer;
+	  delete[] globalSendRecvMap;
+	  delete[] idxToSend;
+	  delete[] toSend;
 	  delete[] numberOfLeavesPerProc;
 	  delete[] leavesOffsetPerProc;
-	  delete[] numberOfParticlesPerLeaf;
-	  delete[] requests;
-	  delete[] recvBlockInfo;
 	}
+	delete[] leavesArray;
+	delete[] leavesIndices;
     }
 
 public:
@@ -547,15 +507,17 @@ public:
     static void ArrayToTree(const FMpi::FComm& communicator, const ParticleClass array[], const FSize size,
                             const FPoint& boxCenter, const FReal boxWidth, const int treeHeight,
                             ContainerClass* particleSaver, FAbstractBalanceAlgorithm* balancer,const SortingType type = QuickSort){
-
+      
         IndexedParticle* particlesArray = 0;
         FSize particlesSize = 0;
         SortParticlesFromArray(communicator, array, size, type, boxCenter, boxWidth, treeHeight,
                                &particlesArray, &particlesSize);
 
         char* leavesArray = 0;
-        FSize leavesSize = 0;
-        MergeLeaves(communicator, particlesArray, particlesSize, &leavesArray, &leavesSize);
+	FSize leavesSize = 0;
+	FSize * leavesIndices = 0;
+
+        MergeLeaves(communicator, particlesArray, &particlesSize, &leavesIndices, &leavesArray, &leavesSize);
 
         EqualizeAndFillTree(communicator, particleSaver, leavesArray, leavesSize, balancer);
     }