From c3e0c4a8bf11763a0f4205edbdb62a57075d3623 Mon Sep 17 00:00:00 2001
From: Olivier Coulaud <Olivier.Coulaud@inria.fr>
Date: Thu, 14 Dec 2017 17:09:57 +0100
Subject: [PATCH] Restore letSTF just after separation with develop

---
 CMakeLists.txt                                |    9 +-
 .../algorithm/distributed/distribute.hpp      |    1 +
 Contribs/inria/linear_tree/balance_tree.hpp   |    6 +-
 Examples/CMakeLists.txt                       |    1 -
 Examples/RotationFMM.cpp                      |   53 +-
 LICENCE                                       |    2 +-
 Obsolete/GroupTree/FBasicCellPOD.hpp          |    6 +-
 Obsolete/GroupTree/FChebCellPOD.hpp           |    8 +-
 Src/Adaptive/FAdaptiveStarPU.hpp              |    4 +-
 Src/Adaptive/starpu_node_data_handles.hpp     |    4 +-
 Src/Containers/FOctree.hpp                    |    7 +-
 Src/Core/FCoreCommon.hpp                      |   22 +-
 Src/Files/FAbstractLoader.hpp                 |    2 +
 Src/Files/FBlockedMpiInterpolation.hpp        |  228 +
 Src/Files/FFmaGenericLoader.hpp               |   28 +-
 Src/Files/FMpiFmaGenericLoader.hpp            |  266 +
 Src/Files/FTreeMpiCsvSaver.hpp                |    4 +-
 Src/GroupTree/Core/FBlockedLinearTree.hpp     |  196 -
 Src/GroupTree/Core/FBuildGroupTree.hpp        |  397 ++
 Src/GroupTree/Core/FCheckResults.hpp          |   93 +
 .../Core/FDistributedGroupTreeBuilder.hpp     | 1515 ++++--
 .../FDistributedLETGroupTreeValidator.hpp     |  260 +
 Src/GroupTree/Core/FGroupLinearTree.hpp       |  238 +-
 Src/GroupTree/Core/FGroupOfCells.hpp          |   28 +-
 Src/GroupTree/Core/FGroupOfParticles.hpp      |   16 +-
 .../FGroupTaskStarpuImplicitAlgorithm.hpp     | 4659 +++++++++--------
 Src/GroupTree/Core/FGroupTools.hpp            |  266 +
 Src/GroupTree/Core/FGroupTree.hpp             | 2721 ++++++----
 Src/GroupTree/Core/FOutOfBlockInteraction.hpp |    2 +-
 .../Core/FP2PGroupParticleContainer.hpp       |    6 +-
 .../FStarPUCptInteractionsWrapper.hpp         |    8 +-
 Src/Utils/FMpi.hpp                            |   37 +-
 Src/Utils/FValidationAlgorithm.hpp            |  219 +
 Tests/CMakeLists.txt                          |    1 +
 Tests/GroupTree/testBlockedChebyshev.cpp      |    3 +-
 .../testBlockedImplicitChebyshev.cpp          |    3 +
 .../GroupTree/testBlockedImplicitUniform.cpp  |    3 +-
 .../GroupTree/testBlockedMpiInterpolation.cpp |   68 +
 .../LET_STF_Implicit/testCompareGroupTree.cpp |  431 ++
 .../testFMMInterpolationStarPU.hpp            |  284 +
 .../testGroupTreeFromLinearTree.cpp           |  218 +
 Tests/LET_STF_Implicit/testLETGroupTree.cpp   |  254 +
 .../testLetImplicitChebyshev.cpp              |   30 +
 .../testLetImplicitUniform.cpp                |   29 +
 Tests/LET_STF_Implicit/testSizeGroupTree.cpp  |  356 ++
 .../LET_STF_Implicit/testSizeLETGroupTree.cpp |  267 +
 UTests/utestLetTree.cpp                       |  213 +
 Utils/stdComplex.hpp                          |   26 +
 48 files changed, 9428 insertions(+), 4070 deletions(-)
 create mode 100644 Src/Files/FBlockedMpiInterpolation.hpp
 delete mode 100644 Src/GroupTree/Core/FBlockedLinearTree.hpp
 create mode 100644 Src/GroupTree/Core/FBuildGroupTree.hpp
 create mode 100644 Src/GroupTree/Core/FCheckResults.hpp
 create mode 100644 Src/GroupTree/Core/FDistributedLETGroupTreeValidator.hpp
 create mode 100644 Src/GroupTree/Core/FGroupTools.hpp
 create mode 100644 Tests/GroupTree/testBlockedMpiInterpolation.cpp
 create mode 100644 Tests/LET_STF_Implicit/testCompareGroupTree.cpp
 create mode 100644 Tests/LET_STF_Implicit/testFMMInterpolationStarPU.hpp
 create mode 100644 Tests/LET_STF_Implicit/testGroupTreeFromLinearTree.cpp
 create mode 100644 Tests/LET_STF_Implicit/testLETGroupTree.cpp
 create mode 100644 Tests/LET_STF_Implicit/testLetImplicitChebyshev.cpp
 create mode 100644 Tests/LET_STF_Implicit/testLetImplicitUniform.cpp
 create mode 100644 Tests/LET_STF_Implicit/testSizeGroupTree.cpp
 create mode 100644 Tests/LET_STF_Implicit/testSizeLETGroupTree.cpp
 create mode 100644 UTests/utestLetTree.cpp
 create mode 100644 Utils/stdComplex.hpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 593bb1fef..a397bb14d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -7,11 +7,12 @@ endif()
 cmake_policy(SET CMP0004 NEW)
 
 set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
+set(FUSE_LIST "MPI;BLAS;FFT;STARPU;CUDA;OPENCL;OMP4;SSE;AVX;AVX2;MIC;MPI2") # no leading blank: a quoted " MPI" would make the first list element " MPI", not "MPI"
 
 #===========================================================================
 # Project Declaration
 #===========================================================================
-project(SCALFMM C CXX)
+project(SCALFMM C CXX )
 
 # check if compiling into source directories
 string(COMPARE EQUAL "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" insource)
@@ -215,7 +216,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/
   ##############################################################################
   #                           FUSE list                                        #
   ##############################################################################
-  set(FUSE_LIST "")
+ # set(FUSE_LIST "")
   # then do list(APPEND FUSE_LIST "BLAS") to protect from FUSE_BLAS
   list(APPEND FUSE_LIST "MPI")
 
@@ -307,7 +308,6 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/
     endif()
 
     find_package(MPI REQUIRED)
-
     if (MPI_CXX_INCLUDE_PATH)
       include_directories( ${MPI_CXX_INCLUDE_PATH} )
     endif()
@@ -315,7 +315,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/
       set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${MPI_CXX_COMPILE_FLAGS}")
     endif()
     if (MPI_CXX_INCLUDE_PATH)
-      set(SCALFMM_INCLUDES  "${SCALFMM_INCLUDES}; ${MPI_CXX_INCLUDE_PATH}")
+      set(SCALFMM_INCLUDES  "${SCALFMM_INCLUDES};${MPI_CXX_INCLUDE_PATH}")
     endif()
     if (MPI_CXX_LINK_FLAGS)
       list(APPEND "CMAKE_EXE_LINKER_FLAGS ${MPI_CXX_LINK_FLAGS}")
@@ -821,6 +821,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/
   #                      Build - lib                               #
   ##################################################################
   #
+  MESSAGE(STATUS "FUSE : ${FUSE_LIST} ")
   add_subdirectory(Src)
   # Link with scalfmm lib
   set(scalfmm_lib scalfmm)
diff --git a/Contribs/inria/algorithm/distributed/distribute.hpp b/Contribs/inria/algorithm/distributed/distribute.hpp
index ed9958f8c..a391dda3e 100644
--- a/Contribs/inria/algorithm/distributed/distribute.hpp
+++ b/Contribs/inria/algorithm/distributed/distribute.hpp
@@ -13,6 +13,7 @@
 #include "inria/utils.hpp"
 #include "inria/meta.hpp"
 
+
 #include <numeric>
 #include <iterator>
 #include <vector>
diff --git a/Contribs/inria/linear_tree/balance_tree.hpp b/Contribs/inria/linear_tree/balance_tree.hpp
index b37ebbf8f..b8c7d8bfe 100644
--- a/Contribs/inria/linear_tree/balance_tree.hpp
+++ b/Contribs/inria/linear_tree/balance_tree.hpp
@@ -189,8 +189,8 @@ create_balanced_linear_tree(
  * @return [description]
  */
 std::size_t send_get_max_morton_idx(
-    inria::mpi_config& conf,
-    std::size_t& max_morton_idx
+    inria::mpi_config&  conf,
+    std::size_t&        max_morton_idx
     )
 {
     // Setting parametter
@@ -291,7 +291,7 @@ std::vector<details::cblt::node_info_from_range<Range>> create_balanced_linear_t
             last_morton_index = curr_idx_morton;
             ++nb_leaf;
         }
-    } 
+    }
 
     return {begin(lin_tree), end(lin_tree)};
 }
diff --git a/Examples/CMakeLists.txt b/Examples/CMakeLists.txt
index 36551cded..aad3d39cb 100644
--- a/Examples/CMakeLists.txt
+++ b/Examples/CMakeLists.txt
@@ -14,7 +14,6 @@ file(
 	./*.cpp
 	)
 
-
 # Adding the project sources dir as an include dir
 INCLUDE_DIRECTORIES(
      ${SCALFMM_BINARY_DIR}/Src 
diff --git a/Examples/RotationFMM.cpp b/Examples/RotationFMM.cpp
index f2ff07731..148545cf4 100644
--- a/Examples/RotationFMM.cpp
+++ b/Examples/RotationFMM.cpp
@@ -56,7 +56,7 @@ int main(int argc, char* argv[])
     FHelpDescribeAndExit(argc, argv,
                          "Driver for HArmonic Spherical + Rotation  --  kernel  (1/r kernel).",
                          FParameterDefinitions::InputFile, FParameterDefinitions::OctreeHeight,
-                         FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::InputFile,
+                         FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::OutputFile,
                          FParameterDefinitions::NbThreads);
 
     const std::string defaultFile(/*SCALFMMDataPath+*/"../Data/test20k.fma");
@@ -188,7 +188,56 @@ int main(int argc, char* argv[])
 
 	}
 	// -----------------------------------------------------
-
+	// -----------------------------------------------------
+	if(FParameters::existParameter(argc, argv, FParameterDefinitions::OutputFile.options)){
+	  std::string name(FParameters::getStr(argc,argv,FParameterDefinitions::OutputFile.options,   "output.fma"));
+	  FFmaGenericWriter<FReal> writer(name) ;
+	  //
+	  FSize NbPoints = loader.getNumberOfParticles();
+	  FReal * particles ;
+	  particles = new FReal[8*NbPoints] ;
+	  memset(particles,0,8*NbPoints*sizeof(FReal));
+	  FSize j = 0 ;
+	  tree.forEachLeaf([&](LeafClass* leaf){
+	      //
+	      // Input
+	      const FReal*const posX = leaf->getTargets()->getPositions()[0];
+	      const FReal*const posY = leaf->getTargets()->getPositions()[1];
+	      const FReal*const posZ = leaf->getTargets()->getPositions()[2];
+	      const FReal*const physicalValues = leaf->getTargets()->getPhysicalValues();
+	      const FVector<FSize>& indexes = leaf->getTargets()->getIndexes();
+	      //
+	      // Computed data
+	      const FReal*const potentials = leaf->getTargets()->getPotentials();
+	      const FReal*const forcesX = leaf->getTargets()->getForcesX();
+	      const FReal*const forcesY = leaf->getTargets()->getForcesY();
+	      const FReal*const forcesZ = leaf->getTargets()->getForcesZ();
+	      //
+	      const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
+	      for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){
+		j = 8*indexes[idxPart];
+		particles[j]      = posX[idxPart] ;
+		particles[j+1]  = posY[idxPart] ;
+		particles[j+2]  = posZ[idxPart] ;
+		particles[j+3]  = physicalValues[idxPart] ;
+		particles[j+4]  = potentials[idxPart] ;
+		particles[j+5]  =  forcesX[idxPart] ;
+		particles[j+6]  =  forcesY[idxPart] ;
+		particles[j+7]  =  forcesZ[idxPart] ;
+	      }
+	    });
+
+          writer.writeHeader( loader.getCenterOfBox(), loader.getBoxWidth() ,  NbPoints, sizeof(FReal), 8) ;
+          writer.writeArrayOfReal(particles,  8 , NbPoints);
+
+          delete[] particles;
+
+          //
+          std::string name1( "output.fma");
+          //
+          FFmaGenericWriter<FReal> writer1(name1) ;
+          writer1.writeDistributionOfParticlesFromOctree(&tree,NbPoints) ;
+        }
 
 	return 0;
 }
diff --git a/LICENCE b/LICENCE
index 1c9d7daf2..dec2b2083 100644
--- a/LICENCE
+++ b/LICENCE
@@ -1,4 +1,4 @@
-Copyright ScalFmm 2011-2016 INRIA
+Copyright ScalFmm 2011-2017 INRIA
 =================================
 
 This software is a computer program which purpose is to compute the FMM.
diff --git a/Obsolete/GroupTree/FBasicCellPOD.hpp b/Obsolete/GroupTree/FBasicCellPOD.hpp
index d47258c98..132c43c01 100644
--- a/Obsolete/GroupTree/FBasicCellPOD.hpp
+++ b/Obsolete/GroupTree/FBasicCellPOD.hpp
@@ -2,9 +2,9 @@
 #define FBASICCELLPOD_HPP
 
 
-#include "../../Utils/FGlobal.hpp"
-#include "../../Containers/FTreeCoordinate.hpp"
-#include "../StarPUUtils/FStarPUDefaultAlign.hpp"
+#include "../../Src/Utils/FGlobal.hpp"
+#include "../../Src/Containers/FTreeCoordinate.hpp"
+#include "../../Src/GroupTree/StarPUUtils/FStarPUDefaultAlign.hpp"
 
 struct alignas(FStarPUDefaultAlign::StructAlign) FBasicCellPOD {
     MortonIndex mortonIndex;
diff --git a/Obsolete/GroupTree/FChebCellPOD.hpp b/Obsolete/GroupTree/FChebCellPOD.hpp
index 8d801f5c5..e79b6b8d5 100644
--- a/Obsolete/GroupTree/FChebCellPOD.hpp
+++ b/Obsolete/GroupTree/FChebCellPOD.hpp
@@ -1,10 +1,10 @@
 #ifndef FCHEBCELLPOD_HPP
 #define FCHEBCELLPOD_HPP
 
-#include "../../Utils/FGlobal.hpp"
-#include "../Core/FBasicCellPOD.hpp"
-#include "../StarPUUtils/FStarPUDefaultAlign.hpp"
-#include "../../Kernels/Chebyshev/FChebTensor.hpp"
+#include "../../Src/Utils/FGlobal.hpp"
+#include "FBasicCellPOD.hpp"
+#include "../../Src/GroupTree/StarPUUtils/FStarPUDefaultAlign.hpp"
+#include "../../Src/Kernels/Chebyshev/FChebTensor.hpp"
 
 typedef FBasicCellPOD FChebCellPODCore;
 
diff --git a/Src/Adaptive/FAdaptiveStarPU.hpp b/Src/Adaptive/FAdaptiveStarPU.hpp
index f22ba0f36..4b8d57c70 100644
--- a/Src/Adaptive/FAdaptiveStarPU.hpp
+++ b/Src/Adaptive/FAdaptiveStarPU.hpp
@@ -1,6 +1,8 @@
 #ifndef SCALFMM_STARPU_ALGO_HPP_
 #define SCALFMM_STARPU_ALGO_HPP_
 
+//@FUSE_STARPU
+
 #include <algorithm>
 #include <cmath> // Used to round box differences
 #include <functional>
@@ -9,7 +11,7 @@
 #include <vector>
 #include <unordered_map>
 
-#include <starpu/1.2/starpu.h>
+#include <starpu.h>
 
 #include "Core/FCoreCommon.hpp"
 #include "Containers/FTreeCoordinate.hpp"
diff --git a/Src/Adaptive/starpu_node_data_handles.hpp b/Src/Adaptive/starpu_node_data_handles.hpp
index 6e17f68cc..d18959e07 100644
--- a/Src/Adaptive/starpu_node_data_handles.hpp
+++ b/Src/Adaptive/starpu_node_data_handles.hpp
@@ -8,7 +8,9 @@
 #ifndef _SCALFMM_STARPU_NODE_DATA_HANDLES_HPP_
 #define _SCALFMM_STARPU_NODE_DATA_HANDLES_HPP_
 
-#include <starpu/1.2/starpu.h>
+// @FUSE_STARPU
+
+#include <starpu.h>
 
 #include <utility>
 
diff --git a/Src/Containers/FOctree.hpp b/Src/Containers/FOctree.hpp
index 28f69cea8..57a1dc87b 100644
--- a/Src/Containers/FOctree.hpp
+++ b/Src/Containers/FOctree.hpp
@@ -43,9 +43,10 @@ class FOctree : public FNoCopyable {
 public:
     using FRealType = FReal;
     using CellClassType = CellClass;
-    using ContainerClassType = ContainerClass;
-    using LeafClassType = LeafClass_; //< The type of the Leaf used in the Octree
-    using LeafClass = LeafClass_;     //< The type of the Leaf used in the Octree
+    using ContainerClassType = ContainerClass;   //< The type of the container used to store particles in the Octree
+    using LeafClassType = LeafClass_;   //< The type of the Leaf used in the Octree
+    using LeafClass     = LeafClass_;     //< The type of the Leaf used in the Octree
+    using LeafClass_T    = LeafClass_;     //< The type of the Leaf used in the Octree
 
 protected:
     typedef FOctree<FReal, CellClass , ContainerClass, LeafClass, CellAllocatorClass>      OctreeType;
diff --git a/Src/Core/FCoreCommon.hpp b/Src/Core/FCoreCommon.hpp
index d89b0b1af..50b4a51a4 100644
--- a/Src/Core/FCoreCommon.hpp
+++ b/Src/Core/FCoreCommon.hpp
@@ -29,6 +29,27 @@ enum FFmmOperations {
 //
     FFmmNearAndFarFields = (FFmmNearField|FFmmFarField)
 };
+/// Returns the names of the FMM operators set in the bit mask \p value, e.g. " FFmmP2P | FFmmM2L".
+/// The result is empty when no operator bit is set. Declared inline because it is defined in a header.
+inline std::string FFmmOperations_string(/*enum FFmmOperations*/ const unsigned int & value){
+  std::string op("");
+  if (value & FFmmP2P)
+    op += " FFmmP2P |";
+  if (value & FFmmP2M)
+    op += " FFmmP2M |";
+  if (value & FFmmM2M)
+    op += " FFmmM2M |";
+  if (value & FFmmM2L)
+    op += " FFmmM2L |";
+  if (value & FFmmL2L)
+    op += " FFmmL2L |";
+  if (value & FFmmL2P)
+    op += " FFmmL2P |";
+  // Drop the trailing " |"; skipped for an empty string, where erase(size()-2) would throw std::out_of_range.
+  if (!op.empty())
+    op.erase(op.size()-2);
+  return op;
+}
 
 /**
  * \brief Algorithm interface
@@ -98,7 +119,6 @@ protected:
         FAssertLF(FAbstractAlgorithm::upperWorkingLevel <= FAbstractAlgorithm::lowerWorkingLevel);
         FAssertLF(2 <= FAbstractAlgorithm::upperWorkingLevel);
     }
-
     virtual void executeCore(const unsigned operationsToProceed) = 0;
 
 public:
diff --git a/Src/Files/FAbstractLoader.hpp b/Src/Files/FAbstractLoader.hpp
index 410cb52b2..d25be50ec 100644
--- a/Src/Files/FAbstractLoader.hpp
+++ b/Src/Files/FAbstractLoader.hpp
@@ -24,6 +24,8 @@
 template <class FReal>
 class FAbstractLoader {
 public:	
+  using  dataType= FReal ;
+
     /** Default destructor */
     virtual ~FAbstractLoader(){
     }
diff --git a/Src/Files/FBlockedMpiInterpolation.hpp b/Src/Files/FBlockedMpiInterpolation.hpp
new file mode 100644
index 000000000..41731c094
--- /dev/null
+++ b/Src/Files/FBlockedMpiInterpolation.hpp
@@ -0,0 +1,228 @@
+#ifndef _F_BLOCKED_MPI_INTERPOLATION_HPP_
+#define _F_BLOCKED_MPI_INTERPOLATION_HPP_
+
+
+#include "../../Src/Utils/FGlobal.hpp"
+
+#include "../../Src/GroupTree/Core/FGroupTree.hpp"
+
+#include "../../Src/Components/FSimpleLeaf.hpp"
+#include "../../Src/Components/FSymbolicData.hpp"
+#include "../../Src/Containers/FVector.hpp"
+
+#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp"
+
+#include "../../Src/Utils/FMath.hpp"
+#include "../../Src/Utils/FMemUtils.hpp"
+#include "../../Src/Utils/FParameters.hpp"
+
+#include "../../Src/Files/FRandomLoader.hpp"
+#include "../../Src/Files/FFmaGenericLoader.hpp"
+
+#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp"
+#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp"
+#include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp"
+#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
+
+#include "../../Src/Utils/FParameterNames.hpp"
+
+#include "../../Src/Components/FTestParticleContainer.hpp"
+#include "../../Src/Components/FTestCell.hpp"
+#include "../../Src/Components/FTestKernels.hpp"
+
+#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp"
+#include "../../Src/Files/FMpiTreeBuilder.hpp"
+#include "../../Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp"
+#include "../../Src/Core/FFmmAlgorithm.hpp" //For validation
+
+#include "../../Src/Files/FMpiFmaGenericLoader.hpp"
+#include "../../Src/Containers/FCoordinateComputer.hpp"
+
+#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"
+
+#include <memory>
+using namespace std;
+
+namespace blockedMpiInterpolation{
+
+//Function header
+void timeAverage(int mpi_rank, int nproc, double elapsedTime);
+FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total);
+
+/// Distributes the particles over the MPI processes, builds the blocked (group) tree on each of them
+/// and runs the StarPU/MPI task algorithm on it; the elapsed times are then reported by timeAverage().
+/// \return pointer to the heap-allocated group tree; the caller owns it and is responsible for deleting it.
+template<
+    class GroupCellClass,
+    class GroupCellUpClass,
+    class GroupCellDownClass,
+    class GroupCellSymbClass,
+    class KernelClass,
+    class MatrixKernelClass
+    >
+auto execute_algorithm(int argc, char* argv[]){
+    //Define parameters
+    const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"};
+    const FParameterNames LocalOptionEllipsoid = {{"-ellipsoid"} , " non uniform distribution on  an ellipsoid of aspect ratio given by a=0.5 b=0.25 c=0.125"};
+    const FParameterNames LocalOptionCube = {{"-cube", "-uniform"} , " uniform distribution on cube (default)"};
+    // Define types
+    using FReal = double;
+    using GroupContainerClass =
+        FP2PGroupParticleContainer<FReal>;
+    using GroupOctreeClass =
+        FGroupTree< FReal, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal>;
+    using GroupKernelClass =
+        FStarPUAllCpuCapacities<KernelClass>;
+    using GroupCpuWrapper =
+        FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> ;
+    using GroupAlgorithm =
+        FGroupTaskStarPUMpiAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper> ;
+    // Init MPI_COM
+    FMpi mpiComm(argc,argv);
+
+    // Init timer
+    FTic timer;
+
+    // Getting parameters
+    const int groupSize =
+        FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);
+    const unsigned int TreeHeight    =
+        FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
+    const FSize totalNbParticles =
+        FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20));
+
+    const FSize NbParticles   =
+        getNbParticlesPerNode(mpiComm.global().processCount(), mpiComm.global().processId(), totalNbParticles);
+
+    // init particles position and physical value
+    struct TestParticle{
+        FPoint<FReal> position;
+        FReal physicalValue;
+        const FPoint<FReal>& getPosition(){
+            return position;
+        }
+		const unsigned int getWriteDataSize(void) const {
+			return sizeof(FReal);
+		}
+		const unsigned int getWriteDataNumber(void) const {
+			return 3;
+		}
+		const FReal* getPtrFirstData(void) const {
+			return position.data();
+		}
+    };
+
+    // LOADING PARTICLE
+    #ifndef LOAD_FILE
+        FRandomLoader<FReal> loader(NbParticles, 1.0, FPoint<FReal>(0,0,0), mpiComm.global().processId());
+        FAssertLF(loader.isOpen());
+        TestParticle* allParticles = new TestParticle[loader.getNumberOfParticles()];
+        memset(allParticles,0,(unsigned int) (sizeof(TestParticle)* loader.getNumberOfParticles()));
+        for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+            loader.fillParticle(&allParticles[idxPart].position);
+            allParticles[idxPart].physicalValue = 0.1;
+        }
+    #else
+        const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
+        FMpiFmaGenericLoader<FReal> loader(filename,mpiComm.global());
+        FAssertLF(loader.isOpen());
+        TestParticle* allParticles = new TestParticle[loader.getMyNumberOfParticles()];
+        memset(allParticles,0,(unsigned int) (sizeof(TestParticle)* loader.getMyNumberOfParticles()));
+        for(FSize idxPart = 0 ; idxPart < loader.getMyNumberOfParticles() ; ++idxPart){
+            loader.fillParticle(&allParticles[idxPart].position,&allParticles[idxPart].physicalValue);
+        }
+    #endif
+
+    FVector<TestParticle> myParticles;
+    FLeafBalance balancer;
+    FMpiTreeBuilder< FReal,
+                    TestParticle >::DistributeArrayToContainer(
+                                        mpiComm.global(),
+                                        allParticles,
+                                        loader.getNumberOfParticles(),
+                                        loader.getCenterOfBox(),
+                                        loader.getBoxWidth(),
+                                        TreeHeight,
+                                        &myParticles,
+                                        &balancer);
+    // NOTE(review): allParticles is never delete[]'d in this function - looks like a leak; confirm DistributeArrayToContainer copies its input before freeing it here.
+    // Each process needs to know the rightmost Morton index of the process on its left
+    const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>(
+                loader.getCenterOfBox(),
+                loader.getBoxWidth(),
+                TreeHeight,
+                myParticles[myParticles.getSize()-1].position ); // NOTE(review): assumes this rank received at least one particle
+    const MortonIndex myLeftLimite = host.getMortonIndex();
+    MortonIndex leftLimite = -1;
+    if(mpiComm.global().processId() != 0){
+        FMpi::Assert(MPI_Recv(&leftLimite, sizeof(leftLimite), MPI_BYTE,
+                              mpiComm.global().processId()-1, 0,
+                              mpiComm.global().getComm(), MPI_STATUS_IGNORE), __LINE__);
+    }
+    if(mpiComm.global().processId() != mpiComm.global().processCount()-1){
+        FMpi::Assert(MPI_Send(const_cast<MortonIndex*>(&myLeftLimite), sizeof(myLeftLimite), MPI_BYTE,
+                              mpiComm.global().processId()+1, 0,
+                              mpiComm.global().getComm()), __LINE__);
+    }
+    FLOG(std::cout << "My last index is " << myLeftLimite << "\n");   // Morton index of this rank's own last particle
+    FLOG(std::cout << "My left limite is " << leftLimite << "\n");    // value received from the previous rank (-1 on rank 0)
+
+    // Put the data into the tree
+    FP2PParticleContainer<FReal> myParticlesInContainer;
+    for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){
+        myParticlesInContainer.push(myParticles[idxPart].position,
+                                    myParticles[idxPart].physicalValue);
+    }
+    // Heap-allocated because the tree is handed back to the caller (the address of a local tree would dangle after return)
+    GroupOctreeClass* const groupedTree = new GroupOctreeClass(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize,
+                                                               &myParticlesInContainer, true, leftLimite);
+    timer.tac();
+	std::cerr << "Done  " << "(@Creating and Inserting Particles = " << timer.elapsed() << "s)." << std::endl;
+    int operationsToProceed =  FFmmP2P | FFmmP2M | FFmmM2M | FFmmM2L | FFmmL2L | FFmmL2P;
+    { // -----------------------------------------------------
+        const MatrixKernelClass MatrixKernel;
+        // Create Matrix Kernel
+        GroupKernelClass groupkernel(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel);
+        // Run the algorithm
+        GroupAlgorithm groupalgo(mpiComm.global(), groupedTree,&groupkernel);
+		mpiComm.global().barrier();
+        timer.tic();
+		starpu_fxt_start_profiling();
+        groupalgo.execute(operationsToProceed);
+		mpiComm.global().barrier();
+		starpu_fxt_stop_profiling();
+        timer.tac();
+		timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), timer.elapsed());
+    }
+    return groupedTree;
+}
+
+/// Prints the minimum and maximum of the elapsed times measured by the nproc MPI processes: rank 0 receives
+/// the time of every other rank over MPI_COMM_WORLD and prints. Inline because it is defined in a header.
+inline void timeAverage(int mpi_rank, int nproc, double elapsedTime){
+    if(mpi_rank == 0){
+        double sumElapsedTimeMin = elapsedTime;
+        double sumElapsedTimeMax = elapsedTime;
+        for(int i = 1; i < nproc; ++i){
+            double tmp;
+            MPI_Recv(&tmp, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
+            if(tmp < sumElapsedTimeMin) sumElapsedTimeMin = tmp;
+            if(tmp > sumElapsedTimeMax) sumElapsedTimeMax = tmp;
+        }
+        std::cout << "Min time per node (MPI)  : " << sumElapsedTimeMin << "s" << std::endl;
+        std::cout << "Max time per node (MPI)  : " << sumElapsedTimeMax << "s" << std::endl;
+    } else {
+        MPI_Send(&elapsedTime, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD);
+    }
+    MPI_Barrier(MPI_COMM_WORLD); // all ranks synchronize here before returning
+}
+
+/// Share of `total` particles for rank `mpi_rank` among `mpi_count` ranks: the first total%mpi_count ranks get one extra. Inline: defined in a header.
+inline FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){
+	const FSize remainder = total % mpi_count;
+	return (total - remainder) / mpi_count + ((mpi_rank < remainder) ? 1 : 0);
+}
+
+}
+
+#endif
diff --git a/Src/Files/FFmaGenericLoader.hpp b/Src/Files/FFmaGenericLoader.hpp
index f2e6b0f36..909937e88 100644
--- a/Src/Files/FFmaGenericLoader.hpp
+++ b/Src/Files/FFmaGenericLoader.hpp
@@ -199,7 +199,6 @@ private:
     FReal *       tmpVal;     ///< Temporary array to read data
     /// Count of other data pieces to read in a particle record after the 4 first ones.
     unsigned int  otherDataToRead;
-
     void open_file(const std::string filename, const bool binary) {
             if(binary) {
                 this->file = new std::fstream (filename.c_str(),std::ifstream::in| std::ios::binary);
@@ -216,6 +215,7 @@ private:
     }
 
 public:
+    using  dataType= FReal ;  // Just to know what kind of data we handle
     /**
      * This constructor opens a file using the given mode and reads its
      * header. The file will be kept opened until destruction of the object.
@@ -543,8 +543,8 @@ template <class FReal>
 class FFmaGenericWriter {
 
 protected:
-    std::fstream *file;  ///< the stream used to read the file
-    bool binaryFile  ;   ///< if true the file to read is in binary mode
+    std::fstream *file;  ///< the stream used to write the file
+    bool _binaryFile  ;   ///< if true the file is in binary mode
 
 public:
     /**
@@ -555,11 +555,11 @@ public:
      *
      * @param filename the name of the file to open.
      */
-    FFmaGenericWriter(const std::string & filename): binaryFile(false) {
+    FFmaGenericWriter(const std::string & filename): _binaryFile(false) {
         std::string ext(".bfma");
         // open particle file
         if(filename.find(".bfma") !=std::string::npos) {
-            binaryFile = true;
+            _binaryFile = true;
             this->file = new std::fstream (filename.c_str(),std::ifstream::out| std::ios::binary);
         }
         else if(filename.find(".fma")!=std::string::npos ) {
@@ -583,7 +583,7 @@ public:
      * @param filename the name of the file to open.
      * @param binary   true if the file to open is in binary mode
      */
-    FFmaGenericWriter(const std::string & filename, const bool binary ) : file(nullptr), binaryFile(binary)
+    FFmaGenericWriter(const std::string & filename, const bool binary ) : file(nullptr), _binaryFile(binary)
 	{
             if(binary) {
                 this->file = new std::fstream (filename.c_str(),std::ifstream::out| std::ios::binary);
@@ -616,6 +616,14 @@ public:
     }
 
     /**
+     * To know if opened file is in binary mode
+     * @return true if the opened file is in binary mode
+     */
+    bool isBinary() const{
+        return this->_binaryFile;
+    }
+
+     /**
      * Writes the header of the FMA file
      * \warning All values inside typePart should be of the same type (float or double)
      *
@@ -633,7 +641,7 @@ public:
             typeFReal[1] = ndata;
         }
         FReal x = boxWidth * FReal(0.5);
-        if(this->binaryFile) {
+        if(this->_binaryFile) {
             this->writerBinaryHeader(centerOfBox,x,nbParticles,typeFReal);
         }
         else {
@@ -656,7 +664,7 @@ public:
                      const unsigned int  dataType, const unsigned int  nbDataPerRecord) {
         unsigned int typeFReal[2]  = {dataType , nbDataPerRecord };
         FReal x = boxWidth * FReal(0.5);
-        if(this->binaryFile) {
+        if(this->_binaryFile) {
             this->writerBinaryHeader(centerOfBox,x,nbParticles,typeFReal);
         }
         else {
@@ -698,7 +706,7 @@ public:
     template <class dataPart>
     void writeArrayOfParticles(const dataPart *dataToWrite, const FSize N){
         //		std::cout << "NB points to write: "<< N <<std::endl;
-        if(binaryFile){
+        if(_binaryFile){
             unsigned int recordSize=  dataToWrite[0].getWriteDataSize() ;
             unsigned int typeFReal[2]      = {sizeof(FReal) , sizeof(dataPart) / sizeof(FReal) };
             // std::cout << "typeData "<< typeFReal[0] << " "<< typeFReal[1] <<"  "<< std::endl;
@@ -755,7 +763,7 @@ public:
      * \endcode
      */
     void writeArrayOfReal(const FReal *dataToWrite, const FSize nbData, const FSize N){
-        if(binaryFile){
+        if(_binaryFile){
             file->write((const char*)(dataToWrite), N*nbData*sizeof(FReal));
         }
         else{
diff --git a/Src/Files/FMpiFmaGenericLoader.hpp b/Src/Files/FMpiFmaGenericLoader.hpp
index 1a1053728..85c18e134 100644
--- a/Src/Files/FMpiFmaGenericLoader.hpp
+++ b/Src/Files/FMpiFmaGenericLoader.hpp
@@ -8,6 +8,8 @@
 #ifndef FMPIFMAGENERICLOADER_HPP
 #define FMPIFMAGENERICLOADER_HPP
 
+#include <cstdlib>
+#include <vector>
 
 #include "Utils/FMpi.hpp"
 #include "Files/FFmaGenericLoader.hpp"
@@ -69,5 +71,269 @@ public:
   }
 
 };
+/**
+ *
+ * \brief Writes a set of distributed particles to an FMA formatted file.
+ *
+ * Only binary mode (`.bfma`) is accepted by the constructor. The example below shows how to use the class.
+ *
+ * \code
+ * // Instantiate the writer with a binary fma file (extension .bfma).
+ * \endcode
+ * ----------------------------------------
+ * FMA is a simple format to store particles in a file. It is organized as follows.
+ *
+ * file
+ */
+template <class FReal>
+class FMpiFmaGenericWriter : public FFmaGenericWriter<FReal> {
+
+protected:
+  const FMpi* _parallelManager ;
+  bool _writeDone ;
+  int _headerSize;
+  int _nbDataTowritePerRecord ;  //< number of data to write for one particle
+  FSize _numberOfParticles ;     //< number of particle (global) to write in the file
+  using FFmaGenericWriter<FReal>::file;
+  MPI_File _mpiFile;              //< MPI pointer on data file (write mode)
+
+public:
+    /**
+     * Opens a binary FMA file (`.bfma`) for writing through MPI-IO.
+     *
+     * The program exits with EXIT_FAILURE when the base writer did not detect
+     * a binary file or when MPI_File_open fails.
+     * @param inFilename the name of the file to open.
+     * @param para       the MPI manager; its global communicator opens the file.
+     */
+  FMpiFmaGenericWriter(const std::string inFilename,  const FMpi& para)  : FFmaGenericWriter<FReal>(inFilename),
+                       _parallelManager(&para),_writeDone(false),_headerSize(0),_nbDataTowritePerRecord(8),_numberOfParticles(0),
+                       _mpiFile(MPI_FILE_NULL)   // known state until MPI_File_open succeeds
+  {
+    if ( ! this->isBinary()){
+        std::cout << "FMpiFmaGenericWriter only works with binary file (.bfma)." << std::endl;
+        std::exit(EXIT_FAILURE);
+      }
+    int fileIsOpen = MPI_File_open( _parallelManager->global().getComm(), inFilename.c_str(),
+                                    MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_mpiFile );
+    // Is it open?
+    if(fileIsOpen != MPI_SUCCESS){
+        std::cout << "Cannot create parallel file, FMpiFmaGenericWriter constructeur abort." << std::endl;
+        std::exit(EXIT_FAILURE);   // std::exit does not return
+      }
+  }
+    /**
+     * Writes the header of the FMA file; only rank 0 writes, every rank receives _headerSize.
+     *
+     * Should be used if we write the particles with writeArrayOfReal method
+     *
+     * @param centerOfBox      The center of the Box (FPoint<FReal> class)
+     * @param boxWidth         The width of the box (half of it is stored in the file)
+     * @param nbParticles      Number of particles in the box (or to save)
+     * @param dataType         Size of the data type of the values in particle (not read here: sizeof(FReal) is written)
+     * @param nbDataPerRecord  Number of record/value per particle
+     */
+    void writeHeader(const FPoint<FReal> &centerOfBox,const FReal &boxWidth, const FSize &nbParticles,
+                     const unsigned int  dataType, const unsigned int  nbDataPerRecord) {
+      // Header layout, in file order:
+      //   DatatypeSize  Number_of_record_per_line
+      //   NB_particles  half_Box_width  Center_X  Center_Y  Center_Z
+      // Rank 0 writes these values and accumulates their byte size in _headerSize;
+      // the size is then broadcast to all the ranks of the global communicator.
+      _headerSize = 0 ;
+      _nbDataTowritePerRecord = nbDataPerRecord ;
+      _numberOfParticles     = nbParticles ;
+      if(_parallelManager->global().processId()==0){
+          int sizeType=0 ;
+          int ierr = 0 ;
+          MPI_Datatype mpiFSize_t = _parallelManager->GetType(nbParticles) ;
+          MPI_Datatype mpiFReal_t = _parallelManager->GetType(boxWidth) ;
+          // The declared data type size is always sizeof(FReal).
+          unsigned int typeFReal[2]  = {sizeof(FReal) , nbDataPerRecord};
+
+          ierr =MPI_File_write_at(_mpiFile, 0, &typeFReal, 2, MPI_INT,  MPI_STATUS_IGNORE);
+          MPI_Type_size(MPI_INT, &sizeType) ;
+          _headerSize += sizeType*2 ;
+          ierr =MPI_File_write_at(_mpiFile, _headerSize, &nbParticles, 1, mpiFSize_t,  MPI_STATUS_IGNORE);
+          MPI_Type_size(mpiFSize_t, &sizeType) ;
+          _headerSize += sizeType*1 ;
+          // Half width first, then the three coordinates of the center (X, Y, Z).
+          FReal boxSim[4] = {boxWidth * FReal(0.5) ,centerOfBox.getX() , centerOfBox.getY() , centerOfBox.getZ() } ;
+
+          ierr =MPI_File_write_at(_mpiFile, _headerSize, &boxSim[0], 4, mpiFReal_t,  MPI_STATUS_IGNORE);
+          MPI_Type_size(mpiFReal_t, &sizeType) ;
+          _headerSize += sizeType*4 ;
+          // _headerSize now holds the byte offset of the first particle record
+          std::cout << " headerSize "<<   _headerSize << std::endl;
+      }
+      MPI_Bcast(&_headerSize,1,MPI_INT,0,_parallelManager->global().getComm());
+      std::cout << "  _headerSize  " <<  _headerSize  <<std::endl;
+
+    }
+  ~FMpiFmaGenericWriter(){
+      if(_mpiFile != MPI_FILE_NULL){ MPI_File_close(&_mpiFile ); } // MPI_File_close resets the handle to MPI_FILE_NULL, so a file already closed by writeDistributionOfParticlesFromOctree is not closed twice
+  }
+
+    /**
+     *  Writes the header, then for every local particle its position, physical value, potential and forces.
+     *  The MPI file is closed at the end of the call.
+     * @param myOctree the octree holding the particles
+     * @param nbParticles number of particles recorded in the header
+     * @param mortonLeafDistribution the Morton distribution of the leaves (a vector of size 2 * the number of MPI processes)
+     *
+     */
+    template <class OCTREECLASS>
+    void writeDistributionOfParticlesFromOctree( OCTREECLASS &myOctree, const FSize& nbParticles, const std::vector<MortonIndex> &mortonLeafDistribution){
+      // Every rank computes _headerSize; only rank 0 performs the header writes.
+      // Write the header
+      int sizeType = 0,ierr = 0 ;
+      FReal tt =0.0 ;
+      MPI_Datatype mpiFSize_t = _parallelManager->GetType(nbParticles) ;
+      MPI_Datatype mpiFReal_t = _parallelManager->GetType(tt) ;
+      MPI_Type_size(mpiFReal_t, &sizeType) ;
+      int myRank = _parallelManager->global().processId()  ;
+      _headerSize = 0 ;
+      // First record: size of FReal and number of values per particle
+      unsigned int typeFReal[2]  = {sizeof(FReal) , static_cast<unsigned int>(_nbDataTowritePerRecord)};
+      if(myRank==0){
+          ierr =MPI_File_write_at(_mpiFile, 0, &typeFReal, 2, MPI_INT,  MPI_STATUS_IGNORE);
+        }
+      MPI_Type_size(MPI_INT, &sizeType) ;
+      _headerSize += sizeType*2 ;
+      if(myRank==0){
+          ierr =MPI_File_write_at(_mpiFile, _headerSize, &nbParticles, 1, mpiFSize_t,  MPI_STATUS_IGNORE);
+        }
+      MPI_Type_size(mpiFSize_t, &sizeType) ;
+      _headerSize += sizeType*1 ;
+      auto centerOfBox =myOctree.getBoxCenter()  ;
+      FReal boxSim[4] = {myOctree.getBoxWidth()*FReal(0.5) , centerOfBox.getX() , centerOfBox.getY() , centerOfBox.getZ() } ;
+
+      if(myRank==0){
+          ierr =MPI_File_write_at(_mpiFile, _headerSize, &boxSim[0], 4, mpiFReal_t,  MPI_STATUS_IGNORE);
+        }
+      if(ierr != MPI_SUCCESS){
+          std::cerr << "Error during the construction of the header in FMpiFmaGenericWriter::writeDistributionOfParticlesFromOctree"<<std::endl;
+        }
+      MPI_Type_size(mpiFReal_t, &sizeType) ;
+      _headerSize += sizeType*4 ;
+    // From here on sizeType is the byte size of one FReal.
+    // Construct the local number of particles on my process
+    FSize nbLocalParticles =0 ,maxPartLeaf =0;
+    MortonIndex starIndex = mortonLeafDistribution[2*myRank], endIndex =  mortonLeafDistribution[2*myRank+1];
+    myOctree.template forEachCellMyLeaf<typename OCTREECLASS::LeafClass_T >(
+        [&](typename OCTREECLASS::GroupSymbolCellClass_T* gsymb ,
+                     typename OCTREECLASS::GroupCellUpClass_T*   /* gmul */,
+                     typename OCTREECLASS::GroupCellDownClass_T* /* gloc */,
+                     typename OCTREECLASS::LeafClass_T * leafTarget
+                     )
+    {
+        if (! (gsymb->getMortonIndex() < starIndex || gsymb->getMortonIndex() > endIndex)) {
+            auto n =  leafTarget->getNbParticles();
+            nbLocalParticles += n;
+            maxPartLeaf = std::max(maxPartLeaf,n);
+          }
+      }
+    );
+    std::vector<FReal> particles(maxPartLeaf*_nbDataTowritePerRecord);
+    // Build the offset for each process
+    FSize before=0;  // Number of particles before me (rank < myrank)
+    MPI_Scan(&nbLocalParticles,&before,1,mpiFSize_t,MPI_SUM,_parallelManager->global().getComm());
+    before -= nbLocalParticles ;
+    MPI_Offset offset = _headerSize + sizeType*_nbDataTowritePerRecord*before;
+    // NOTE(review): the loop below always fills 8 values per particle; it assumes _nbDataTowritePerRecord == 8.
+    // Write particles in file, one leaf at a time, through the reusable buffer
+    myOctree.template forEachCellMyLeaf<typename OCTREECLASS::LeafClass_T >(
+        [&](typename OCTREECLASS::GroupSymbolCellClass_T* gsymb ,
+                     typename OCTREECLASS::GroupCellUpClass_T*   /* gmul */,
+                     typename OCTREECLASS::GroupCellDownClass_T* /* gloc */,
+                     typename OCTREECLASS::LeafClass_T * leafTarget
+                     )
+    {
+        if (! (gsymb->getMortonIndex() < starIndex || gsymb->getMortonIndex() > endIndex)) {
+            const FSize nbPartsInLeaf = leafTarget->getNbParticles();
+            const FReal*const posX = leafTarget->getPositions()[0];
+            const FReal*const posY = leafTarget->getPositions()[1];
+            const FReal*const posZ = leafTarget->getPositions()[2];
+            const FReal*const physicalValues = leafTarget->getPhysicalValues();
+            const FReal*const forceX = leafTarget->getForcesX();
+            const FReal*const forceY = leafTarget->getForcesY();
+            const FReal*const forceZ = leafTarget->getForcesZ();
+            const FReal*const potential = leafTarget->getPotentials();
+            for (int i=0, k=0 ; i < nbPartsInLeaf ;++i,k+=_nbDataTowritePerRecord ) {
+                particles[k] = posX[i];  particles[k+1] = posY[i];  particles[k+2] = posZ[i];
+                particles[k+3] = physicalValues[i]; particles[k+4] = potential[i];
+                particles[k+5] = forceX[i];  particles[k+6] = forceY[i];  particles[k+7] = forceZ[i];
+              }
+            MPI_File_write_at(_mpiFile, offset, particles.data(), static_cast<int>(_nbDataTowritePerRecord*nbPartsInLeaf), mpiFReal_t,  MPI_STATUS_IGNORE);
+            offset+=sizeType*_nbDataTowritePerRecord*nbPartsInLeaf;
+          }
+      }
+    );
+
+    MPI_File_close(&_mpiFile );
+
+    }
+
+//    /**
+//     *  Write an array of data in a file Fill
+//     *
+//     * @param dataToWrite array of particles of type FReal
+//     * @param nbData number of data per particle
+//     * @param N number of particles
+//     *
+//     *   The size of the array is N*nbData
+//     *
+//     *   example
+//     * \code
+//     * FmaRParticle * const particles = new FmaRParticle[nbParticles];
+//     * memset(particles, 0, sizeof(FmaRParticle) * nbParticles) ;
+//     * ...
+//     * FFmaGenericWriter<FReal> writer(filenameOut) ;
+//     * Fwriter.writeHeader(Centre,BoxWith, nbParticles,*particles) ;
+//     * Fwriter.writeArrayOfReal(particles, nbParticles);
+//     * \endcode
+//     */
+//    void writeArrayOfReal(const FReal *dataToWrite, const FSize nbData, const FSize N){
+//      /*
+//      if(! _writeDone){
+//          FSize previousNumberofParticles;
+//          MPI_Scan(&N,&previousNumberofParticles,1,_parallelManager->GetType(N),MPI_SUM,_parallelManager->global().getComm());
+//          FSize offset= previousNumberofParticles-N;
+//          //To this header size, we had the parts that belongs to proc on my left
+//          this->skipHeaderAndPart(offset) ;
+//          FFmaGenericWriter<FReal>::writeArrayOfReal(dataToWrite,4,N) ;
+//          //
+
+//          std::cout <<" node " << _parallelManager->global().processId() << "Npart " << N << "  before Me" << previousNumberofParticles-N<< std::endl;
+//          _writeDone  = true;
+//        }
+//      else {
+//          std::cerr << " The writeArrayOfReal should be call only once !!!! "<< std::endl;
+//          std::exit(EXIT_FAILURE);
+//        }
+//        */
+//   }
+//private:
+//     void skipHeaderAndPart(const FSize &numberOfParticleToSkip){
+//       if(this->binaryFile) {
+//           //This is header size in bytes
+//           //   MEANING :      sizeof(FReal)+nbAttr, nb of parts, boxWidth+boxCenter
+//           _headerSize = sizeof(int)*2 + sizeof(FSize) + sizeof(FReal)*4;
+//          file->seekg(_headerSize+numberOfParticleToSkip* 4/*FFmaGenericWriter<FReal>::getNbRecordPerline()*/*sizeof(FReal), std::ios::beg);
+
+//       } else {
+//           // First finish to read the current line
+//           file->ignore(std::numeric_limits<std::streamsize>::max(), '\n');
+//           for(int i = 0; i < numberOfParticleToSkip; ++i) {
+//               file->ignore(std::numeric_limits<std::streamsize>::max(), '\n');
+//           }
+//       }
+//     }
+
+
+
+} ;
+#ifdef __EXPRIMENTAL_DOESNT_WORK
 
+#endif
 #endif //FMPIFMAGENERICLOADER_HPP
diff --git a/Src/Files/FTreeMpiCsvSaver.hpp b/Src/Files/FTreeMpiCsvSaver.hpp
index 903c917b2..80a77880a 100644
--- a/Src/Files/FTreeMpiCsvSaver.hpp
+++ b/Src/Files/FTreeMpiCsvSaver.hpp
@@ -3,8 +3,8 @@
 #define FTREEMPICSVSAVER_HPP
 
 
-#include "../Utils/FGlobal.hpp"
-#include "../Utils/FMpi.hpp"
+#include "Utils/FGlobal.hpp"
+#include "Utils/FMpi.hpp"
 
 #include <cstring>
 #include <iostream>
diff --git a/Src/GroupTree/Core/FBlockedLinearTree.hpp b/Src/GroupTree/Core/FBlockedLinearTree.hpp
deleted file mode 100644
index dd2229d28..000000000
--- a/Src/GroupTree/Core/FBlockedLinearTree.hpp
+++ /dev/null
@@ -1,196 +0,0 @@
-#ifndef _FBLOCKED_LINEAR_TREE_HPP_
-#define _FBLOCKED_LINEAR_TREE_HPP_
-
-#include <vector>
-#include "../../Utils/FLog.hpp"
-#include "FDistributedGroupTreeBuilder.hpp"
-
-using FReal = double;
-
-template<class node_t>
-class FBlockedLinearTree {
-
-protected:
-
-    int block_size;
-    int nb_block;
-
-    std::vector<node_t>* linear_tree;
-
-public:
-
-////////////////////////////////////////////////
-// constructor
-////////////////////////////////////////////////
-
-    /**
-     * FBlockedLinearTree  Constructor of blocked linear tree
-     * @author benjamin.dufoyer@inria.fr
-     * @param  in_block_size    Block size needed
-     * @param  in_linear_tree   Linear tree
-     * @param  in_box_center    Box Center of particle container
-     * @param  in_box_width     Box Width of particle container
-     */
-    FBlockedLinearTree(){}
-
-    /**
-     * This function create a blocked linear tree from the current distributed
-     * linear tree
-     * This function stock the linear tree with his adress
-     * @author benjamin.dufoyer@inria.fr
-     * @param  in_linear_tree linear tree
-     * @param  in_block_size  block size
-     */
-    void create_local_blocked_linear_tree(
-            std::vector<node_t>* in_linear_tree,
-            int in_block_size
-    ){
-        this->create(in_linear_tree,in_block_size);
-    }
-
-    /**
-     * this function create a blocked linear tree from the current distributed
-     * linear tree and she redistribute block according to the block size
-     * the function stock the linear tree with his adress
-     * @author benjamin.dufoyer@inria.fr
-     * @param  in_linear_tree linear tree
-     * @param  in_block_size  blocksize needed
-     * @param  conf [description]
-     */
-    void create_global_blocked_linear_tree(
-            std::vector<node_t>* in_linear_tree,
-            int in_block_size,
-            const inria::mpi_config& conf
-    ){
-        this->create(in_linear_tree,in_block_size);
-        this->redistribute_block(conf);
-    }
-
-    void create(
-        std::vector<node_t>* in_linear_tree,
-        int in_block_size
-    ){
-        this->block_size    = in_block_size;
-        this->linear_tree   = in_linear_tree;
-        this->nb_block      = (int)in_linear_tree->size()/in_block_size;
-        if(this->linear_tree->size()%this->block_size != 0)
-            this->nb_block += 1;
-    }
-
-////////////////////////////////////////////////
-// destructor
-////////////////////////////////////////////////
-
-    ~FBlockedLinearTree(){
-        linear_tree = nullptr;
-    }
-
-////////////////////////////////////////////////
-// Function
-////////////////////////////////////////////////
-
-    /**
-     * redistribute_block redistribute leaf of the linear_tree with the good
-     * block size. For N proc, N-1 proc have the same number of leaf,
-     * the rest is for the proc N
-     * @author benjamin.dufoyer@inria.fr
-     * @param  conf mpi configuration to work with the other process
-     */
-    void redistribute_block(const inria::mpi_config& conf){
-
-        dstr_grp_tree_builder::parrallel_build_block(
-                        conf,
-                        this->linear_tree,
-                        this->block_size);
-        //Update nb_block
-        if(this->linear_tree->size()%block_size == 0)
-            this->nb_block = (int)this->linear_tree->size()/block_size;
-        else
-            this->nb_block = (int)this->linear_tree->size()/block_size+1;
-
-    }
-
-    size_t get_nb_leaf() const{
-        return this->linear_tree->size();
-    }
-
-    int get_nb_block() const{
-        return this->nb_block;
-    }
-
-    int get_block_size() const{
-        return this->block_size;
-    }
-
-    /**
-     * get_block_size_at return the block size of the number of the block
-     * placed in parametter,
-     * [INFO] first block is 0
-     * [INFO] last block is this->nb_block-1
-     * @author benjamin.dufoyer@inria.fr
-     * @param  num_block number of the block
-     * @return size of the block
-     */
-    int get_block_size_at(int num_block) const{
-        FAssertLF(num_block < this->nb_block);
-        int size;
-        if(num_block == this->nb_block-1){
-            size = this->linear_tree->size() - ((this->nb_block-1)*this->block_size);
-        } else {
-            size = this->block_size;
-        }
-        return size;
-    }
-
-    /**
-     * get_leaf_at return the leaf at the position placed in parameter
-     * @author benjamin.dufoyer@inria.fr
-     * @param  position position of the leaf
-     * @return the leaf
-     */
-    node_t get_leaf_at(int position){
-        return this->linear_tree->at(position);
-    }
-
-    /**
-     * get_leaf_at return the leaf at the position placed in parameter
-     * @author benjamin.dufoyer@inria.fr
-     * @param  position position of the leaf
-     * @return the leaf
-     */
-    node_t at(int position){
-        return this->get_leaf_at(position);
-    }
-
-    size_t get_leaf_level() const{
-        return this->linear_tree->back().level;
-    }
-
-    size_t get_tree_height() const{
-        return this->get_leaf_level();
-    }
-
-    size_t get_first_morton_index(){
-        return this->linear_tree->front().morton_index;
-    }
-
-    size_t get_last_morton_index(){
-        return this->linear_tree->back().morton_index;
-    }
-
-    void print_info_tree(){
-        std::cout << " nb_leaf : " << this->linear_tree->size() << std::endl;
-        std::cout << " nb_block : " << nb_block << std::endl;
-        std::cout << " block_size : " << block_size << std::endl;
-        for(int i = 0 ; i < this->linear_tree->size() ; i++){
-            std::cout << linear_tree->at(i) << std::endl;
-        }
-    }
-
-    std::vector<node_t>* get_tree(){
-        return this->linear_tree;
-    }
-
-};
-
-#endif //_FBLOCKED_LINEAR_TREE_HPP_
diff --git a/Src/GroupTree/Core/FBuildGroupTree.hpp b/Src/GroupTree/Core/FBuildGroupTree.hpp
new file mode 100644
index 000000000..626b97b66
--- /dev/null
+++ b/Src/GroupTree/Core/FBuildGroupTree.hpp
@@ -0,0 +1,397 @@
+// ==== CMAKE =====
+// @FUSE_MPI
+// @FUSE_STARPU
+//
+#ifndef FBuildGroupTree
+#define FBuildGroupTree
+
+#include <vector>
+#include <string>
+#include "Utils/FGlobal.hpp"
+
+// include algo for linear tree
+#include "inria/algorithm/distributed/mpi.hpp"
+#include "inria/linear_tree/node.hpp"
+#include "inria/linear_tree/balance_tree.hpp"
+// tree class
+#include "GroupTree/Core/FGroupTree.hpp"
+// symbolic data
+#include "Components/FSymbolicData.hpp"
+// GroupParticleContainer
+#include "GroupTree/Core/FP2PGroupParticleContainer.hpp"
+// file loader
+#include "Files/FMpiFmaGenericLoader.hpp"
+// FBox
+#include "Adaptive/FBox.hpp"
+// Group linear tree
+#include "GroupTree/Core/FGroupLinearTree.hpp"
+// Function for GroupLinearTree
+#include "GroupTree/Core/FDistributedGroupTreeBuilder.hpp"
+//
+#include "GroupTree/Core/FGroupTools.hpp"
+
+// To construct either the duplicated Octree or the LET
+#include "Utils/FLeafBalance.hpp"
+
+namespace groupTree {
+  // Builds the distributed group tree and its LET from the particles delivered by loader.
+  // @param[in]    mpi_comm   the MPI communicator
+  // @param[in]    loader     particle source (getNumberOfParticles, getMyNumberOfParticles and fillParticle are called)
+  // @param[inout] myParticles  local array of particles on my node. On output the array is sorted and redistributed
+  // @param[in]    box  size of the simulation box
+  // @param[in]    TreeHeight, groupSize   height of the tree and group size given to the linear tree and the group tree
+  // @param[out]   localGroupTree  the LET of the octree, allocated here with new (presumably released by the caller - TODO confirm)
+  // @param[out]    m_idx_distribution  Distribution of the leaves on the processors
+  // @param[out]   nb_blocks   value returned by dstr_grp_tree_builder::set_cell_group_global_index
+  template <class LOADER, class particleType , class OCTREEGRPOUPCLASS>
+  void buildLetTree( inria::mpi::communicator & mpi_comm, LOADER& loader, std::vector<particleType> &myParticles,
+                     const  FBox<FPoint<FReal>> box,
+                     const int TreeHeight, const int groupSize,
+                     OCTREEGRPOUPCLASS * &localGroupTree,
+                     std::vector<MortonIndex> &m_idx_distribution, int & nb_blocks
+                     ){
+    // Finest level that fits in a MortonIndex (3 bits per level)
+    const std::size_t max_level = sizeof(MortonIndex) * 8 / 3;
+    const FSize totalNumberOfParticles = loader.getNumberOfParticles() ;
+    const FSize localNumberOfParticles = loader.getMyNumberOfParticles() ;
+
+    myParticles.resize(localNumberOfParticles) ;
+
+    // iterate over all of my particles
+    for(FSize idxPart = 0; idxPart < static_cast<FSize>(localNumberOfParticles );++idxPart){
+        particleType   tmp;
+        // get the current particle (position and physical value)
+        loader.fillParticle(&tmp.pos,&tmp.phi);
+        // set the morton index of the current particle at the max_level
+        tmp.morton_index = inria::linear_tree::get_morton_index(tmp.pos, box, max_level);
+        // set the weight of the particle
+      //  tmp.phi = 0.1;
+        // store the particle in my vector of particles
+        myParticles[idxPart].fill(tmp.pos, tmp.phi,tmp.morton_index);
+      }
+    // All my particles are now in a vector and each one has a morton index;
+    // sort them by morton index across the communicator
+    inria::sort(mpi_comm,myParticles, [](const auto& p1, const auto& p2) {
+        return p1.morton_index < p2.morton_index;
+      });
+
+    std::cout << " I have "         << myParticles.size() << " particles ..." << std::endl;
+    std::cout << "For a total of "
+              << totalNumberOfParticles << " particles ..." << std::endl;
+
+    // create the linear tree
+    // a linear tree is a tree that stores only the leaves
+    int level = TreeHeight -1 ;
+    auto linear_tree = inria::linear_tree::create_balanced_linear_tree_at_level(
+          mpi_comm,
+          level,
+          box,
+          myParticles);
+
+    // create the GroupLinearTree on top of the linear tree
+    FGroupLinearTree<typename decltype(linear_tree)::value_type>  group_linear_tree{mpi_comm};
+    group_linear_tree.create_local_group_linear_tree(  &linear_tree, groupSize );
+
+    // group_linear_tree.print_info_tree() ;
+
+    // Redistribute the particles according to the linear tree
+    // Redistribution of particles
+    inria::linear_tree::redistribute_particles(mpi_comm,
+                                               linear_tree,
+                                               myParticles);
+
+    // Now recompute the morton index of every particle so that
+    // it is expressed at level TreeHeight-1
+#pragma omp parallel for shared(myParticles)
+    for(unsigned i = 0 ; i < myParticles.size(); ++i){
+        myParticles[i].morton_index = inria::linear_tree::get_morton_index(myParticles[i].pos, box, level);
+      }
+
+    // Now we need to share the particle distribution to build the GroupTree
+    group_linear_tree.set_index_particle_distribution(myParticles);
+
+    // Now I can declare my groupTree
+    // it is an empty instance of the FGroupTree; width is the largest extent of the box
+    FReal width = std::max(box.width(0) , std::max(box.width(1) ,box.width(2) )) ;
+    //   using test = typename std::remove_pointer<typename std::remove_reference<decltype(localGroupTree)>::type >::type;
+    // //   std::cout << "&&&&&"<<typeid(test).name() <<std::endl;
+    //
+    localGroupTree = new OCTREEGRPOUPCLASS (TreeHeight,groupSize, box.center(), box.c1() /* corner*/,
+                                            width, width/FReal(1<<(TreeHeight-1)));
+    // Now I can fill the localGroupTree
+    localGroupTree->create_tree(group_linear_tree,myParticles);
+    localGroupTree->printInfoBlocks();
+    // get the index particle distribution (needed by the algorithm)
+
+    m_idx_distribution = group_linear_tree.get_index_particle_distribution_implicit();
+    //  for(int i = 0 ; i < mpi_comm.size() ;++i)
+    //    m_idx_distribution[2*i] += 1;
+    nb_blocks = dstr_grp_tree_builder::set_cell_group_global_index(*localGroupTree,mpi_comm);
+    // now we create the LET
+    localGroupTree->create_LET(group_linear_tree);
+
+ //   std::cout << " End buildLetTree function " << std::endl;
+  }
+  // BuilddMortonDistributionForCGroupCellInTree: computes, level by level, the sizes of the cell groups.
+  // Declared inline because this header-defined function is not a template.
+  // @param[in]  parallelManager         MPI manager, used for the number of processes
+  // @param[in]  mortonLeaves            Morton indexes of the leaves (shifted by 3 bits per level to get the ancestors)
+  // @param[in]  TreeHeight              The Height of the octree
+  // @param[in]  groupSize               Maximum number of cells in a group
+  // @param[in]  MortonIndexDistribution The Morton distribution at the leaf level (two entries per process)
+  // @param[in]  nodeRepartition         [level][process][0|1]; entry [1] is used as the upper bound of the process interval
+  // @param[out] sizeForEachGroup        For each level, the sizes of the groups are appended; must already hold TreeHeight entries
+  inline void BuilddMortonDistributionForCGroupCellInTree(const FMpi &parallelManager,  std::vector<MortonIndex> &mortonLeaves,
+                                                   const int & TreeHeight, const int groupSize,
+                                                   const std::vector<MortonIndex> &MortonIndexDistribution,
+                                                   std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition,
+                                                   std::vector< std::vector<int>> &sizeForEachGroup ){
+    //
+    const int nproc = parallelManager.global().processCount() ;
+    //
+    // Build the group size of all groups in the Tree (sequential one)
+    //
+    std::cout << "Morton distribution inside BuilddMortonDistributionForCGroupCellInTree " <<std::endl;
+    for (auto v : MortonIndexDistribution)
+      std::cout << "  " << v ;
+    std::cout << std::endl;
+    int processId ;
+    for( processId = 0; processId < nproc; ++processId)
+      {
+        FSize size_last, countGroup;
+        // no +1 if we do not start at 0
+        FSize leafOnProcess = MortonIndexDistribution[2*processId+1] - MortonIndexDistribution[2*processId]  ;
+        size_last = leafOnProcess%groupSize;
+        countGroup = (leafOnProcess - size_last)/groupSize;
+        for(int i = 0; i < countGroup; ++i)
+          sizeForEachGroup[TreeHeight-1].push_back(groupSize);
+        if(size_last > 0)
+          sizeForEachGroup[TreeHeight-1].push_back((int)size_last);
+      }
+    //
+    // For each level above the leaves, compute the size of the groups
+    for(int idxLevel = TreeHeight - 2; idxLevel >= 0; --idxLevel)
+      {
+        processId = 0;
+        int countCellsInTheGroup = 0;
+        MortonIndex previousMortonCell = -1;
+
+        std::cout << "Compute Level " << idxLevel << std::endl;
+        for(std::size_t idxLeaf = 0; idxLeaf < mortonLeaves.size(); ++idxLeaf)
+          {
+            MortonIndex mortonCell = (mortonLeaves[idxLeaf]) >> (3*(TreeHeight - 1 - idxLevel));
+            if(mortonCell <= nodeRepartition[idxLevel][processId][1]) // The index is inside the working interval
+              {
+                if(mortonCell != previousMortonCell) // It is a new index
+                  {
+                    ++countCellsInTheGroup; // Count it in the current group
+                    previousMortonCell = mortonCell;
+                    if(countCellsInTheGroup == groupSize) // The group is full: record its size
+                      {
+                        sizeForEachGroup[idxLevel].push_back(groupSize);
+                        countCellsInTheGroup = 0;
+                      }
+                  }
+              }
+            else // Moving to the next process interval: record what has been counted so far
+              {
+                if(countCellsInTheGroup > 0)
+                  sizeForEachGroup[idxLevel].push_back(countCellsInTheGroup);
+                countCellsInTheGroup = 1;
+                previousMortonCell = mortonCell;
+                ++processId;
+              }
+          }
+        if(countCellsInTheGroup > 0)
+          sizeForEachGroup[idxLevel].push_back(countCellsInTheGroup);
+        //
+        // Print sizeForEachGroup at the current level
+        for( auto v : sizeForEachGroup[idxLevel])
+          std::cout << "  "<< v ;
+        std::cout << std::endl;
+      }
+
+
+  }
+  // Builds, for every level and every process, the [first, last] Morton interval, starting from the leaf distribution.
+  // Declared inline (header-defined, not a template); the input vector is only read, so it is taken by const reference.
+  inline void createNodeRepartition(const std::vector<MortonIndex>& distributedMortonIndex,
+                             std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition,
+                             int nproc, int treeHeight) {
+    // Leaf level: copy the two bounds of each process from distributedMortonIndex
+    nodeRepartition.resize(treeHeight, std::vector<std::vector<MortonIndex>>(nproc, std::vector<MortonIndex>(2)));
+    for(int node_id = 0; node_id < nproc; ++node_id){
+        nodeRepartition[treeHeight-1][node_id][0] = distributedMortonIndex[node_id*2];
+        nodeRepartition[treeHeight-1][node_id][1] = distributedMortonIndex[node_id*2+1];
+      }
+    for(int idxLevel = treeHeight - 2; idxLevel >= 0  ; --idxLevel){
+        nodeRepartition[idxLevel][0][0] = nodeRepartition[idxLevel+1][0][0] >> 3;
+        nodeRepartition[idxLevel][0][1] = nodeRepartition[idxLevel+1][0][1] >> 3;
+        for(int node_id = 1; node_id < nproc; ++node_id){
+            nodeRepartition[idxLevel][node_id][0] = FMath::Max(nodeRepartition[idxLevel+1][node_id][0] >> 3, nodeRepartition[idxLevel][node_id-1][0]+1); //Berenger phd :) lower bound kept strictly above the previous process's lower bound
+            nodeRepartition[idxLevel][node_id][1] = nodeRepartition[idxLevel+1][node_id][1] >> 3;
+          }
+      }
+  }
+
+  //
+  //
+  // @param[in]    mpi_comm   the MPI communicator
+  // @param[in]    filename  Particles name file
+  // @param[in]    option   option to build the groupTree
+  //                     1 we use a given Morton distribution
+  //                     2 we build the Morton distribution
+  // @param[out] myParticleslocal array of particles on my node. On output the array is sorted
+  // @param[in]    box  size of the simulation box
+  // @param[in]    TreeHeight    Height of the tree
+  // @param[in]    localGroupTree  the LET of the octree
+  // @param[inout]    localGroupTree  the LET of the octree
+  // @param[inout]    m_idx_distribution  Distribution of the leaves on the processors
+  // @param[out]    nb_blocks
+  template <class PARTICLE_T , class OCTREEGRPOUPCLASS>
+  void buildDuplicatedTree( const FMpi &parallelManager, const int option, const std::string &filename,
+                            std::vector<PARTICLE_T> &myParticles, const  FBox<FPoint<FReal>>& box,
+                            const int TreeHeight, const int groupSize, OCTREEGRPOUPCLASS * &GroupTree,
+                            std::vector<MortonIndex> &MortonIndexDistribution ,int & nb_block)
+  {
+    // Build the duplicated group tree from all the particles of the file filename:
+    //   1. read every particle and sort myParticles by Morton index;
+    //   2. extract the distinct Morton indexes at the leaf level (mortonLeaves);
+    //   3. option  > 1: MortonIndexDistribution is cleared and rebuilt by sharing
+    //      the leaves between the processes with FLeafBalance;
+    //      option <= 1: MortonIndexDistribution is used as given by the caller;
+    //   4. compute the group sizes per level and allocate GroupTree with new.
+    // Precondition: the file contains at least one particle.
+    // NOTE(review): nb_block is documented as an output but is never written here.
+    //
+    //loader
+    std::cout << "Opening : " << filename << " ...";
+    FFmaGenericLoader<FReal>  loader(filename);
+    FAssertLF(loader.isOpen());
+    std::cout << " done." << std::endl;
+    const FSize totalNbParticles   = loader.getNumberOfParticles();
+    // myParticles[0] and the last particle are read below: refuse an empty file
+    FAssertLF(totalNbParticles > 0);
+    // Level of the sort key: number of bits of the Morton index divided by 3
+    const std::size_t max_level = sizeof(PARTICLE_T::morton_index) * 8 / 3;
+    //
+    myParticles.resize(totalNbParticles);
+    for(FSize idxPart = 0 ; idxPart < totalNbParticles ; ++idxPart){
+        FPoint<FReal> pos ;
+        FReal physicalValue ;
+        loader.fillParticle(&pos, &physicalValue);//Same with file or not
+        //
+        MortonIndex morton = inria::linear_tree::get_morton_index( pos, box, max_level);
+        myParticles[idxPart].fill(pos,physicalValue,morton) ;
+      }
+    std::sort(myParticles.begin(), myParticles.end(), [](const PARTICLE_T& a, const PARTICLE_T& b) {
+        return (a.getMorton() < b.getMorton()  ) ;
+      }
+    );
+    //
+    FP2PParticleContainer<FReal> allParticles;
+
+    // Set the Morton index at the leaf level (TreeHeight-1) and copy the
+    // particles in the container given to the group tree
+    for(FSize idxPart = 0 ; idxPart < totalNbParticles ; ++idxPart){
+        myParticles[idxPart].morton_index = inria::linear_tree::get_morton_index(  myParticles[idxPart] .pos, box,
+                                                                                   TreeHeight-1);
+        allParticles.push(myParticles[idxPart].getPosition(), myParticles[idxPart].physicalValue() );
+      }
+    // Create the linear tree
+    // a linear tree is a tree, with only the leaf
+    // Build a vector of MortonIndex at Leaf level from particles: a new leaf
+    // starts each time the Morton index differs from the previous particle
+    //
+    std::vector<MortonIndex> mortonLeaves(1, myParticles[0].morton_index) ;
+    for(std::size_t idxPart = 1 ; idxPart < myParticles.size(); ++idxPart){
+        if(mortonLeaves.back() != myParticles[idxPart].morton_index){
+            mortonLeaves.push_back(myParticles[idxPart].morton_index) ;
+          }
+      }
+    const std::size_t nbLeaves = mortonLeaves.size() ;
+    std::cout<< "Number of leaves" << nbLeaves <<std::endl ;
+
+    // Number of processes between which the leaves are shared
+    const int nproc = parallelManager.global().processCount() ; //
+    if(option >1 ) {
+        std::cout << " Construct the distribution used in Beregnger's thesis "<< std::endl;
+        FLeafBalance balancer;
+        MortonIndexDistribution.clear() ;
+        //
+        // Build the Morton index as in Berenger's thesis
+        // Compute the working interval at the leaf level
+        int countLeaf  = 0;
+        int processId  = 0;
+        FSize leafOnProcess = balancer.getRight(nbLeaves, nproc, 0) - balancer.getLeft(nbLeaves, nproc, 0);
+        std::cout << "  leafOnProcess  " << leafOnProcess <<  " empty? " << MortonIndexDistribution.empty() << "  "  << MortonIndexDistribution.size() <<std::endl;
+        // first bound of the distribution: -1
+        MortonIndexDistribution.push_back(MortonIndex(-1));
+        // one iteration per distinct leaf, in increasing particle order
+        for(const MortonIndex leafMorton : mortonLeaves)
+          {
+            ++countLeaf;
+            if(countLeaf == leafOnProcess)
+              {
+                ++processId;
+                if (processId < nproc){
+                    leafOnProcess = balancer.getRight(nbLeaves, nproc, processId) - balancer.getLeft(nbLeaves, nproc, processId);
+                    // NOTE(review): pushed twice, presumably as the upper bound of the
+                    // finished process and the lower bound of the next one - confirm
+                    MortonIndexDistribution.push_back(leafMorton);
+                    MortonIndexDistribution.push_back(leafMorton);
+                    countLeaf = 0;
+                  }
+              }
+          }
+        // last bound of the distribution: Morton index of the last leaf
+        MortonIndexDistribution.push_back(mortonLeaves.back()) ;
+        //
+      }
+    // otherwise we use the given Morton distribution
+    std::cout << "    Morton distribution to build the duplicated tree " <<MortonIndexDistribution.size() << " "<<std::endl<<std::flush;
+    for (auto v : MortonIndexDistribution)
+      std::cout << "  " << v ;
+    std::cout << std::endl;
+    //////////////////////////////////////////////////////////////////////////
+    // nodeRepartition[level][proc] holds the two bounds printed below
+    std::vector< std::vector<std::vector<MortonIndex>>> nodeRepartition;
+    std::vector< std::vector<int>>                          sizeForEachGroup(TreeHeight);
+    createNodeRepartition(MortonIndexDistribution, nodeRepartition, nproc, TreeHeight) ;
+    for ( std::size_t idLevel=0;  idLevel< nodeRepartition.size() ; ++idLevel){
+        std::cout << "  nodeRepartition at level " << idLevel << std::endl ;
+        for ( std::size_t procID=0 ;  procID<  nodeRepartition[idLevel].size();  ++procID){
+            std::cout << "  n  proc( " << procID << "  ) " <<
+                         " [ " << nodeRepartition[idLevel][procID][0] << ", "
+                      << nodeRepartition[idLevel][procID][1] <<" ]" <<std::endl ;
+          }
+      }
+
+    // NOTE(review): presumably fills sizeForEachGroup (one vector per level) - confirm
+    BuilddMortonDistributionForCGroupCellInTree(parallelManager,mortonLeaves,TreeHeight,groupSize,
+                                                MortonIndexDistribution,nodeRepartition,sizeForEachGroup ) ;
+
+    //
+    // Print group size per level
+    std::cout << std::endl<< "  Group size at the leaf level " << std::endl ;
+    for ( std::size_t idLevel=2;  idLevel< sizeForEachGroup.size() ; ++idLevel){
+        std::cout << "  Group size at level " << idLevel << std::endl ;
+        int totalLeaves = 0 ;
+        for ( auto v : sizeForEachGroup[idLevel]){
+            totalLeaves += v;
+            std::cout << "   " << v ;
+          }
+        std::cout << std::endl ;std::cout << " Total number of leaves: " <<totalLeaves << std::endl;
+      }
+    //
+    // The tree is allocated with new and returned through the GroupTree reference
+    GroupTree  = new OCTREEGRPOUPCLASS (TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(),
+                                        groupSize, &allParticles, sizeForEachGroup, true);
+    //
+    //
+
+
+  }
+}
+
+#endif
diff --git a/Src/GroupTree/Core/FCheckResults.hpp b/Src/GroupTree/Core/FCheckResults.hpp
new file mode 100644
index 000000000..f07bf7323
--- /dev/null
+++ b/Src/GroupTree/Core/FCheckResults.hpp
@@ -0,0 +1,93 @@
+#ifndef _FGROUPTREE_CHECK_RESULTS_
+#define _FGROUPTREE_CHECK_RESULTS_
+
+#include <iostream>
+#include <string>
+#include <vector>
+#include <algorithm>
+
+#include "Utils/FGlobal.hpp"
+#include "Utils/FAssert.hpp"
+#include "Utils/FMath.hpp"
+#include "Files/FFmaGenericLoader.hpp"
+#include "Utils/FPoint.hpp"
+// FBox
+#include "Adaptive/FBox.hpp"
+// Group linear tree
+#include "GroupTree/Core/FGroupLinearTree.hpp"
+//
+#include "GroupTree/Core/FGroupTools.hpp"
+//
+//
+// Read all the particles of seqLoader and sort them by Morton index.
+// param[in]   seqLoader    loader giving the particles (asserted to be open)
+// param[in]   box          box used to compute the Morton indexes
+// param[out]  myParticles  previous content dropped; on output the sorted particles
+// param[in]   TreeHeight   the stored Morton index is computed at level TreeHeight-1
+//
+template < class LOADER_T, typename PARTICLE_T, typename BOX_T >
+void readAndSortAllParticle(LOADER_T & seqLoader, const  BOX_T & box,
+                            std::vector<PARTICLE_T> &myParticles, const int TreeHeight ){
+
+  using REAL= typename LOADER_T::dataType ;
+  FAssertLF(seqLoader.isOpen());
+  const FSize NbParticles     = seqLoader.getNumberOfParticles();
+  // Level of the sort key: number of bits of the Morton index divided by 3
+  const std::size_t sortLevel = sizeof(PARTICLE_T::morton_index) * 8 / 3;
+  //
+  // Read File: start from an empty vector so that every particle is rebuilt
+  myParticles.clear() ;
+  myParticles.resize(NbParticles) ;
+  for(auto & particle : myParticles){
+      FPoint<REAL> pos ;
+      REAL physicalValue ;
+      seqLoader.fillParticle(&pos, &physicalValue);//Same with file or not
+      const MortonIndex morton = inria::linear_tree::get_morton_index( pos, box, sortLevel);
+      particle.fill(pos,physicalValue,morton) ;
+    }
+  //
+  std::sort(myParticles.begin(), myParticles.end(),
+            [](const PARTICLE_T& lhs, const PARTICLE_T& rhs) {
+      return lhs.getMorton() < rhs.getMorton() ;
+    }
+  );
+  // Set the right MortonIndex (level TreeHeight-1) once the particles are sorted
+  for(auto & particle : myParticles){
+      particle.morton_index = inria::linear_tree::get_morton_index( particle.pos, box,
+                                                                    TreeHeight-1);
+    }
+}
+//
+// Execute algorithm, save the solution of treeCheck in the file
+// output-Let-<myrank>.fma and check grouptree against treeCheck.
+// param[in]     myrank, arrayParticles  rank naming the file; only size() is used
+// param[inout]  treeCheck, algorithm    tree given to the checks; algorithm executed
+// param[inout]  grouptree, groupalgo    group tree and algorithm given to the checks
+// param[in]     operationsToProceed, epsilon  given to execute() and to the checks
+template <typename PARTICLE, class REAl, typename OCTREECLASS1,
+          typename OCTREECLASS2,class FmmClass1, class FmmClass2>
+void checkWithDuplicatedTree( const int& myrank, const PARTICLE &arrayParticles,
+                              OCTREECLASS1    & treeCheck,
+                              FmmClass1       & algorithm,
+                              OCTREECLASS2    & grouptree,
+                              FmmClass2       & groupalgo,
+                              const int &operationsToProceed,
+                              const REAl& epsilon ) {
+  //
+  std::cout << "checkWithDuplicatedTree - nb part " <<  arrayParticles.size()  <<std::endl;
+  //  Run the FMM whose result is used by the checks below
+  algorithm.execute(operationsToProceed);
+
+  // One solution file per rank
+  std::string fileName = "output-Let-" + std::to_string(myrank) + ".fma" ;
+  groupTree::saveSolutionInFile(fileName, arrayParticles.size() ,treeCheck) ;
+
+  // Cells first, then leaves
+  groupTree::checkCellTree(grouptree, groupalgo, treeCheck, epsilon) ;
+  groupTree::checkLeaves(grouptree, groupalgo, treeCheck, epsilon) ;
+  //
+  std::cout << "Comparing is over" << std::endl;
+}
+
+
+#endif
diff --git a/Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp b/Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp
index 0ab85ceed..f79e2edc5 100644
--- a/Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp
+++ b/Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp
@@ -2,23 +2,26 @@
  * This file contain function to manage the FGroupLinearTree and build a
  * GroupTree with LET
  * The LET is the Local Essential Tree
- * The LET is the symbolic information of leaf for P2P and M2L operation
+ * The LET is the symbolic information of leaf for P2P M2L and M2M operation
  *
  * @author benjamin.dufoyer@inria.fr
  */
-
+// ==== CMAKE =====
+// @FUSE_MPI
+// ================
+//
 
 #ifndef _FDISTRIBUTED_GROUPTREE_BUILDER_HPP_
 #define _FDISTRIBUTED_GROUPTREE_BUILDER_HPP_
 
-#include "FGroupTree.hpp"
-#include "FOutOfBlockInteraction.hpp"
 #include <cmath>
 #include <algorithm>
-
 #include <stdint.h>
 #include <limits.h>
 
+#include "inria/algorithm/distributed/mpi.hpp"
+
+
 // Define a MPI type for std::size_t
 #if SIZE_MAX == UCHAR_MAX
    #define my_MPI_SIZE_T MPI_UNSIGNED_CHAR
@@ -31,187 +34,13 @@
 #elif SIZE_MAX == ULLONG_MAX
    #define my_MPI_SIZE_T MPI_UNSIGNED_LONG_LONG
 #else
-   #error "what is happening here?"
+   #error "FDistributedGroupTreeBuilder.hpp: MPI_TYPE what is happening here?"
 #endif
 
-#define MAX_SIZE_MPI_MESSAGE 4000000
 
 
 namespace dstr_grp_tree_builder{
 
-
-/**
- * Return the number of MPI Message needed to send this buffer according to the
- * MAX_SIZE_MPI_MESSAGE variables define in the front of this documents
- * @author benjamin.dufoyer@inria.fr
- * @param  size_buffer  size of the buffer
- * @return              the number of message
- */
-unsigned get_nb_mpi_msg(long unsigned size_buffer){
-    unsigned nb_message = 1;
-    while(size_buffer > MAX_SIZE_MPI_MESSAGE){
-        size_buffer -= size_buffer - MAX_SIZE_MPI_MESSAGE;
-        nb_message++;
-    }
-    return nb_message;
-}
-unsigned get_nb_mpi_msg(int size_obj, int nb_obj_to_send){
-    return get_nb_mpi_msg(size_obj*nb_obj_to_send);
-}
-template<class send_type>
-unsigned get_nb_mpi_msg(send_type obj_to_send, int nb_obj_to_send){
-    return get_nb_mpi_msg(sizeof(send_type)*nb_obj_to_send);
-}
-
-
-/**
- * This function return the number of element to put into a MPI message
- * according to the MAX_SIZE_MPI_MESSAGE variables define in the front of this
- * documents
- * @author benjamin.dufoyer@inria.fr
- * @param  obj_to_send      a object to send
- * @param  nb_obj_to_send   number of object to send
- * @return                  the number of element per message
- */
-unsigned get_nb_elt_interval(unsigned long int size_buffer){
-    return (unsigned)MAX_SIZE_MPI_MESSAGE/(unsigned)size_buffer;
-}
-template<class send_type>
-unsigned get_nb_elt_interval(send_type obj_to_send, int nb_obj_to_send){
-    return get_nb_elt_interval(size_of(obj_to_send)*nb_obj_to_send);
-}
-unsigned get_nb_elt_interval(int size_obj, int nb_obj_to_send){
-    return get_nb_elt_interval(size_obj*nb_obj_to_send);
-}
-
-/**
- * This function split MPI message if the buffer is too large
- * this funciton run with the irecv_splited
- * She send 1 or more message if the buffer have a
- * good size and send more than 1 message if the size of the buffer
- * is bigger than MAX_SIZE_MPI_MESSAGE define at the front of this documents
- * @author benjamin.dufoyer@inria.fr
- * @param  conf                 MPI Conf
- * @param  addr_send            Vector address of data to send
- * @param  idx_to_send          index where the data to send start
- * @param  nb_element_to_send   number of element to send
- * @param  destination          number of the destination proc
- * @param  tag                  MPI tag
- */
-template<class class_sended>
-void isend_splited(const inria::mpi_config& conf,
-                   std::vector<class_sended>* addr_send,
-                   unsigned* idx_to_send,
-                   std::size_t nb_element_to_send,
-                   int& destination,
-                   int tag = 1 )
-{
-    // getting usefull variable
-    unsigned  size_buffer = (unsigned)sizeof(class_sended)*(unsigned)nb_element_to_send;
-    unsigned nb_message = get_nb_mpi_msg(size_buffer);
-    // Check the number of message
-    if(nb_message > 1){
-        unsigned nb_elt_interval = get_nb_elt_interval(size_buffer);
-        // Send all messages
-        unsigned nb_elt;
-        for(unsigned i = 0; i < nb_message ; ++i ){
-            if(nb_element_to_send > nb_elt_interval){
-                nb_elt = nb_elt_interval;
-                nb_element_to_send -= nb_elt_interval;
-            } else {
-                // last message
-                nb_elt = (unsigned)nb_element_to_send;
-            }
-            conf.comm.isend(
-                &addr_send->data()[*idx_to_send],
-                (int)sizeof(class_sended)*(int)nb_elt,
-                MPI_CHAR,
-                destination,tag
-            );
-            *idx_to_send += nb_elt;
-        }
-    } else {
-        // send 1 message if the buffer is not too big
-        conf.comm.isend(
-            &addr_send->data()[*idx_to_send],
-            (int)sizeof(class_sended)*(int)nb_element_to_send,
-            MPI_CHAR,
-            destination,tag
-        );
-        *idx_to_send += (unsigned)nb_element_to_send;
-    }
-}
-
-/**
- * This function post 1 or more MPI Irecv. She check if the buffer is too big
- * She modify dynamicly the vector of request for the waitAll, she realloc
- * every time when it's needly
- * @author benjamin.dufoyer@inria.fr
- * @param  conf                 MPI conf
- * @param  vector_request       Adress of the vector with MPI status
- * @param  idx_request          Index of the current MPI status
- * @param  addr_recev           Address of the vector where data will be stock
- * @param  idx_reception        Index of the vector where data will be stock
- * @param  nb_element_to_recv   Number of element to recv
- * @param  destination          number of the destination proc
- * @param  tag                  tag of the communication
- */
-template<class class_recv>
-void irecv_splited(const inria::mpi_config& conf,
-                    std::vector<inria::mpi::request>* vector_request,
-                    int* idx_request,
-                    std::vector<class_recv>* addr_recev,
-                    unsigned* idx_reception,
-                    std::size_t  nb_element_to_recv,
-                    int& destination,
-                    int tag = 1 )
-{
-    // getting usefull variable
-    unsigned long int size_buffer = sizeof(class_recv)*nb_element_to_recv;
-    unsigned nb_message = get_nb_mpi_msg(size_buffer);
-    // check if this function is call at good time
-    if( nb_message > 1){
-        unsigned nb_elt_interval = get_nb_elt_interval(size_buffer);
-        // resize the vector of request
-        {
-            // we do -1 because, we don't count the message already allocate
-            unsigned current_nb_msg = (unsigned)vector_request->size()-1;
-            vector_request->resize(current_nb_msg+nb_message);
-        }
-        // send the good number of message
-        unsigned nb_elt = 0;
-        for(unsigned i = 0; i < nb_message ; ++i ){
-            // compute the number of element recev
-            if(nb_element_to_recv > nb_elt_interval){
-                nb_elt = nb_elt_interval;
-                nb_element_to_recv -= nb_elt_interval;
-            } else {
-                // last message
-                nb_elt = (unsigned)nb_element_to_recv;
-            }
-            vector_request->data()[*idx_request] =
-                conf.comm.irecv(
-                    &addr_recev->data()[*idx_reception],
-                    (int)sizeof(class_recv)*(int)nb_elt,
-                    MPI_CHAR,
-                    destination,tag
-                );
-                *idx_reception += nb_elt;
-                *idx_request+=1;
-        }
-    } else {
-        vector_request->data()[*idx_request] =
-            conf.comm.irecv(
-                &addr_recev->data()[*idx_reception],
-                (int)sizeof(class_recv)*(int)nb_element_to_recv,
-                MPI_CHAR,
-                destination,tag
-            );
-        *idx_request += 1;
-        *idx_reception =+ (unsigned)nb_element_to_recv;
-    }
-}
-
 /**
  * fill_new_linear_tree this function fill the new linear tree with the value
  * of the current linear tree
@@ -242,9 +71,11 @@ void fill_new_linear_tree(
     unsigned destination_interval = max_destination-min_destination;
     unsigned source_interval = max_copy-min_copy;
     if(source_interval < destination_interval){
-        memcpy(&destination->data()[min_destination],&source->data()[min_copy],sizeof(node_t)*source_interval);
+        memcpy(&destination->data()[min_destination],&source->data()[min_copy],
+	       sizeof(node_t)*source_interval);
     } else {
-        memcpy(&destination->data()[min_destination],&source->data()[min_copy],sizeof(node_t)*destination_interval);
+        memcpy(&destination->data()[min_destination],&source->data()[min_copy],
+	       sizeof(node_t)*destination_interval);
     }
 }
 
@@ -274,7 +105,7 @@ void fill_new_linear_tree(
      //int* array_global_nb_leaf  = (int *)malloc(sizeof(int) * nb_proc); //nb leaf
      const int  my_rank        = conf.comm.rank();
      // Check if i have leaf on my proc
-     FAssert(nb_local_leaf > 0);
+     FAssertLF(nb_local_leaf > 0);
      // Distribute the local number of leaf to every process
      conf.comm.allgather(&nb_local_leaf,
                      1,
@@ -292,33 +123,36 @@ void fill_new_linear_tree(
      int nb_leaf_needed  = nb_local_group * group_size;
      // Check if we habe enought leafs for every proc
      if( (nb_leaf_needed*(nb_proc-1)) > nb_global_leaf ){
-         std::cout << " nb_leaf_needed : " << nb_leaf_needed << std::endl;
-         std::cout << " nb_global_leaf : " << nb_global_leaf << std::endl;
-         std::cout << " res :  " << (nb_leaf_needed*(nb_proc-1)) << std::endl;
+         std::cout << " nb_leaf_needed: " << nb_leaf_needed << std::endl;
+         std::cout << " nb_global_leaf: " << nb_global_leaf << std::endl;
+         std::cout << " res:            " << (nb_leaf_needed*(nb_proc-1)) << std::endl;
      }
-     FAssert( (nb_leaf_needed*(nb_proc-1)) < nb_global_leaf );
+     FAssertLF( (nb_leaf_needed*(nb_proc-1)) < nb_global_leaf );  // OC: Pourquoi cela ? Ne suffit il pas de faire un exit dans le if ??
 
      struct message_info{
          int process_rank;
          int nb_leaf;
      };
-     // We stock the future interaction in 2 vector
+     // We stock the future interaction in 2 vectors
      std::vector<message_info> interaction_send;
      std::vector<message_info> interaction_recev;
 
      // The number of leaf send and revev from left
      // it's used to fill the new linear_tree
-     int nb_leaf_recev_left = 0;
+     int nb_leaf_recev_left  = 0;
      int nb_leaf_recev_right = 0;
-
      int nb_leaf_send_right  = 0;
      int nb_leaf_send_left   = 0;
+     //
      // COMPUTE FOR LEFT PROCESS
      // Check to know if the current proc need to send leaf
      // The compute is from left to right because it's the right process
      // who don't have a fix number of particle
-     if(!my_rank == 0){ //The first process don't have computation on his left
-         for(int i = 1 ; i < my_rank ; i++ ){
+     //
+     // OC: Could a 1D topology be attached to the communicator to simplify the code?
+     //
+     if(!my_rank == 0){ //The first process don't have computation on his left  OC: Execpt in periodic
+         for(int i = 1 ; i < my_rank ; ++i ){
              array_global_nb_leaf[i] += array_global_nb_leaf[i-1];
          }
          // Check if on left process need leaf or have too many leaf
@@ -373,7 +207,7 @@ void fill_new_linear_tree(
          }
      }
 
-     // Now we have 2 vector with all interaction with other process
+     // Now we have 2 vectors with all interaction with other process
      // in the first we will post every recev message
      // in a second time we post every send message
 
@@ -400,8 +234,8 @@ void fill_new_linear_tree(
      }
 
      ////Posting sending message
-     for(unsigned i = 0 ; i < (unsigned)interaction_send.size(); i++ ){
-         int size_send = (int)sizeof(node_t)*interaction_send[i].nb_leaf;
+     for(unsigned i = 0 ; i < (unsigned)interaction_send.size(); ++i ){
+         int sizeToSend = (int)sizeof(node_t)*interaction_send[i].nb_leaf;
          // Compute the pointer to send cell
          unsigned start = 0;
          if(my_rank < interaction_send[i].process_rank){
@@ -410,7 +244,7 @@ void fill_new_linear_tree(
 
          //sending leaf
          conf.comm.isend(&linear_tree->data()[start],
-                        size_send,
+                        sizeToSend,
                         MPI_CHAR,
                         interaction_send[i].process_rank,1);
      }
@@ -426,11 +260,10 @@ void fill_new_linear_tree(
                           nb_leaf_send_left,
                           nb_leaf_send_right);
 
-
-
-
-     // waiting for the end of MPI request
+     // waiting for the end of all the MPI requests of tab_mpi_status
+     // useful as the buffers are local to the procedure
      inria::mpi::request::waitall(interaction_recev.size(),tab_mpi_status);
+     //
 
      //free(array_global_nb_leaf);
      // swaping linear_tree pointer
@@ -455,7 +288,7 @@ void fill_new_linear_tree(
           class type2_t>
  void share_particle_division(
      const inria::mpi_config& conf,
-     std::pair<type1_t,type2_t> my_pair,
+     std::pair<type1_t,type2_t>& my_pair,
      std::vector<std::pair<type1_t,type2_t>>& particle_index_distribution
  ){
      conf.comm.allgather(
@@ -484,8 +317,8 @@ void share_particle_division(
     std::vector<particle_t>& particle,
     std::vector<std::pair<type1_t,type2_t>>& particle_index_distribution)
 {
-    FAssert(particle_index_distribution.size() == (unsigned)conf.comm.size());
-    FAssert(particle.size() > 0);
+    FAssertLF(particle_index_distribution.size() == (unsigned)conf.comm.size());
+    FAssertLF(particle.size() > 0);
 
     std::pair<type1_t,type2_t> my_idx;
     my_idx.first = particle.front().morton_index;
@@ -508,9 +341,6 @@ std::vector<MortonIndex> sort_and_delete_duplicate_data(
     if(nb_data != 0) {
 
         // Sort every morton index
-        //std::sort(data_to_modify.begin(),data_to_modify.begin()+nb_data, [](MortonIndex a, MortonIndex b){
-        //    return a < b;
-        //});
         FQuickSort<MortonIndex>::QsSequential(data_to_modify.data(),nb_data);
 
         // Compute the number of different morton index
@@ -520,7 +350,7 @@ std::vector<MortonIndex> sort_and_delete_duplicate_data(
         for(unsigned i = 1 ; i < nb_data ; ++i){
             if(last_m_idx != data_to_modify[i]){
                 last_m_idx = data_to_modify[i];
-                nb_leaf++;
+                ++nb_leaf;
             }
         }
         // Alloc the returned vector
@@ -548,7 +378,6 @@ std::vector<MortonIndex> sort_and_delete_duplicate_data(
 
 
 /**
- * IDEA Factoriser la fin avec la fonction pour le M2L IDEA
  * This function compute the morton index of every leaf needed for the P2P
  * First we compute every morton index needed for every leaf
  * We sort the result
@@ -567,14 +396,14 @@ std::vector<MortonIndex> get_leaf_P2P_interaction(
     const MortonIndex& local_min_m_idx,
     const MortonIndex& local_max_m_idx
 ){
-    // 26 is for every interaction
-    std::vector<MortonIndex> external_interaction(tree.getTotalNbLeaf()*26,0);
+    // 26 is the number of neighbors of one cell
+   std::vector<MortonIndex> external_interaction(tree.getTotalNbLeaf()*26,0); //OC: Tableau tres grand
     // Reset interactions
     // idx to know where we are in the vector
     unsigned idx_vector= 0;
     // First leaf level
     {
-        // We iterate on every particle group
+        // We iterate on all particle group // OC: Local on the each proc ?
         for(int idxGroup = 0 ; idxGroup < tree.getNbParticleGroup() ; ++idxGroup){
             // get the particle group
             // it's ugly but, if i use template, it's not convenient
@@ -598,7 +427,9 @@ std::vector<MortonIndex> get_leaf_P2P_interaction(
                     // Iterate on every neighbors
                     for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
                         // Check if the current proc already have the leaf
-                        if(interactionsIndexes[idxInter] < local_min_m_idx  || interactionsIndexes[idxInter] > local_max_m_idx ){
+                        if(interactionsIndexes[idxInter] >=  local_min_m_idx &&  interactionsIndexes[idxInter] <= local_max_m_idx){
+                            // do nothing
+                        } else {
                             // Check if the leaf exist
                             if(interactionsIndexes[idxInter] >= global_min_m_idx && interactionsIndexes[idxInter] <= global_max_m_idx ){
                                 external_interaction[idx_vector] = interactionsIndexes[idxInter];
@@ -615,9 +446,6 @@ std::vector<MortonIndex> get_leaf_P2P_interaction(
 
 
 /**
- * IDEA on peut factoriser le post traitement de du P2P avec celui qui est fait
- *  ici IDEA
- *
  * This function compute the leaf needed for the M2L operation
  * We take every leaf of the tree, get her parent, get the neigbors of
  * the parents and take every child of the parent's neighbors.
@@ -641,10 +469,11 @@ std::vector<MortonIndex> get_leaf_M2L_interaction_at_level(
         GroupOctreeClass& tree,
         int dim = 3)
 {
+
     // idx to fill the vector
     unsigned idx_vector = 0;
     // All External leaf
-    std::vector<MortonIndex> external_interaction(tree.getNbCellGroupAtLevel(level)*tree.getNbElementsPerBlock()*216,0);
+    std::vector<MortonIndex> external_interaction(tree.getNbCellGroupAtLevel(level)*tree.getNbElementsPerBlock()*189,0);
     // iterate on the group
     for(int idxGroup = 0 ; idxGroup < tree.getNbCellGroupAtLevel(level) ; ++idxGroup){
         auto* containers = tree.getCellGroup(level,idxGroup);
@@ -655,11 +484,9 @@ std::vector<MortonIndex> get_leaf_M2L_interaction_at_level(
                 ++leafIdx){
             // Getting the current morton index
             curr_m_idx  = containers->getCellMortonIndex(leafIdx);
-            // Compute the morton index of the father
-            // If it's a new father
             // Compute coordinate
-            MortonIndex interactionsIndexes[216];
-            int interactionsPosition[216];
+            MortonIndex interactionsIndexes[189];
+            int interactionsPosition[189];
             FTreeCoordinate coord(curr_m_idx);
             // Getting neigbors of the father
             int counter = coord.getInteractionNeighbors(level,interactionsIndexes,interactionsPosition);
@@ -668,12 +495,11 @@ std::vector<MortonIndex> get_leaf_M2L_interaction_at_level(
                 if( tmp    >= global_min_m_idx
                     && tmp <= global_max_m_idx)
                 {
-                    if(tmp < local_min_m_idx ||
-                       tmp > local_max_m_idx){
+                    if(tmp >= local_min_m_idx &&
+                        tmp <= local_max_m_idx){
+                            // do nothing
+                    } else {
                         //Stock the leaf
-                        if(idx_vector > external_interaction.size()){
-                            std::cout << "ERROR " << std::endl;
-                        }
                         external_interaction[idx_vector] = tmp;
                         ++idx_vector;
                     }
@@ -730,7 +556,7 @@ std::vector<MortonIndex> concat_M2L_P2P(
         nb_leaf += (leaf_P2P.size()) - idx_P2P;
     }
     // Allocate the vector
-    std::vector<MortonIndex> leaf_needed(nb_leaf,0);
+    std::vector<MortonIndex> leaf_needed(nb_leaf,-1);
     idx_P2P = 0;
     idx_M2L = 0;
     std::size_t idx_leaf = 0;
@@ -752,19 +578,24 @@ std::vector<MortonIndex> concat_M2L_P2P(
             ++idx_P2P;
             ++idx_M2L;
         }
-        idx_leaf++;
+        ++idx_leaf;
     }
+  //   std::cout << "  idx_leaf  " << idx_leaf << " nb_leaf "  << nb_leaf <<std::endl;
     // Copy the rest of leaf with a memcpy
     if(idx_leaf < nb_leaf){
-        void* destination = &leaf_needed.data()[idx_leaf];
+  //       std::cout << "  MEMCOPY " << std::endl;
+        void* destination =  &leaf_needed.data()[idx_leaf];
         void* source;
         std::size_t num = 0;
         if(idx_P2P == leaf_P2P.size()){
+ //           std::cout << "    M2L " <<idx_M2L << "  " << leaf_M2L[idx_M2L]<< " "
+  //                    << leaf_M2L.size() -1 << "  " << leaf_M2L[leaf_M2L.size() -1 ]
+ //                        << "  "<<  leaf_M2L.size() << " " << leaf_M2L.size() - idx_M2L <<std::endl;
             source = &leaf_M2L[idx_M2L];
-            num = sizeof(MortonIndex)* ((leaf_M2L.size()-1) - idx_M2L);
+            num = sizeof(MortonIndex)*(leaf_M2L.size() - idx_M2L);
         } else {
             source = &leaf_P2P[idx_P2P];
-            num = sizeof(MortonIndex)* ((leaf_P2P.size()-1) - idx_P2P);
+            num = sizeof(MortonIndex)* (leaf_P2P.size() - idx_P2P);
         }
         memcpy(destination,source,num);
     }
@@ -794,33 +625,50 @@ std::vector<std::vector<std::size_t>> get_matrix_interaction(
 {
     // Getting MPI Info
     const int  nb_proc        = conf.comm.size();
+    const int  my_rank        = conf.comm.rank();
     // Alloc interaction matrix
     std::vector<std::vector<std::size_t>> matrix_interaction(2,std::vector<std::size_t>(nb_proc,0));
     std::vector<std::size_t> global_matrix_interaction(nb_proc,0);
     // Initialise idx on particle_distribution
-    std::size_t idx_part = 0;
-    // Interate on every leaf to know where she is
+    size_t idx_part = 0;
+    // Iterate on every leaf to know where she is
+    MortonIndex max_morton_index = 0;
+    if(needed_leaf.size() > 0)
+        max_morton_index = needed_leaf[needed_leaf.size()-1]+1;
+    // iterate on every mortonIndex
     for(unsigned idx_leaf = 0; idx_leaf < needed_leaf.size(); ++idx_leaf){
         MortonIndex current_leaf = needed_leaf[idx_leaf];
         // if she is on the current proc
         if(current_leaf >= particle_distribution[idx_part].first
         && current_leaf <= particle_distribution[idx_part].second){
-            matrix_interaction[0][idx_part] += 1;
+            if(idx_part == (unsigned)my_rank){
+                needed_leaf[idx_leaf] = max_morton_index;
+            } else {
+                matrix_interaction[0][idx_part] += 1;
+            }
         } else {
             // While the current leaf is not on the good interval
-            while(particle_distribution[idx_part].second < current_leaf){
+            while(idx_part < particle_distribution.size() && particle_distribution[idx_part].second < current_leaf){
                 idx_part += 1;
             }
+            if(idx_part == particle_distribution.size())
+                break;
             if(particle_distribution[idx_part].first > current_leaf){
                 // in this case the leaf is not in interval, so she doesn't exist
-                needed_leaf[idx_leaf] = 0;
+                needed_leaf[idx_leaf] = max_morton_index;
             } else {
                 // In the case it's a normal case, we juste increment the
                 // number of leaf send at the proc idx_part
-                matrix_interaction[0][idx_part] += 1;
+                if(idx_part == (unsigned)my_rank){
+                    needed_leaf[idx_leaf] = max_morton_index;
+                } else {
+                    matrix_interaction[0][idx_part] += 1;
+                }
             }
         }
     }
+    // i don't need to send to me
+    matrix_interaction[0][my_rank] = 0;
     // now we have the number of leaf to send at every proc
     // we proceed a AlltoAll to share this information at every proc
     conf.comm.alltoall(matrix_interaction[0].data(),
@@ -830,125 +678,58 @@ std::vector<std::vector<std::size_t>> get_matrix_interaction(
                        1,
                        my_MPI_SIZE_T);
     // removing bad leaf
-    needed_leaf.erase(std::remove(needed_leaf.begin(),needed_leaf.end(),0),needed_leaf.end());
+    needed_leaf.erase(std::remove(needed_leaf.begin(),needed_leaf.end(),max_morton_index),needed_leaf.end());
 
     return {begin(matrix_interaction),end(matrix_interaction)};
 }
 
-
 /**
-* This function compute the number of block needed to send all leaf
-* stock in leaf_needed.
-* This function return a vector with all idx of block needed by the proc
-* @author benjamin.dufoyer@inria.fr
-* @param  tree         GroupTree
-* @param  leaf_needed  Vector where leaf are stock
-* @return Vector with all block idx
-*/
-template<class GroupOctreeClass>
-std::vector<MortonIndex> get_nb_block_from_leaf(GroupOctreeClass& tree,
-                  MortonIndex* leaf_needed,
-                  std::size_t nb_leaf)
-{
-    std::vector<MortonIndex> block_to_send(tree.getNbParticleGroup(),0);
-    if(nb_leaf == 0)
-        return {block_to_send.begin(),block_to_send.begin()};
-    // declaration of idx varaibles
-    unsigned idx_vector = 0;
-    unsigned idx_leaf = 0;
-    // iterate on every group
-    for(int idx_group = 0 ; idx_group < tree.getNbParticleGroup() ; ++idx_group){
-        if(idx_leaf >= nb_leaf)
-            break;
-        // get the current block
-        auto* container = tree.getParticleGroup(idx_group);
-        // get first leaf in this interval
-        while( idx_leaf < nb_leaf && container->getStartingIndex() > leaf_needed[idx_leaf]){
-            ++idx_leaf;
-        }
-        if(idx_leaf >= nb_leaf)
-            break;
-        while( container->getEndingIndex() < leaf_needed[idx_leaf] &&
-            idx_leaf < nb_leaf){
-            // if the leaf exist, keep the leaf
-            if(container->exists(leaf_needed[idx_leaf])){
-                block_to_send[idx_vector] = idx_group;
-                ++idx_vector;
-                ++idx_leaf;
-                break;
-            }
-            ++idx_leaf;
-        }
-        if(idx_leaf == nb_leaf)
-            break;
-    }
-    return {block_to_send.begin(),block_to_send.begin()+idx_vector};
-}
-/*
- template<class GroupOctreeClass>
- std::vector<MortonIndex> get_nb_block_from_node(GroupOctreeClass& tree,
-              MortonIndex* node_needed,
-              std::size_t nb_node,
-              int level,
-              std::vector<bool>* block_already_send)
- {
-     std::vector<int> block_to_send(tree.getNbCellGroupAtLevel(level),0);
-     int idx_vect = 0 ;
-     // iterate of every node
-     for(unsigned i = 0 ; i < nb_node; ++i){
-         // iteracte of every block
-         for(unsigned idxGroup = 0 ; idxGroup < (unsigned)tree.getNbCellGroupAtLevel(level) ; ++idxGroup){
-             // If the block is not already send
-             if(block_already_send->at(idxGroup) == false){
-                 auto* containers = tree.getCellGroup(level,idxGroup);
-                 if(containers->isInside(node_needed[i])){
-                     block_to_send[idx_vect] = idxGroup;
-                     ++idx_vect;
-                     block_already_send->at(idxGroup) = true;
-                 }
-             }
-         }
-     }
-     return {block_to_send.begin(),block_to_send.begin()+idx_vect};
- }*/
-
-
+ * Returns the indices of the local cell groups (blocks) at the given level
+ * that contain at least one of the requested nodes; each group is listed at
+ * most once. Per the original note, this differs from the leaf level because
+ * only the process with the smallest rank is attributed a given block.
+ * @author benjamin.dufoyer@inria.fr
+ * @param  tree                 local GroupTree
+ * @param  node_needed          Requested Morton indices (presumably sorted; verify against caller)
+ * @param  nb_node              Number of entries in node_needed
+ * @param  level                Tree level of the requested nodes
+ * @return                      Indices of the matching cell groups
+ */
  template<class GroupOctreeClass>
  std::vector<MortonIndex> get_nb_block_from_node(GroupOctreeClass& tree,
               MortonIndex* node_needed,
               std::size_t nb_node,
-              int level,
-              std::vector<bool>* block_already_send)
+              int level)
  {
      int idx_vect = 0 ;
      std::vector<int> block_to_send(tree.getNbCellGroupAtLevel(level),0);
-
      unsigned idx_node = 0;
      // iterate on every group
      for(unsigned idx_group = 0; idx_group < (unsigned)tree.getNbCellGroupAtLevel(level) ;++idx_group){
          // if the current block hasnt been already send
-        if(!block_already_send->at(idx_group)){
-            auto* containers = tree.getCellGroup(level,idx_group);
-            // check if we have check every node
-            if(idx_node == nb_node){
-                break;
-            }
-            // while the morton index of the current node is not high
-            while(idx_node < nb_node && node_needed[idx_node] < containers->getStartingIndex()){
-                ++idx_node;
-            }
-            while(idx_node < nb_node && node_needed[idx_node] < containers->getEndingIndex()){
-                 if(containers->isInside(node_needed[idx_node])){
-                     block_to_send[idx_vect] = idx_group;
-                     ++idx_vect;
-                     ++idx_node;
-                     break;
-                 }
+        auto* containers = tree.getCellGroup(level,idx_group);
+        // stop as soon as every requested node has been consumed
+        if(idx_node == nb_node){
+            break;
+        }
+        // skip the requested nodes that lie before the start of this group
+        while(idx_node < nb_node && node_needed[idx_node] < containers->getStartingIndex()){
+            ++idx_node;
+        }
+        // walk the requested nodes that are below the ending index of this group
+        while(idx_node < nb_node && node_needed[idx_node] < containers->getEndingIndex()){
+             // if the group really holds this Morton index, record the group
+             // once and leave the loop: one hit is enough to send the block
+             if(containers->isInside(node_needed[idx_node])){
+                 block_to_send[idx_vect] = idx_group;
+                 ++idx_vect;
                   ++idx_node;
-            }
-            if(idx_node == nb_node){
-                break;
-            }
+                 break;
+             }
+             ++idx_node;
+        }
+        if(idx_node == nb_node){
+            break;
         }
      }
      return {block_to_send.begin(),block_to_send.begin()+idx_vect};
@@ -971,20 +752,17 @@ std::vector<MortonIndex> get_nb_block_from_leaf(GroupOctreeClass& tree,
  void send_get_number_of_block_node_level(
      std::vector<MortonIndex>& vect_recv,
      std::vector<std::vector<std::size_t>> global_matrix_interaction,
-     //std::vector<std::size_t>& global_matrix_interaction,
      std::size_t& nb_msg_recv,
      GroupOctreeClass& tree,
      std::vector<std::pair<int,int>>& nb_block_to_receiv,
-     std::vector<std::pair<int,std::vector<MortonIndex>>>& leaf_to_send,
+     std::vector<std::pair<int,std::vector<MortonIndex>>>& block_to_send,
      int level,
      const inria::mpi_config& conf
  )
  {
      int idx_status = 0;
-     int idx_proc = 0;
+     int idx_proc   = 0;
      inria::mpi::request tab_mpi_status[nb_msg_recv];
-     bool leaf_level = (tree.getHeight()-1 == level);
-     std::vector<bool> block_already_send(tree.getNbCellGroupAtLevel(level),false);
 
      // Post the number reception of the number of block
      for(unsigned i = 0; i < global_matrix_interaction[0].size() ; ++i)
@@ -1010,27 +788,20 @@ std::vector<MortonIndex> get_nb_block_from_leaf(GroupOctreeClass& tree,
          // If we have interaction with this proc
          if(global_matrix_interaction[1][i] != 0){
              // Compute the number of leaf
-             if(leaf_level){
-                 leaf_to_send[idx_status].second = get_nb_block_from_leaf(
-                                     tree,
-                                     &vect_recv.data()[idx_vect],
-                                     global_matrix_interaction[1][i]);
-            } else {
-                 leaf_to_send[idx_status].second = get_nb_block_from_node(
-                                     tree,
+             int  nb_block;
+             block_to_send[idx_status].second = get_nb_block_from_node(
+                                 tree,
                                  &vect_recv.data()[idx_vect],
-                                     global_matrix_interaction[1][i],
-                                     level,
-                                     &block_already_send);
-             }
-             int nb_block = (int)leaf_to_send[idx_status].second.size();
-             leaf_to_send[idx_status].first = idx_proc;
+                                 global_matrix_interaction[1][i],
+                                 level);
+             nb_block = (int)block_to_send[idx_status].second.size();
+             block_to_send[idx_status].first = i;
              // send the number of leaf
              conf.comm.isend(
                  &nb_block,
                  1,
                  MPI_INT,
-                 idx_proc,1
+                 i,1
              );
              idx_vect += global_matrix_interaction[1][i];
              idx_status += 1;
@@ -1057,7 +828,6 @@ std::vector<MortonIndex> get_nb_block_from_leaf(GroupOctreeClass& tree,
 std::vector<MortonIndex> send_get_leaf_morton(
     std::vector<MortonIndex>&     needed_leaf,
     std::vector<std::vector<std::size_t>>&     global_matrix_interaction,
-    //std::vector<std::size_t>&     global_matrix_interaction,
     std::size_t&                  nb_msg_recv,
     std::size_t&                  nb_leaf_recv,
     const inria::mpi_config& conf)
@@ -1075,15 +845,15 @@ std::vector<MortonIndex> send_get_leaf_morton(
     // Posting every recv message
     for(unsigned i = 0; i < global_matrix_interaction[1].size() ; ++i ){
         if(global_matrix_interaction[1][i] != 0){
-            irecv_splited(
-                conf,
-                &tab_mpi_status,
-                &idx_status,
-                &vect_recv,
-                &idx_vect,
-                global_matrix_interaction[1][i],
-                idx_proc,1
+            std::size_t nb_leaf = global_matrix_interaction[1][i];
+            tab_mpi_status[idx_status] = conf.comm.irecv(
+                &vect_recv[idx_vect],
+                int(nb_leaf*sizeof(MortonIndex)),
+                MPI_CHAR,
+                i,1
             );
+            idx_vect += (unsigned)nb_leaf;
+            idx_status+= 1;
         }
         idx_proc += 1;
     }
@@ -1093,39 +863,47 @@ std::vector<MortonIndex> send_get_leaf_morton(
     idx_vect = 0;
     for(unsigned i = 0; i < global_matrix_interaction[0].size() ; ++i){
         if(global_matrix_interaction[0][i] != 0){
-            isend_splited(
-                conf,
-                &needed_leaf,
-                &idx_vect,
-                global_matrix_interaction[0][i],
-                idx_proc,1
+            std::size_t nb_leaf = global_matrix_interaction[0][i];
+            conf.comm.isend(
+                &needed_leaf[idx_vect],
+                int(nb_leaf*sizeof(MortonIndex)),
+                MPI_CHAR,
+                i,1
             );
+            idx_vect += (unsigned)nb_leaf;
         }
         idx_proc += 1;
     }
     if(nb_msg_recv != 0 ){
-        inria::mpi::request::waitall(tab_mpi_status.size(),tab_mpi_status.data());
+        inria::mpi::request::waitall(idx_status,tab_mpi_status.data());
     }
+    conf.comm.barrier();
     return{begin(vect_recv),end(vect_recv)};
 
 }
 
 
+struct particle_symbolic_block{ // symbolic description of a particle block (counts only, no particle data)
+    int idx_global_block;                    // global index of the block (getIdxGlobal() of the sender's particle group)
+    FSize nb_particles;                      // total number of particles in the block
+    std::vector<FSize> nb_particle_per_leaf; // number of particles in each leaf of the block
+    friend // debug printer: only the particle count is printed
+    std::ostream& operator<<(std::ostream& os, const particle_symbolic_block& n) {
+        return os << "--->  nb particle " << n.nb_particles << "<--";
+    }
+};
 
-/**
- * This struct is used to stock information who wille be send to other proc
- */
-struct block_t{
-    std::size_t n_block;
+struct cell_symbolic_block{ // symbolic description of a cell block (bounds and Morton indices, no cell data)
+    int idx_global_block; // global index of the block (getIdxGlobal() of the sender's cell group)
     MortonIndex start_index;
     MortonIndex end_index;
     int nb_leaf_in_block;
-    // used to show the block
+    std::vector<MortonIndex> m_idx_in_block; // Morton index of each cell of the block
+    // debug printer: m_idx_in_block is not printed
     friend
-    std::ostream& operator<<(std::ostream& os, const block_t& n) {
-        return os << "--> n_block : " << n.n_block << " start : " << n.start_index << " end : " << n.end_index << " nb_leaf " << n.nb_leaf_in_block  <<  "<--";
+    std::ostream& operator<<(std::ostream& os, const cell_symbolic_block& n) {
+        return os << "--> n_block : " << n.idx_global_block << " start : " << n.start_index << " end : " << n.end_index << " nb_leaf " << n.nb_leaf_in_block  << "<--";
     }
-
 };
 
 /**
@@ -1143,22 +921,39 @@ struct block_t{
  * @param  tree              it's the GroupTree where block are stock
  * @param  conf                it's the MPI conf
  */
+
 template<class GroupOctreeClass>
-std::vector<std::vector<block_t>> exchange_block(
-    std::vector<std::pair<int,int>> nb_block_to_receiv,
-    std::vector<std::pair<int,std::vector<MortonIndex>>> block_to_send,
+std::pair<std::vector<cell_symbolic_block>,std::vector<particle_symbolic_block>> exchange_block(
+    std::vector<std::pair<int,int>>& nb_block_to_receiv,
+    std::vector<std::pair<int,std::vector<MortonIndex>>>& block_to_send,
     GroupOctreeClass& tree,
     int level,
     const inria::mpi_config& conf
 )
 {
+    struct sending_cell_structure{
+        int idx_global_block;
+        MortonIndex start_index;
+        MortonIndex end_index;
+        int nb_leaf_in_block;
+    };
+
+    int my_rank =  conf.comm.rank();
+    bool leaf_level = ( level == tree.getHeight() -1 );
+    int block_size = tree.getNbElementsPerBlock();
     // declaration of the array of MPI status for synchro
-    unsigned nb_message_recv = 0;
+    unsigned nb_message_recv  = 0;
+    unsigned nb_block_to_recv = 0;
     for(unsigned i = 0 ; i <  nb_block_to_receiv.size() ;++i ){
-        if(nb_block_to_receiv[i].second != 0 ){
+        if(nb_block_to_receiv[i].second != 0 && nb_block_to_receiv[i].first != my_rank){
+            // computing of the number of message and the number of block to recv
             ++nb_message_recv;
+            nb_block_to_recv += nb_block_to_receiv[i].second;
         }
     }
+    if(leaf_level)
+        nb_message_recv = nb_message_recv+ (nb_message_recv*2);
+    // compute the total number of message
     std::vector<inria::mpi::request> tab_mpi_status(nb_message_recv);
 
     // Compute the total number of block i will send
@@ -1167,69 +962,279 @@ std::vector<std::vector<block_t>> exchange_block(
         total_size += block_to_send[i].second.size();
     }
 
-    // Declaration of the buffer of block
-    std::vector<block_t> data_to_send(total_size);
+    // Buffer to send the cell structure
+    std::vector<sending_cell_structure>             cell_symb_to_send(total_size);
+
+    // buffer to send the morton index
+    std::vector<size_t>                morton_index_send(total_size*block_size,0);
+    // buffer to send particles block
+    std::vector<FSize>  nb_particle_per_leaf(0,0);
+    std::vector<unsigned> particle_symb_to_send(0,0);
+
+    if(leaf_level){
+        nb_particle_per_leaf.resize(total_size*block_size,0);
+        particle_symb_to_send.resize(total_size);
+    }
+
+
     std::size_t idx_vect_to_send = 0;
+    std::size_t idx_m_idx        = 0;
     // Filling the buffer of block
     for(unsigned i = 0 ; i < block_to_send.size(); ++i){
-        for(unsigned j = 0 ; j < block_to_send[i].second.size() ; j++){
-            auto* container = tree.getCellGroup(level ,((int)block_to_send[i].second[j]));
-            block_t block_to_add{
-                (size_t)block_to_send[i].second[j],
-                container->getStartingIndex(),
-                container->getEndingIndex(),
-                container->getNumberOfCellsInBlock()
-            };
-            data_to_send[idx_vect_to_send] = block_to_add;
-            ++idx_vect_to_send;
+        for(unsigned j = 0 ; j < block_to_send[i].second.size() ; ++j){
+            if(block_to_send[i].first != my_rank){
+                auto* container = tree.getCellGroup(level ,((int)block_to_send[i].second[j]));
+                sending_cell_structure block_to_add{
+                    container->getIdxGlobal(),
+                    container->getStartingIndex(),
+                    container->getEndingIndex(),
+                    container->getNumberOfCellsInBlock()
+                };
+                // Get all morton index of the block
+                for(int k = 0 ; k < container->getNumberOfCellsInBlock(); ++k ){
+                    morton_index_send[idx_m_idx+k] = container->getCellMortonIndex(k);
+                }
+                for(int k = container->getNumberOfCellsInBlock(); k  < block_size; ++k ){
+                    morton_index_send[idx_m_idx+k] = container->getCellMortonIndex(container->getNumberOfCellsInBlock()-1);
+                }
+                // add the block to the vector
+                cell_symb_to_send[idx_vect_to_send] = block_to_add;
+                if(leaf_level){
+                    // get the particle container associated
+                    auto* container_particle =  tree.getParticleGroup(((int)block_to_send[i].second[j]));
+                    particle_symb_to_send[idx_vect_to_send] = container_particle->getIdxGlobal();
+                    // iterate on every leaf
+                    for(int k = 0 ; k < container_particle->getNumberOfLeavesInBlock(); ++k){
+                        // stock the number of particles in the leaf
+                        nb_particle_per_leaf[idx_m_idx+k] = container_particle->getNbParticlesInLeaf(k);
+                    }
+                }
+                idx_m_idx += block_size;
+                ++idx_vect_to_send;
+            }
         }
     }
-    // Posting recv
-    std::vector<std::vector<block_t>> block_t_recv(nb_block_to_receiv.size());
+    // Now i have my vector(s) to send all of my blocks
+
+    // the first vector will contain all of cell_block and the send all of
+    // particle block
+    std::vector<sending_cell_structure> symbolic_block_rcv(nb_block_to_recv);
+
+    int size_of_vect = nb_block_to_recv*block_size;
+    std::vector<FSize>       nb_part_leaf(0,0);
+    std::vector<unsigned>    idx_global_particle_rcv(0,0);
+    if(leaf_level){
+        nb_part_leaf.resize(size_of_vect,0);
+        idx_global_particle_rcv.resize(nb_block_to_recv,0);
+    }
+
+
     int idx_status = 0;
+    unsigned offset_block = 0;
+    unsigned offset_m_idx = 0;
+    // Posting recv
     for(unsigned i = 0; i < nb_block_to_receiv.size(); ++i)
     {
+        if(nb_message_recv == 0)
+            break;
         // Setting parameter
         int source   = nb_block_to_receiv[i].first;
         int nb_block = nb_block_to_receiv[i].second;
-        if(nb_block != 0){
-            block_t_recv[i].resize(nb_block);
+        if(nb_block != 0 && source != my_rank){
             // Posting reveiv message
-            unsigned idx = 0;
-            irecv_splited(
-                conf,
-                &tab_mpi_status,
-                &idx_status,
-                &block_t_recv.data()[i],
-                &idx,
-                nb_block,
+            tab_mpi_status[idx_status] =
+            conf.comm.irecv(
+                &symbolic_block_rcv[offset_block],
+                int(nb_block*sizeof(sending_cell_structure)),
+                MPI_CHAR,
                 source,1
             );
+            idx_status += 1;
+
+            // if it's the leaf level, i need to recv the particle block
+            if(leaf_level){
+                tab_mpi_status[idx_status] =
+                conf.comm.irecv(
+                    &nb_part_leaf[offset_m_idx],
+                    int((nb_block*block_size*sizeof(FSize))),
+                    MPI_CHAR,
+                    source,3
+                );
+                idx_status += 1;
+
+                tab_mpi_status[idx_status] =
+                conf.comm.irecv(
+                    &idx_global_particle_rcv[offset_block],
+                    nb_block,
+                    MPI_UNSIGNED,
+                    source,4
+                );
+                idx_status += 1;
+
+            }
+            offset_block += nb_block;
+            offset_m_idx += (nb_block*block_size);
         }
     }
+    FAssertLF(idx_status == (int)nb_message_recv);
 
     // post sending message
-    unsigned offset_block = 0;
+    offset_block = 0;
+    offset_m_idx = 0;
     for(unsigned i = 0 ; i < block_to_send.size(); ++i){
-        // Setting parameters
-        int destination = block_to_send[i].first;
-        size_t nb_block    = (int)block_to_send[i].second.size();
         // Posting send message
-        if(nb_block != 0){
-            isend_splited(
-                conf,
-                &data_to_send,
-                &offset_block,
-                nb_block,
+        int nb_block    = (int)block_to_send[i].second.size();
+        int destination = block_to_send[i].first;
+
+        if(nb_block != 0 && destination != my_rank){
+            // Setting parameters
+            conf.comm.isend(
+                &cell_symb_to_send[offset_block],
+                int(nb_block*sizeof(sending_cell_structure)),
+                MPI_CHAR,
                 destination,1
             );
+
+            if(leaf_level){
+                conf.comm.isend(
+                    &nb_particle_per_leaf[offset_m_idx],
+                    int(nb_block*block_size*sizeof(FSize)),
+                    MPI_CHAR,
+                    destination,3
+                );
+                conf.comm.isend(
+                    &particle_symb_to_send[offset_block],
+                    nb_block,
+                    MPI_UNSIGNED,
+                    destination,4
+                );
+            }
+            offset_block = (offset_block+nb_block);
+            offset_m_idx = (offset_m_idx + (nb_block*block_size));
         }
+
     }
     // Waiting for all request
-    if(nb_message_recv != 0){
-        inria::mpi::request::waitall(tab_mpi_status.size(),tab_mpi_status.data());
+    inria::mpi::request::waitall(idx_status,tab_mpi_status.data());
+    // Second phase: exchange the Morton indices of the blocks (tag 2), one
+    // message per source; undo the leaf-level tripling of the first phase.
+    if(leaf_level) nb_message_recv /= 3;
+    idx_status = 0;
+    std::vector<size_t> m_idx_to_recv(size_of_vect,0);
+    std::vector<inria::mpi::request> tab_status(nb_message_recv); // heap storage: valid even when no message is expected
+    offset_block = 0;
+    offset_m_idx = 0;
+    for(unsigned i = 0; i < nb_block_to_receiv.size(); ++i)
+    {
+        if(nb_message_recv == 0)
+            break;
+        // source rank and number of blocks it announced
+        int source   = nb_block_to_receiv[i].first;
+        int nb_block = nb_block_to_receiv[i].second;
+        if(nb_block != 0 && source != my_rank){
+            // post the receive: block_size Morton indices per announced block
+            tab_status[idx_status] =
+            conf.comm.irecv(
+                &m_idx_to_recv.data()[offset_m_idx],
+                int(nb_block*block_size),
+                my_MPI_SIZE_T,
+                source,2
+            );
+            idx_status += 1;
+            offset_m_idx = (offset_m_idx + (nb_block*block_size));
+        }
+    }
+
+    offset_block = 0;
+    offset_m_idx = 0;
+    for(unsigned i = 0 ; i < block_to_send.size(); ++i){
+        // number of blocks to send and the rank they go to
+        int nb_block    = (int)block_to_send[i].second.size();
+        int destination = block_to_send[i].first;
+
+        if(nb_block != 0 && destination != my_rank){
+            conf.comm.isend(
+                &morton_index_send.data()[offset_m_idx],
+                int(nb_block*block_size),
+                my_MPI_SIZE_T,
+                destination,2
+            );
+            offset_m_idx = (offset_m_idx + (nb_block*block_size));
+        }
     }
-    return{begin(block_t_recv),end(block_t_recv)};
+    if(nb_message_recv > 0)
+        inria::mpi::request::waitall(idx_status,tab_status.data());
+    conf.comm.barrier();
+
+    if(nb_message_recv > 0){
+        std::pair<std::vector<cell_symbolic_block>,
+                std::vector<particle_symbolic_block>> pair_return;
+        pair_return.first.resize(symbolic_block_rcv.size());
+
+        if(leaf_level)
+            pair_return.second.resize(symbolic_block_rcv.size());
+        else
+            pair_return.second.resize(0);
+
+        int nb_leaf_before_me = 0;
+
+        for(unsigned i = 0 ; i < symbolic_block_rcv.size() ; ++i){
+            // filling symbolique information
+            cell_symbolic_block new_block{
+                symbolic_block_rcv[i].idx_global_block,
+                symbolic_block_rcv[i].start_index,
+                symbolic_block_rcv[i].end_index,
+                symbolic_block_rcv[i].nb_leaf_in_block
+            };
+            // filling morton index vector
+            new_block.m_idx_in_block.clear();
+            new_block.m_idx_in_block.insert(
+                new_block.m_idx_in_block.begin(),
+                m_idx_to_recv.begin()+nb_leaf_before_me,
+        m_idx_to_recv.begin()+(nb_leaf_before_me+new_block.nb_leaf_in_block));
+            // Sanity check: every received Morton index must lie within the bounds announced for its block.
+            for (size_t nb = 0; nb < new_block.m_idx_in_block.size(); nb++) {
+                if(new_block.m_idx_in_block[nb] > symbolic_block_rcv[i].end_index || new_block.m_idx_in_block[nb] < symbolic_block_rcv[i].start_index){
+                    std::cout << "ERROR" << i << '\n';
+                    std::cout << new_block.m_idx_in_block[nb] << " " ;
+                    std::cout << symbolic_block_rcv[i].end_index << " " << symbolic_block_rcv[i].start_index << '\n';
+                    std::cout << "nb_leaf_in_block "<< symbolic_block_rcv[i].nb_leaf_in_block << "\n";
+                    std::cout << m_idx_to_recv.size() << std::endl;
+                    for(int idx = nb_leaf_before_me ; idx < nb_leaf_before_me+block_size; ++idx ){ // dump the block_size slots received for this block
+                        std::cout << " " << m_idx_to_recv[idx] ;
+                    }
+                    std::cout << std::endl;
+                }
+            }
+        // adding to the vector
+        pair_return.first[i] = new_block;
+            if(leaf_level){
+                particle_symbolic_block new_p_block;
+                new_p_block.idx_global_block =idx_global_particle_rcv[i];
+                new_p_block.nb_particle_per_leaf.clear();
+                new_p_block.nb_particle_per_leaf.insert(
+                new_p_block.nb_particle_per_leaf.begin(),
+                nb_part_leaf.begin()+nb_leaf_before_me,
+                nb_part_leaf.begin()+(nb_leaf_before_me+new_block.nb_leaf_in_block));
+
+                FSize nb_particles_in_block = 0;
+
+                for(unsigned j = 0 ; j <  new_p_block.nb_particle_per_leaf.size() ; ++j){
+                    nb_particles_in_block += new_p_block.nb_particle_per_leaf[j];
+                }
+
+                new_p_block.nb_particles = nb_particles_in_block;
+                pair_return.second[i] = new_p_block;
+            }
+            nb_leaf_before_me += block_size;
+        }
+        return pair_return; // returned directly: the per-block vectors are moved, not deep-copied
+    }
+    // Nothing was received from another process: both vectors are returned
+    // empty (a default-constructed pair already holds two empty vectors).
+    std::pair<std::vector<cell_symbolic_block>,
+            std::vector<particle_symbolic_block>> pair_return;
+    return pair_return;
 }
 
 
@@ -1253,7 +1258,7 @@ void compute_block_node_level(
     int level,
     GroupOctreeClass& tree
 ){
-    FAssert(under_level.size() == current_level.size() );
+    FAssertLF(under_level.size() == current_level.size() );
     // Iterate on every interaction of the under level
     for(unsigned i = 0 ; i < under_level.size() ; ++i){
         // Init variables for the search
@@ -1356,7 +1361,7 @@ void compute_block_node_level(
  * @param  conf MPI conf
  */
 template<class GroupOctreeClass>
-std::vector<std::vector<block_t>> send_get_symbolic_block_at_level(
+std::pair<std::vector<cell_symbolic_block>,std::vector<particle_symbolic_block>> send_get_symbolic_block_at_level(
     std::vector<MortonIndex>&               needed_leaf,
     std::vector<std::vector<size_t>>&       matrix_interaction,
     GroupOctreeClass&                       tree,
@@ -1381,6 +1386,7 @@ std::vector<std::vector<block_t>> send_get_symbolic_block_at_level(
         }
     }
 
+
     ////////////////////////////////////////////////////////////
     /// FIRST STEP
     /// Getting the list of leaf needed by every proc
@@ -1392,12 +1398,14 @@ std::vector<std::vector<block_t>> send_get_symbolic_block_at_level(
             nb_msg_recv,
             nb_leaf_recv,
             conf);
+    // free needed_leaf
+    std::vector<MortonIndex>().swap(needed_leaf);
+
     ////////////////////////////////////////////////////////////
     // SECOND STEP
     // Compute the block to send to other proc
     // And send the number of block sended
     ////////////////////////////////////////////////////////////
-
     // Init variable to stock
     std::vector<std::pair<int,int>> nb_block_to_receiv(nb_msg_send);
     std::vector<std::pair<int,std::vector<MortonIndex>>>
@@ -1413,6 +1421,11 @@ std::vector<std::vector<block_t>> send_get_symbolic_block_at_level(
         level,
         conf);
 
+    std::vector<MortonIndex>().swap(vect_recv);
+
+
+
+
     ////////////////////////////////////////////////////////////
     /// THIRD STEP
     /// Getting the list of leaf needed by every proc
@@ -1425,51 +1438,629 @@ std::vector<std::vector<block_t>> send_get_symbolic_block_at_level(
               conf);
 }
 
+
 /**
- * This function call a function of groupTree to create the block recev
- * to create a LET group tree
- * The principe is, we send to the group tree the vector and
+ * This algorithm computes and sets the global index of every group of the local tree at one level
  * @author benjamin.dufoyer@inria.fr
- * @param  tree                  local group tree
- * @param  let_block             block to create the LET
- * @param  local_min_m_idx       local Morton Index of my GroupTree
+ * @param  tree local group tree whose groups receive their global index
+ * @param  conf MPI conf (its communicator is used for the exchange)
+ */
+ template<class GroupOctreeClass>
+ int set_cell_group_global_index_at(
+           GroupOctreeClass&     tree,
+           int                   level,
+           int                   nb_block_under_level,
+     const inria::mpi_config&    conf,
+           bool                  particle = false
+ ){
+     int nb_proc = conf.comm.size();
+     int my_rank = conf.comm.rank();
+     int nb_block_before_me = (nb_proc == 1) ? nb_block_under_level : 0; // alone: numbering continues locally
+     int my_nb_block;
+     if(!particle) {
+        my_nb_block = tree.getNbCellGroupAtLevel(level);
+    } else {
+        my_nb_block = tree.getNbParticleGroup();
+     }
+     // ring wrap-around: the last rank gives its running total to rank 0 (skipped when alone)
+     if(nb_proc > 1 && my_rank == 0){
+         conf.comm.recv(
+             &nb_block_before_me,
+             1,
+             MPI_INT,
+             nb_proc-1,level);
+     } else if(nb_proc > 1 && my_rank == nb_proc-1){
+         conf.comm.send(
+             &nb_block_under_level,
+             1,
+             MPI_INT,
+             0,level
+         );
+     }
+
+     if(nb_proc > 1){ // a single process has no neighbor to exchange with
+         // get the number of block before me
+         if(my_rank != 0){
+             conf.comm.recv(
+                 &nb_block_before_me,
+                 1,
+                 MPI_INT,
+                 my_rank-1,level
+             );
+         }
+         // send the number of block before me with my number
+         if(my_rank != (nb_proc-1) ){
+             int nb_block_after_me = my_nb_block + nb_block_before_me;
+             conf.comm.send(
+                 &nb_block_after_me,
+                 1,
+                 MPI_INT,
+                 my_rank+1,level
+             );
+         }
+     }
+     // Now i have the total number of block before me, i will compute
+     // the index of all of my block at this level
+     for(int idx_group = 0 ; idx_group < my_nb_block ;++idx_group){
+         if(!particle){
+            auto* container = tree.getCellGroup(level,idx_group);
+            container->setIdxGlobal(nb_block_before_me);
+        } else {
+            auto* container = tree.getParticleGroup(idx_group);
+            container->setIdxGlobal(int(nb_block_before_me));
+        }
+        ++nb_block_before_me;
+     }
+     return nb_block_before_me; // first global index left free after my groups
+}
+
+/**
+ * This function launches the computation of the global index of every
+ * group at every level
+ * @author benjamin.dufoyer@inria.fr
+ * @param  tree         local group tree
+ * @param  conf         MPI conf
+ * @param  level_min    [OPTIONAL] minimum level
  */
 template<class GroupOctreeClass>
-void add_let_leaf_block_to_tree(
-    GroupOctreeClass& tree,
-    std::vector<std::vector<block_t>>& let_block,
-    const MortonIndex& local_min_m_idx,
-    int level)
-{
-    // if we have no block to add
-    if(let_block.size() == 0)
-        return;
-
-    // Compute the number of block for each level
-    int nb_block = 0;
-    for(unsigned i = 0 ; i < let_block.size(); ++i){
-        for(unsigned j = 0 ; j < let_block[i].size() ; ++j){
-            ++nb_block;
+int set_cell_group_global_index(
+          GroupOctreeClass&     tree,
+    const inria::mpi_config&    conf,
+          int                   level_min = 1
+){
+    int nb_proc = conf.comm.size();
+    if(nb_proc > 1){
+        // Distributed case (IDEA: can be a task): number the groups rank after rank
+        int nb_block_before_me = 0;
+        // particle groups are numbered first (the level argument 0 only serves as MPI tag here)
+        nb_block_before_me = set_cell_group_global_index_at(tree,0,nb_block_before_me,conf,true);
+
+        for(int i = tree.getHeight()-1; i >= level_min ; --i){ // then cell groups, leaf level first
+            nb_block_before_me = set_cell_group_global_index_at(tree,i,nb_block_before_me,conf);
+        }
+        conf.comm.bcast( // every rank ends up with the value held by the last rank
+            &nb_block_before_me,
+            1,
+            MPI_INT,
+            nb_proc-1
+        );
+        return nb_block_before_me;
+    } else { // single process: plain sequential numbering, no communication
+        int idx_global = 0;
+        for(int i = 0 ; i < tree.getNbParticleGroup(); ++i){
+            tree.getParticleGroup(i)->setIdxGlobal(idx_global);
+            ++idx_global;
         }
+        for(int i = tree.getHeight()-1; i >= 1 ; --i){ // NOTE(review): stops at 1, not level_min, unlike the MPI branch - confirm intended
+            for(int j = 0; j < tree.getNbCellGroupAtLevel(i) ; ++j ){
+                tree.getCellGroup(i,j)->setIdxGlobal(idx_global);
+                ++idx_global;
+            }
+        }
+        return idx_global; // number of indexes handed out
     }
+}
 
-    unsigned idx_vect = 0;
-    std::vector<block_t> leaf_block_to_add(nb_block);
-    for(unsigned i = 0 ; i < let_block.size() ;++i){
-        for(unsigned j =0 ; j < let_block[i].size();j++){
-            leaf_block_to_add[idx_vect]= let_block[i][j];
-            ++idx_vect;
+
+/**
+ * This function adds the blocks needed for the M2M operation
+ * 1) Compute the min and max morton index of my distribution at the level
+ * 2) Check if i have this morton index at the upper level
+ * 3) Share this information with my neighbors
+ * 4) Send the morton index needed and post recv of block
+ * 5) Send the block to my neighbor if needed
+ * 6) Add block to the tree
+ *
+ * [RESTRICTION] You need to add the LET to the tree BEFORE calling this
+ * function
+ *
+ * @author benjamin.dufoyer@inria.fr
+ * @param  tree     LET GroupTree
+ * @param  conf     MPI conf
+ * @param  level    Level to check
+ */
+template<class GroupOctreeClassWithLET>
+void send_get_block_M2M_at_level(
+            GroupOctreeClassWithLET&   tree,
+    const   inria::mpi_config&  conf,
+            int                 level
+){
+    // structure for sending message
+    struct sending_cell_structure_M2M{
+        int idx_global_block;
+        MortonIndex start_index;
+        MortonIndex end_index;
+        int nb_leaf_in_block;
+        int idx_global_particle_block = 0;
+    };
+    // boolean to know if we are at the leaf level
+    bool leaf_level = (tree.getHeight()-1 == level);
+    // get the block_size
+    int block_size = tree.getNbElementsPerBlock();
+    // Prepare buffer for sending to other proc (idx_global_block == -1 marks a fake block)
+    sending_cell_structure_M2M block_needer_min{-1,0,0,0};
+    sending_cell_structure_M2M block_needer_max{-1,0,0,0};
+    std::vector<MortonIndex> m_idx_min(block_size,0);
+    std::vector<MortonIndex> m_idx_max(block_size,0);
+    std::vector<FSize>       nb_particle_min(leaf_level ? block_size : 0,0);
+    std::vector<FSize>       nb_particle_max(leaf_level ? block_size : 0,0);
+    //  IDEA can be a task
+    //  Compute the minimum morton index of my distribution
+    //  iterate on every group
+    for(int idx_group = 0 ; idx_group < tree.getNbCellGroupAtLevel(level);++idx_group){
+        auto* container = tree.getCellGroup(level,idx_group);
+        // if the block is mine
+        if(container->isMine()){
+            // get the symbolic information of the block
+            block_needer_min.idx_global_block = container->getIdxGlobal();
+            block_needer_min.start_index = container->getStartingIndex();
+            block_needer_min.end_index = container->getEndingIndex();
+            block_needer_min.nb_leaf_in_block = container->getNumberOfCellsInBlock();
+            // get every morton index
+            for(int idx_cell = 0 ; idx_cell < container->getNumberOfCellsInBlock() ; ++idx_cell){
+                m_idx_min[idx_cell] = container->getCellMortonIndex(idx_cell);
+            }
+            // if it's the leaf level we need to send the number of particle
+            // too
+            if(leaf_level){
+                auto* container_particle = tree.getParticleGroup(idx_group);
+                block_needer_min.idx_global_particle_block = container_particle->getIdxGlobal();
+                // nb_particle_min already holds block_size entries at the leaf level
+                for(int idx_cell = 0 ; idx_cell < container_particle->getNumberOfLeavesInBlock() ; ++idx_cell){
+                    nb_particle_min[idx_cell] = container_particle->getNbParticlesInLeaf(idx_cell);
+                }
+            }
+            // break the loop
+            break;
         }
     }
-    // Now i have a vector with all leaf block
-    // Sorting block
-    std::sort(leaf_block_to_add.begin(),leaf_block_to_add.end(),[](block_t a, block_t b){
-            return a.start_index < b.start_index;
-    });
-    // Add the block to the tree
-    tree.add_LET_block(leaf_block_to_add,level,local_min_m_idx);
+    // IDEA can be a task
+    // compute the maximum morton index of my distribution
+    // iterate on every groups
+    for(int idx_group = tree.getNbCellGroupAtLevel(level)-1 ; idx_group >= 0; --idx_group){
+        auto* container = tree.getCellGroup(level,idx_group);
+        // if the block is Mine
+        if(container->isMine()){
+            // stock symbolic information
+            block_needer_max.idx_global_block = container->getIdxGlobal();
+            block_needer_max.start_index = container->getStartingIndex();
+            block_needer_max.end_index = container->getEndingIndex();
+            block_needer_max.nb_leaf_in_block = container->getNumberOfCellsInBlock();
+            // get every morton index
+            for(int idx_cell = 0 ; idx_cell < container->getNumberOfCellsInBlock() ; ++idx_cell){
+                m_idx_max[idx_cell] = container->getCellMortonIndex(idx_cell);
+            }
+            // if it's the leaf level we need to send to number of particle
+            // per leaf
+            if(leaf_level){
+                auto* container_particle = tree.getParticleGroup(idx_group);
+                block_needer_max.idx_global_particle_block = container_particle->getIdxGlobal();
+                // nb_particle_max already holds block_size entries at the leaf level
+                for(int idx_cell = 0 ; idx_cell < container_particle->getNumberOfLeavesInBlock() ; ++idx_cell){
+                    nb_particle_max[idx_cell] = container_particle->getNbParticlesInLeaf(idx_cell);
+                }
+            }
+            break;
+        }
+    }
+    // compute the max and the min morton Index at the upper level
+
+    // Now we have our max and our min at the current level
+    // Now we want to check if we have the parents of our min and our max
+    bool flag_min = false;
+    bool flag_max = false;
+
+    // MPI info
+    int nb_proc = conf.comm.size();
+    int my_rank = conf.comm.rank();
+
+    // reception buffer
+    // Symbolic block buffer
+    sending_cell_structure_M2M buffer_right_neighbor{-1,0,0,0};
+    sending_cell_structure_M2M buffer_left_neighbor{-1,0,0,0};
+    // Morton index buffer
+    std::vector<MortonIndex> buffer_m_idx_right(block_size,0);
+    std::vector<MortonIndex> buffer_m_idx_left(block_size,0);
+    // number of particle buffer
+    std::vector<FSize>       buffer_nb_particle_right(0,0);
+    std::vector<FSize>       buffer_nb_particle_left(0,0);
+
+    // flag for neighbor
+    bool flag_right_neighboor = false;
+    bool flag_left_neighboor  = false;
+
+    // array of request (at most 6 requests per neighbor in the first exchange)
+    int nb_message = 0;
+    inria::mpi::request tab_mpi_status[12];
+
+    // if i'm 0, i don't need a block from left
+    if(my_rank == 0){
+        flag_min = true;
+        flag_left_neighboor = true;
+    }
+    // if i'm the last proc, i don't need block from right
+    if(my_rank == nb_proc-1){
+        flag_max = true;
+        flag_right_neighboor = true;
+    }
+
+    // Now we need to send to the neighbor if we need a block, and recv if
+    // he need block
+
+    // First send and recv from left
+
+    if(my_rank != 0){
+        /////////////////////////////////////////////
+        //// SENDING
+        /////////////////////////////////////////////
+        // Send symbolic information of my min block
+        tab_mpi_status[nb_message] =
+            conf.comm.isend(
+                &block_needer_min,
+                sizeof(sending_cell_structure_M2M),
+                MPI_CHAR,
+                my_rank-1,1);
+        ++nb_message;
+        // send the morton index of the min block
+        tab_mpi_status[nb_message] =
+            conf.comm.isend(
+                m_idx_min.data(),
+                int(sizeof(MortonIndex)*block_size),
+                MPI_CHAR,
+                my_rank-1,2);
+        ++nb_message;
+        // if it's the leaf level
+        if(leaf_level){
+            // send the number of particle of the particle block attached
+            tab_mpi_status[nb_message] =
+                conf.comm.isend(
+                    nb_particle_min.data(),
+                    int(sizeof(FSize)*block_size),
+                    MPI_CHAR,
+                    my_rank-1,3);
+                    ++nb_message;
+        }
+        // recv the symbolic block from my left neighbor
+        tab_mpi_status[nb_message] =
+            conf.comm.irecv(
+                &buffer_left_neighbor,
+                sizeof(sending_cell_structure_M2M),
+                MPI_CHAR,
+                my_rank-1,1);
+        ++nb_message;
+        // recv the morton index of the block send by my left neighbor
+        tab_mpi_status[nb_message] =
+            conf.comm.irecv(
+                buffer_m_idx_left.data(),
+                int(sizeof(MortonIndex)*block_size),
+                MPI_CHAR,
+                my_rank-1,2);
+        ++nb_message;
+        // if it's the leaf level
+        if(leaf_level){
+            // need to recv the number of particle of the particle attached
+            buffer_nb_particle_left.resize(block_size,0);
+            tab_mpi_status[nb_message] =
+                conf.comm.irecv(
+                    buffer_nb_particle_left.data(),
+                    int(sizeof(FSize)*block_size),
+                    MPI_CHAR,
+                    my_rank-1,3);
+                    ++nb_message;
+        }
+    }
+    // Send and recv from right
+    if(my_rank != nb_proc-1){
+        // send my block max
+        tab_mpi_status[nb_message] =
+            conf.comm.isend(
+                &block_needer_max,
+                sizeof(sending_cell_structure_M2M),
+                MPI_CHAR,
+                my_rank+1,1);
+        ++nb_message;
+        // send the morton index of the right block
+        tab_mpi_status[nb_message] =
+            conf.comm.isend(
+                m_idx_max.data(),
+                int(sizeof(MortonIndex)*block_size),
+                MPI_CHAR,
+                my_rank+1,2);
+        ++nb_message;
+        // if it's the leaf level
+        if(leaf_level){
+            // send the number of particle of the particle block attached
+            tab_mpi_status[nb_message] =
+                conf.comm.isend(
+                    nb_particle_max.data(),
+                    int(sizeof(FSize)*block_size),
+                    MPI_CHAR,
+                    my_rank+1,3);
+            ++nb_message;
+        }
+
+        // recv the block from the right
+        tab_mpi_status[nb_message] =
+            conf.comm.irecv(
+                &buffer_right_neighbor,
+                sizeof(sending_cell_structure_M2M),
+                MPI_CHAR,
+                my_rank+1,1);
+        ++nb_message;
+        // recv the morton index of the right
+        tab_mpi_status[nb_message] =
+            conf.comm.irecv(
+                buffer_m_idx_right.data(),
+                int(sizeof(MortonIndex)*block_size),
+                MPI_CHAR,
+                my_rank+1,2);
+        ++nb_message;
+        // if it's leaf level
+        if(leaf_level){
+            // recv number of particle
+            buffer_nb_particle_right.resize(block_size,0);
+            tab_mpi_status[nb_message] =
+                conf.comm.irecv(
+                    buffer_nb_particle_right.data(),
+                    int(sizeof(FSize)*block_size),
+                    MPI_CHAR,
+                    my_rank+1,3);
+            ++nb_message;
+        }
+
+    }
+    // Wait all request
+    inria::mpi::request::waitall(nb_message,tab_mpi_status);
+
+    // Now we have the min and the max block at the level L
+    // But now we need to send the block needer of this block
+    // to insert task on him with starPU
+
+    // buffer to recv blocks
+    sending_cell_structure_M2M block_from_left;
+    sending_cell_structure_M2M block_from_right;
+
+    // buffer to recv morton index
+    std::vector<MortonIndex> m_idx_from_left(block_size,-1);
+    std::vector<MortonIndex> m_idx_from_right(block_size,-1);
+
+    // buffer to send morton index
+    std::vector<MortonIndex> m_idx_to_send_right(block_size,-1);
+    std::vector<MortonIndex> m_idx_to_send_left(block_size,-1);
+    // buffer to send symbolic information
+    sending_cell_structure_M2M block_to_left{-1,0,0,0};
+    sending_cell_structure_M2M block_to_right{-1,0,0,0};
+
+
+    nb_message = 0;
+    // IDEA can be a task
+    // we post the recv for the left block
+    if(!flag_min){
+        // posting reception of the block
+        tab_mpi_status[nb_message] =
+            conf.comm.irecv(
+                &block_from_left,
+                sizeof(sending_cell_structure_M2M),
+                MPI_CHAR,
+                my_rank-1,2);
+        ++nb_message;
+        tab_mpi_status[nb_message] =
+            conf.comm.irecv(
+                m_idx_from_left.data(),
+                int(sizeof(MortonIndex)*block_size),
+                MPI_CHAR,
+                my_rank-1,3);
+        ++nb_message;
+    }
+    // IDEA can be a task
+    // we post the recv for the right block
+    if(!flag_max){
+        // posting the reception buffer
+        tab_mpi_status[nb_message] =
+            conf.comm.irecv(
+                &block_from_right,
+                sizeof(sending_cell_structure_M2M),
+                MPI_CHAR,
+                my_rank+1,2);
+        ++nb_message;
+        tab_mpi_status[nb_message] =
+            conf.comm.irecv(
+                m_idx_from_right.data(),
+                int(sizeof(MortonIndex)*block_size),
+                MPI_CHAR,
+                my_rank+1,3);
+        ++nb_message;
+    }
+
+    // IDEA Can be a task
+    // if i need to send to the right
+    if(!flag_right_neighboor){
+        bool flag = false;
+        // seeking the last block who is mine at the upper level
+        for(int i = (tree.getNbCellGroupAtLevel(level-1)-1) ; i >= 0  ; --i){
+            auto* container = tree.getCellGroup(level-1,i);
+            // if the block is mine
+            if(container->isMine()){
+                // stock symbolic information
+                block_to_right.idx_global_block = container->getIdxGlobal();
+                block_to_right.start_index = container->getStartingIndex();
+                block_to_right.end_index = container->getEndingIndex();
+                block_to_right.nb_leaf_in_block = container->getNumberOfCellsInBlock();
+                // stock the morton index of the block
+                for(int idx_cell = 0 ; idx_cell < container->getNumberOfCellsInBlock(); ++idx_cell ){
+                    m_idx_to_send_right[idx_cell] = container->getCellMortonIndex(idx_cell);
+                }
+                // put the flag on true
+                flag = true;
+                // send the 2 buffer
+                tab_mpi_status[nb_message] =
+                conf.comm.isend(
+                    &block_to_right,
+                    sizeof(sending_cell_structure_M2M),
+                    MPI_CHAR,
+                    my_rank+1,2);
+                ++nb_message;
+                tab_mpi_status[nb_message] =
+                conf.comm.isend(
+                    &m_idx_to_send_right.data()[0],
+                    int(sizeof(MortonIndex)*block_size),
+                    MPI_CHAR,
+                    my_rank+1,3);
+                ++nb_message;
+                break;
+            }
+        }
+        // we don't have block at the upper level
+        if(!flag){
+            // send fake block
+            tab_mpi_status[nb_message] =
+            conf.comm.isend(
+                &block_to_right,
+                sizeof(sending_cell_structure_M2M),
+                MPI_CHAR,
+                my_rank+1,2);
+            ++nb_message;
+            tab_mpi_status[nb_message] =
+            conf.comm.isend(
+                m_idx_to_send_right.data(),
+                int(sizeof(MortonIndex)*block_size),
+                MPI_CHAR,
+                my_rank+1,3);
+            ++nb_message;
+        }
+    }
+    // IDEA Can be a task
+
+    if(!flag_left_neighboor){
+        bool flag = false;
+        // seek the first block who is mine
+        for(int i = 0 ; i < tree.getNbCellGroupAtLevel(level-1) ; ++i){
+            auto* container = tree.getCellGroup(level-1,i);
+            // send the first left block who is mine
+            if(container->isMine()){
+                // stock symbolic information
+                block_to_left.idx_global_block = container->getIdxGlobal();
+                block_to_left.start_index = container->getStartingIndex();
+                block_to_left.end_index = container->getEndingIndex();
+                block_to_left.nb_leaf_in_block = container->getNumberOfCellsInBlock();
+                // stock morton index
+                for(int idx_cell = 0 ; idx_cell < container->getNumberOfCellsInBlock(); ++idx_cell ){
+                    m_idx_to_send_left[idx_cell] = container->getCellMortonIndex(idx_cell);
+                }
+                // put the flag on true
+                flag = true;
+                // send block
+                tab_mpi_status[nb_message] =
+                conf.comm.isend(
+                    &block_to_left,
+                    sizeof(sending_cell_structure_M2M),
+                    MPI_CHAR,
+                    my_rank-1,2);
+                ++nb_message;
+                tab_mpi_status[nb_message] =
+                conf.comm.isend(
+                    m_idx_to_send_left.data(),
+                    int(sizeof(MortonIndex)*block_size),
+                    MPI_CHAR,
+                    my_rank-1,3);
+                ++nb_message;
+                break;
+            }
+        }
+        // we don't have block at the upper level
+        if(!flag){
+            // send fake block
+            tab_mpi_status[nb_message] =
+            conf.comm.isend(
+                &block_to_left,
+                sizeof(sending_cell_structure_M2M),
+                MPI_CHAR,
+                my_rank-1,2);
+            ++nb_message;
+            tab_mpi_status[nb_message] =
+            conf.comm.isend(
+                m_idx_to_send_left.data(),
+                int(sizeof(MortonIndex)*block_size),
+                MPI_CHAR,
+                my_rank-1,3);
+            ++nb_message;
+        }
+    }
+
+
+    // Wait for the send/recv
+    if(nb_message > 0)
+        inria::mpi::request::waitall(nb_message,tab_mpi_status);
+
+    // now i have the block needed for the M2M
+    // now we need to add this block
+
+    // We add the block, if the idx_global_block is -1, the block
+    // is invalid so we don't need him, and we don't need to add him to the tree
+    if(!flag_min && block_from_left.idx_global_block != -1){
+        tree.insert_block(block_from_left,m_idx_from_left,level-1);
+    }
+    if(!flag_max && block_from_right.idx_global_block != -1){
+        tree.insert_block(block_from_right,m_idx_from_right,level-1);
+    }
+    if(!flag_right_neighboor && buffer_right_neighbor.idx_global_block != -1){
+        tree.insert_block(buffer_right_neighbor,buffer_m_idx_right,level,&buffer_nb_particle_right);
+    }
+    if(!flag_left_neighboor && buffer_left_neighbor.idx_global_block != -1){
+        tree.insert_block(buffer_left_neighbor,buffer_m_idx_left,level,&buffer_nb_particle_left);
+    }
 }
 
+/**
+ * This function exchanges M2M blocks with the neighbor processes, level by level
+ * The left neighbor is sent my first owned block
+ * The right neighbor is sent my last owned block
+ *
+ * The blocks sent are the blocks whose "isMine" flag is true on the
+ * sender
+ *
+ * @author benjamin.dufoyer@inria.fr
+ * @param  tree         The group tree
+ * @param  conf         MPI conf
+ * @param  level_min    [OPTIONAL] exclusive lower bound of the levels processed
+ */
+template<class GroupOctreeClass>
+void send_get_block_M2M(
+            GroupOctreeClass&   tree,
+    const   inria::mpi_config&  conf,
+            int                 level_min = 1
+){
+    int nb_proc = conf.comm.size();
+    // with a single process there is no neighbor, so nothing to exchange
+    if(nb_proc > 1){
+        // get the M2M block from the leaf level down to level_min+1 (level_min itself is skipped)
+        for(int i = tree.getHeight()-1 ; i > level_min ; --i){
+            send_get_block_M2M_at_level(tree,conf,i);
+        }
+    }
+}
+
+
 }
 
 
diff --git a/Src/GroupTree/Core/FDistributedLETGroupTreeValidator.hpp b/Src/GroupTree/Core/FDistributedLETGroupTreeValidator.hpp
new file mode 100644
index 000000000..bfc954905
--- /dev/null
+++ b/Src/GroupTree/Core/FDistributedLETGroupTreeValidator.hpp
@@ -0,0 +1,260 @@
+// ==== CMAKE ====
+// @FUSE_MPI
+// ================
+//
+
+
+#ifndef _FDISTRIBUTED_LET_GROUPTREE_VALIDATOR_
+#define _FDISTRIBUTED_LET_GROUPTREE_VALIDATOR_
+
+
+#include "inria/algorithm/distributed/mpi.hpp"
+
+
+namespace dstr_grp_tree_vldr{
+
+/**
+ * This function checks one level of the LetGroupTree to detect a forgotten
+ * group
+ * The principle is simple. We compute every interaction of every cell at the
+ * level, we check if we have the morton index of the interaction in our tree
+ * If we don't have this index, we send a request to the proc who has this
+ * index to check if it exists; if it exists, it's an error
+ * @author benjamin.dufoyer@inria.fr
+ * @param  tree  localGroupTree
+ * @param  level level to check
+ * @param  conf  conf MPI
+ * @return true if it's ok, false if this rank owns a cell another rank is missing
+ */
+template<class GroupOctreeClass>
+bool validate_group_tree_at_level(
+    GroupOctreeClass& tree,
+    int level,
+    const inria::mpi_config& conf
+){
+    // MPI information
+    const int nb_proc = conf.comm.size();
+    const int my_rank = conf.comm.rank();
+
+    // Compute my min and my max morton index at the level
+    MortonIndex min_morton_index_at_level = 0;
+    MortonIndex max_morton_index_at_level = 0;
+    for(int i = 0 ; i < tree.getNbCellGroupAtLevel(level) ; ++i){
+        auto* container =  tree.getCellGroup(level,i);
+        if(container->isMine()){
+            min_morton_index_at_level = container->getStartingIndex();
+            break;
+        }
+    }
+    for(int i = tree.getNbCellGroupAtLevel(level)-1; i >= 0 ; --i){
+        auto* container =  tree.getCellGroup(level,i);
+        if(container->isMine()){
+            max_morton_index_at_level = container->getEndingIndex();
+            break;
+        }
+    }
+
+    // Sharing my interval and getting interval from all proc
+    std::pair<MortonIndex,MortonIndex> my_interval(min_morton_index_at_level,max_morton_index_at_level);
+    std::vector<std::pair<MortonIndex,MortonIndex>> all_interval(nb_proc);
+    conf.comm.allgather(&my_interval,
+                        sizeof(my_interval),
+                        MPI_CHAR,
+                        all_interval.data(),
+                        sizeof(my_interval),
+                        MPI_CHAR);
+
+    // if i have 1 block or more
+    // Get all MortonIndex for interaction
+    std::vector<MortonIndex> morton_index_not_in_tree(0);
+    if(my_interval.second != 0){
+        // vector to stock all MortonIndex
+        std::vector<MortonIndex> external_interaction(tree.getNbCellGroupAtLevel(level)*tree.getNbElementsPerBlock()*189,0);
+        unsigned idx_vector = 0;
+        // iterate on every group
+        for(int idx_group = 0 ; idx_group < tree.getNbCellGroupAtLevel(level) ; ++idx_group){
+            // get the current group
+            auto* container = tree.getCellGroup(level,idx_group);
+            if(container->isMine()){
+                // iterate on every cell
+                for(int cell_idx = 0;
+                        cell_idx < container->getNumberOfCellsInBlock();
+                        ++cell_idx){
+                            // Getting the current morton index
+                            MortonIndex curr_m_idx  = container->getCellMortonIndex(cell_idx);
+                            MortonIndex interactionsIndexes[189];
+                            int interactionsPosition[189];
+                            FTreeCoordinate coord(curr_m_idx);
+                            // Getting the interaction neighbors of the cell
+                            int counter = coord.getInteractionNeighbors(level,interactionsIndexes,interactionsPosition);
+                            for(int idx_neighbor = 0 ; idx_neighbor < counter ; ++idx_neighbor){
+                                MortonIndex tmp = interactionsIndexes[idx_neighbor];
+                                if(tmp >= min_morton_index_at_level && tmp < max_morton_index_at_level){
+                                    // do nothing, it's my interval
+                                } else {
+                                    //Stock the index
+                                    external_interaction[idx_vector] = tmp;
+                                    ++idx_vector;
+                                }
+                            } // end for neighbors
+                        } // end for cell
+                    } // end if isMine
+                } // end for group
+        if(idx_vector > 0){
+            FQuickSort<MortonIndex>::QsSequential(external_interaction.data(),idx_vector);
+            // vector to have all mortonIndex with no duplicate data
+            std::vector<MortonIndex> morton_needed(0);
+            MortonIndex last_morton_index = -1;
+            for(unsigned i = 0 ; i < idx_vector ; ++i){
+                if(external_interaction[i] != last_morton_index){
+                    morton_needed.push_back(external_interaction[i]);
+                    last_morton_index = external_interaction[i];
+                }
+            }
+            // free the old vector
+            std::vector<MortonIndex>().swap(external_interaction);
+            // keep the morton indexes that no LET (not mine) group of the tree covers
+
+            for(unsigned i = 0 ; i < morton_needed.size(); ++i ){
+                bool flag = false;
+                MortonIndex current_morton_index = morton_needed[i];
+                for(int j = 0 ; j < tree.getNbCellGroupAtLevel(level); ++j){
+                    auto* container = tree.getCellGroup(level,j);
+                    if(!container->isMine()){
+                        if(container->isInside(current_morton_index) || container->getEndingIndex() == current_morton_index){
+                            flag =true;
+                            break;
+                        }
+                    }
+                }
+                // if we are here, we don't have the interaction
+                if(!flag)
+                    morton_index_not_in_tree.push_back(current_morton_index);
+            }
+        }
+    }
+
+    // Bucket the missing morton indexes by owner rank; an index inside no interval is dropped
+    std::vector<unsigned> nb_message_to_send(nb_proc,0);
+    std::vector<unsigned> nb_message_to_recev(nb_proc,0);
+    std::vector<std::vector<MortonIndex>> morton_to_send(nb_proc);
+    for(const MortonIndex m_idx : morton_index_not_in_tree){
+        for(unsigned j = 0 ; j < all_interval.size() ; ++j){
+            if(m_idx >= all_interval[j].first && m_idx <= all_interval[j].second){
+                morton_to_send[j].push_back(m_idx);
+                nb_message_to_send[j] += 1;
+                break;
+            }
+        }
+    }
+
+    // Send the number of morton index we will send
+    conf.comm.alltoall(nb_message_to_send.data(),
+                        1,
+                        MPI_UNSIGNED,
+                        nb_message_to_recev.data(),
+                        1,
+                        MPI_UNSIGNED);
+
+    // Compute the number of message and the number of morton index
+    int nb_morton_index = 0;
+    int nb_message =0;
+    for(unsigned i = 0 ; i < nb_message_to_recev.size() ; ++i){
+        nb_morton_index += nb_message_to_recev[i];
+        if(nb_message_to_recev[i] > 0){
+            ++nb_message;
+        }
+        if(nb_message_to_send[i] > 0){
+            ++nb_message;
+        }
+    }
+
+    // declare the reception buffer
+    std::vector<MortonIndex> morton_recv(nb_morton_index,0);
+    // requests to wait on (std::vector: standard C++, and valid when nb_message is 0)
+    std::vector<inria::mpi::request> tab_mpi_status(nb_message);
+
+    int idx_message =0;
+    unsigned offset = 0;
+    // post all reception
+    for(unsigned i = 0 ; i < nb_message_to_recev.size(); ++i){
+        if(nb_message_to_recev[i] > 0){
+            unsigned nb_m_idx = nb_message_to_recev[i];
+            tab_mpi_status[idx_message] = conf.comm.irecv(&morton_recv[offset],
+                            int(nb_m_idx*sizeof(MortonIndex)),
+                            MPI_CHAR,
+                            i,1);
+            ++idx_message;
+            offset += nb_m_idx;
+        }
+    }
+
+    // post all send message: every destination has its own buffer, so an
+    // index owned by nobody can no longer shift the slice sent to a rank
+    for(unsigned i = 0 ; i < nb_message_to_send.size(); ++i){
+        if(nb_message_to_send[i] > 0){
+            unsigned nb_m_idx = nb_message_to_send[i];
+            tab_mpi_status[idx_message] = conf.comm.isend(morton_to_send[i].data(),
+                            int(nb_m_idx*sizeof(MortonIndex)),
+                            MPI_CHAR,
+                            i,1);
+            ++idx_message;
+            // morton_to_send[i] is left untouched until the waitall below
+        }
+    }
+
+    // Wait all request
+    inria::mpi::request::waitall(idx_message,tab_mpi_status.data());
+
+    offset = 0 ;
+    bool flag = true;
+    for(unsigned i = 0 ; i < nb_message_to_recev.size() ; ++i ){
+        unsigned nb_morton_index_2 = nb_message_to_recev[i];
+        for(unsigned j = 0 ; j < nb_morton_index_2 ; ++j ){
+            MortonIndex current_idx = morton_recv[j+offset];
+            for(int k = 0; k < tree.getNbCellGroupAtLevel(level);++k){
+                auto* container = tree.getCellGroup(level,k);
+                if(container->isMine()){
+                    if(container->isInside(current_idx)){
+                        std::cout << " [Error][level "<<level << "] " << current_idx << " on " << my_rank << " Not transfered to " << i << std::endl;
+                        flag = false;
+                    }
+                }
+            }
+        }
+        offset += nb_morton_index_2;
+    }
+
+    // return the flag
+    return flag;
+
+}
+
+/**
+ * This function checks every level of the LetGroupTree to know if we forgot
+ * a group
+ * @author benjamin.dufoyer@inria.fr
+ * @param  tree local group tree + let
+ * @param  conf MPI conf
+ * @return true if no level reported an error on this rank
+ */
+template<class GroupOctreeClass>
+bool validate_group_tree(
+    GroupOctreeClass& tree,
+    const inria::mpi_config& conf
+){
+    bool res = true;
+    // Check every level and never stop early: the per-level check uses
+    // collective MPI calls, so a rank leaving the loop after a local
+    // failure would leave the other ranks blocked in the next level.
+    for(int i = tree.getHeight()-1 ; i > 0 ; --i){
+        const bool level_ok = validate_group_tree_at_level(tree,i,conf);
+        res = res && level_ok;
+    }
+    return res;
+}
+
+
+}
+
+#endif
diff --git a/Src/GroupTree/Core/FGroupLinearTree.hpp b/Src/GroupTree/Core/FGroupLinearTree.hpp
index aed3a5983..c7a877fd7 100644
--- a/Src/GroupTree/Core/FGroupLinearTree.hpp
+++ b/Src/GroupTree/Core/FGroupLinearTree.hpp
@@ -3,7 +3,6 @@
 
 #include <vector>
 #include "../../Utils/FLog.hpp"
-#include "FDistributedGroupTreeBuilder.hpp"
 
 using FReal = double;
 
@@ -14,14 +13,15 @@ class FGroupLinearTree {
 
 protected:
 
-    int block_size;
-    int nb_block;
+    int block_size;   //<
+    int nb_block;     //<
 
-    const inria::mpi_config& mpi_conf;
+    // Copy of the MPI conf 
+    const inria::mpi_config mpi_conf;  //<
 
-    std::vector<node_t>* linear_tree;
-    std::vector<std::pair<MortonIndex,MortonIndex>> index_particle_distribution;
-    bool unknow_index_particle_distribution = true;
+    std::vector<node_t>*                           linear_tree;         //<
+    std::vector<std::pair<MortonIndex,MortonIndex>> index_particle_distribution;  //<
+    bool unknow_index_particle_distribution = true;  //<
 
 public:
 
@@ -36,14 +36,19 @@ public:
      * @param  in_linear_tree   Linear tree
      * @param  in_box_center    Box Center of particle container
      * @param  in_box_width     Box Width of particle container
+     * @warning We copy the MPI comm because O3 compilation fail the utest
      */
-    FGroupLinearTree(const inria::mpi_config& conf):
+    FGroupLinearTree(const inria::mpi_config conf):
         mpi_conf(conf),
         index_particle_distribution(conf.comm.size())
     {
         linear_tree = new std::vector<node_t>[1];
     }
 
+////////////////////////////////////////////////////
+// Function of initialisation
+////////////////////////////////////////////////////
+
     /**
      * This function create a blocked linear tree from the current distributed
      * linear tree
@@ -187,6 +192,12 @@ public:
         return this->linear_tree->back().morton_index;
     }
 
+
+    /** Return a copy of the MPI configuration stored in this linear tree */
+    const inria::mpi_config get_mpi_conf() const{
+        return mpi_conf;
+    }
+
     /**
      * This function print the information of this current class
      * @author benjamin.dufoyer@inria.fr
@@ -215,35 +226,92 @@ public:
      * @param  particle_container [description]
      */
     template<class particle_t>
-    void set_index_particle_distribution(
-        std::vector<particle_t> particle_container)
+    void set_index_particle_distribution( std::vector<particle_t>& particle_container)
     {
-       unknow_index_particle_distribution = false;
-       dstr_grp_tree_builder::share_particle_division(
-           this->mpi_conf,
-           particle_container,
-           index_particle_distribution);
+        unknow_index_particle_distribution = false;
+        if(this->mpi_conf.comm.size() > 1){
+            dstr_grp_tree_builder::share_particle_division(
+                  this->mpi_conf,
+                  particle_container,
+                  this->index_particle_distribution);
+        } else {
+            // Single process: front()/back() below need at least one particle
+            FAssert(!particle_container.empty());
+            this->index_particle_distribution.resize(1);
+            this->index_particle_distribution[0].first  = particle_container.front().morton_index;
+            this->index_particle_distribution[0].second = particle_container.back().morton_index;
+        }
     }
 
+    /**
+     * Update the particle distribution with the pair given in parameter
+     * (through share_particle_division when there are several processes)
+     * @author benjamin.dufoyer@inria.fr
+     * @param  new_distrib new pair of Morton indexes for this process
+     */
     void update_index_particle_distribution(std::pair<MortonIndex,MortonIndex> new_distrib){
-        dstr_grp_tree_builder::share_particle_division(
-            this->mpi_conf,
-            new_distrib,
-            index_particle_distribution);
+        unknow_index_particle_distribution = false;
+        if(this->mpi_conf.comm.size() > 1){
+            dstr_grp_tree_builder::share_particle_division(
+                this->mpi_conf,
+                new_distrib,
+                this->index_particle_distribution);
+        } else {
+            this->index_particle_distribution.resize(1);
+            this->index_particle_distribution[0] = new_distrib;
+        }
     }
 
-    std::vector<std::pair<MortonIndex,MortonIndex>>*
+    /**
+     * Return a copy of the particle distribution of all the processes
+     * @author benjamin.dufoyer@inria.fr
+     */
+    std::vector<std::pair<MortonIndex,MortonIndex>>
     get_index_particle_distribution(){
         // TO get the particle repartition, you will compute it before
         FAssert(!unknow_index_particle_distribution);
-        return &this->index_particle_distribution;
+        return this->index_particle_distribution;
     }
 
-    std::pair<MortonIndex,MortonIndex> get_index_particle_distribution_at(unsigned i){
+
+    /**
+     * Return the particle distribution flattened into a vector holding two
+     * Morton indexes per entry of index_particle_distribution:
+     *   - element 2*i   : last index of entry i-1 (-1 for the first entry)
+     *   - element 2*i+1 : last index of entry i
+     * Unlike the other getters, it does not assert that the distribution
+     * has been computed.
+     * @return vector of 2 * index_particle_distribution.size() indexes
+     */
+    std::vector<MortonIndex> get_index_particle_distribution_implicit(){
+        const std::size_t nb_interval = this->index_particle_distribution.size();
+        std::vector<MortonIndex> distribution(2*nb_interval, -1);
+        // No special case for a single process: the loop below already
+        // gives {-1, last index} in that case, and nothing is written
+        // when the distribution is empty.
+        for(std::size_t i = 0 ; i < nb_interval ; ++i){
+            if(i > 0){
+                // left limit: last index of the previous entry
+                distribution[2*i] = this->index_particle_distribution[i-1].second;
+            }
+            // right limit: last index of this entry
+            distribution[2*i+1] = this->index_particle_distribution[i].second;
+        }
+        return distribution;
+    }
+
+    /**
+     * Return the particle distribution of the process whose rank is given
+     * in parameter
+     * @author benjamin.dufoyer@inria.fr
+     * @param  proc_rank rank of the process (must be a valid index)
+     * @return the pair of Morton indexes stored for this rank
+     */
+    std::pair<MortonIndex,MortonIndex> get_index_particle_distribution_at(unsigned proc_rank){
         // TO get the particle repartition, you will compute it before
         FAssert(!unknow_index_particle_distribution);
-        FAssert(i < this->index_particle_distribution.size());
-        return this->index_particle_distribution.data()[i];
+        FAssert(proc_rank < this->index_particle_distribution.size());
+        return this->index_particle_distribution.data()[proc_rank];
     }
 
     /**
@@ -259,127 +327,25 @@ public:
         int my_rank = this->mpi_conf.comm.rank();
         MortonIndex left_limit = -1;
         if(my_rank != 0){
-            left_limit = (MortonIndex )this->index_particle_distribution[my_rank-1].second;
+            left_limit = static_cast<MortonIndex>(this->index_particle_distribution[my_rank-1].second);
         }
         return left_limit;
     }
 
+
     /**
-     * This function compute the leaf needed to build the LET part of the Group
-     * Tree.
-     * After she send block needed by other proc and she recev block needed
+     * This function is used to print the FGroupLinearTree more easily
      * @author benjamin.dufoyer@inria.fr
-     * @param tree      local group tree
-     * [Optionial]
-     * @param dim       Dimension of coordinate of particle
      */
-    template<class GroupTreeClass>
-    void create_let_group_tree_at_level(
-        GroupTreeClass& tree,
-        int level,
-        int dim = 3
-    ){
-        FAssert(index_particle_distribution.size() != 0 );
-        FAssert(dim > 0);
-        bool leaf_level = (tree.getHeight()-1 == level);
-        // Compute min and max global morton index at the level needed
-        // This variable is used to put value in const
-        MortonIndex gmin =  this->index_particle_distribution.front().first;
-        MortonIndex gmax =  this->index_particle_distribution.back().second;
-        // update the morton index
-        if(!leaf_level){
-            gmin = gmin >> 3;
-            gmax = gmax >> 3;
-        }
-        const MortonIndex global_min_m_idx = gmin;
-        const MortonIndex global_max_m_idx = gmax;
-
-        // Compute min and max local morton index
-        const MortonIndex local_min_m_idx =
-            tree.getParticleGroup(0)->getStartingIndex() >>( (tree.getHeight()-1-level)*dim);
-        const MortonIndex local_max_m_idx = tree.getParticleGroup(
-            (tree.getNbParticleGroup()-1) )->getEndingIndex() >>( (tree.getHeight()-1-level)*dim);
-
-        std::vector<MortonIndex> leaf_P2P;
-        if(leaf_level){
-            // IDEA : can be a task
-            // This function compute the leaf needed by the P2P operation
-            // This function return a vector with all leaf needed
-            // get leaf P2P
-            leaf_P2P = dstr_grp_tree_builder::get_leaf_P2P_interaction(
-                tree,
-                global_min_m_idx,
-                global_max_m_idx,
-                local_min_m_idx,
-                local_max_m_idx);
-        }
-
-        // IDEA can be a task
-        // This function compute the leaf needed by the M2L operation
-        // This function return a vector with all leaf needed
-        // get leaf M2L
-        std::vector<MortonIndex> leaf_M2L =
-        dstr_grp_tree_builder::get_leaf_M2L_interaction_at_level(
-            global_min_m_idx,
-            global_max_m_idx,
-            local_min_m_idx,
-            local_max_m_idx,
-            level,
-            tree,
-            dim);
-
-        std::vector<MortonIndex> needed_leaf;
-        if(leaf_level){
-            // this function return the concatenation of the leaf for the P2P and
-            // the leaf for the M2L
-            needed_leaf = dstr_grp_tree_builder::concat_M2L_P2P(leaf_P2P,leaf_M2L);
-        } else {
-            needed_leaf = leaf_M2L;
-            this->update_index_particle_distribution(
-                std::pair<MortonIndex,MortonIndex>(local_min_m_idx
-                                                ,local_max_m_idx)
-            );
-        }
-
-        std::vector<std::vector<size_t>> global_matrix_interaction = dstr_grp_tree_builder::get_matrix_interaction(
-            needed_leaf,
-            index_particle_distribution,
-            this->mpi_conf);
-
-        // Send and get leaf
-        // Auto is used to get the block more easly
-        // it's a vector<vector<block_t>>
-        // block_t is a struct define on FDistributedGroupTreeBuilder.hpp
-        auto let_block =
-        dstr_grp_tree_builder::send_get_symbolic_block_at_level(
-            needed_leaf,
-            global_matrix_interaction,
-            tree,
-            level,
-            this->mpi_conf);
-
-        // Add the block recev to the local group tree
-        dstr_grp_tree_builder::add_let_leaf_block_to_tree(
-                tree,
-                let_block,
-                local_min_m_idx,
-                level);
-
+    friend
+    std::ostream& operator<<(std::ostream& os, const FGroupLinearTree& n) {
+        os << "--> Number of leaf : " << n.get_nb_leaf();
+        os << "\n first leaf : "      << n.get_first_morton_index();
+        os << "\n last  leaf : "      << n.get_last_morton_index();
+        os << "\n block_size "        << n.get_block_size();
+        return os << "\n number of block : " << n.get_nb_block();
     }
 
-        /**
-         * This function is used to show the FGroupLinearTee more easly
-         * @author benjamin.dufoyer@inria.fr
-         */
-        friend
-        std::ostream& operator<<(std::ostream& os, const FGroupLinearTree& n) {
-        return os << "--> Number of leaf : " << n.get_nb_leaf()
-                  << "\n first leaf : "      << n.get_first_morton_index()
-                  << "\n last  leaf : "      << n.get_last_morton_index()
-                  << "\n block_size "          << n.get_block_size()
-                  << "\n number of block : "   << n.get_nb_block();
-        }
-
 
 };
 
diff --git a/Src/GroupTree/Core/FGroupOfCells.hpp b/Src/GroupTree/Core/FGroupOfCells.hpp
index 9462df8ee..ed3e7c8d7 100644
--- a/Src/GroupTree/Core/FGroupOfCells.hpp
+++ b/Src/GroupTree/Core/FGroupOfCells.hpp
@@ -21,6 +21,8 @@ class FGroupOfCells {
         MortonIndex startingIndex;
         MortonIndex endingIndex;
         int numberOfCellsInBlock;
+        int idxGlobal;
+        bool isMine;
     };
 
 protected:
@@ -45,9 +47,9 @@ protected:
     bool deleteBuffer;
 
 public:
-    using multipole_t = PoleCellClass;
+    using multipole_t       = PoleCellClass;
     using local_expansion_t = LocalCellClass;
-    using symbolic_data_t = SymbolCellClass;
+    using symbolic_data_t   = SymbolCellClass;
 
     FGroupOfCells()
         : allocatedMemoryInByte(0), memoryBuffer(nullptr),
@@ -156,6 +158,8 @@ public:
         blockHeader->startingIndex = inStartingIndex;
         blockHeader->endingIndex   = inEndingIndex;
         blockHeader->numberOfCellsInBlock  = inNumberOfCells;
+        blockHeader->idxGlobal     = -1;
+        blockHeader->isMine        = false;
 #ifndef SCALFMM_SIMGRID_NODATA
         cellMultipoles = (PoleCellClass*)FAlignedMemory::AllocateBytes<32>(inNumberOfCells*sizeof(PoleCellClass));
         cellLocals     = (LocalCellClass*)FAlignedMemory::AllocateBytes<32>(inNumberOfCells*sizeof(LocalCellClass));
@@ -263,6 +267,26 @@ public:
         return blockHeader->numberOfCellsInBlock;
     }
 
+    /** Return the index of the current block in the global range */
+    int getIdxGlobal() const{
+        return blockHeader->idxGlobal;
+    }
+
+    /** Set the index of the current block in the global range */
+    void setIdxGlobal(int idx){
+        blockHeader->idxGlobal = idx;
+    }
+
+    /** Return true if the current block is owned by the local tree */
+    bool isMine() const {
+        return blockHeader->isMine;
+    }
+
+    /** Declare the current block as owned by the local tree */
+    void declare_mine(){
+        blockHeader->isMine = true;
+    }
+
     /** The size of the interval endingIndex-startingIndex (set from the constructor) */
     MortonIndex getSizeOfInterval() const {
         return MortonIndex(blockHeader->endingIndex-blockHeader->startingIndex);
diff --git a/Src/GroupTree/Core/FGroupOfParticles.hpp b/Src/GroupTree/Core/FGroupOfParticles.hpp
index 810bf307b..bf1a86d4d 100644
--- a/Src/GroupTree/Core/FGroupOfParticles.hpp
+++ b/Src/GroupTree/Core/FGroupOfParticles.hpp
@@ -24,7 +24,7 @@ class FGroupOfParticles {
         MortonIndex startingIndex;
         MortonIndex endingIndex;
         int numberOfLeavesInBlock;
-
+        int idxGlobal;
         //< The real number of particles allocated
         FSize nbParticlesAllocatedInGroup;
         //< Starting point of position
@@ -166,7 +166,7 @@ public:
         blockHeader->numberOfLeavesInBlock  = inNumberOfLeaves;
         blockHeader->nbParticlesAllocatedInGroup = nbParticlesAllocatedInGroup;
         blockHeader->nbParticlesInGroup = inNbParticles;
-
+        blockHeader->idxGlobal = -1;
         // Init particle pointers
         blockHeader->positionsLeadingDim = (sizeof(FReal) * nbParticlesAllocatedInGroup);
         particlePosition[0] = reinterpret_cast<FReal*>((reinterpret_cast<size_t>(leafHeader + inNumberOfLeaves)
@@ -268,6 +268,14 @@ public:
         return blockHeader->startingIndex <= inIndex && inIndex < blockHeader->endingIndex;
     }
 
+    /** Return the global index (idxGlobal) recorded in the header
+     *  of this block of particles */
+    int getIdxGlobal() const{ return blockHeader->idxGlobal; }
+
+    /** Record idxGlobal as the global index of this block of particles
+     *  in its header */
+    void setIdxGlobal(int idxGlobal){ blockHeader->idxGlobal = idxGlobal; }
+
     /** Return the idx in array of the cell */
     MortonIndex getLeafMortonIndex(const int id) const{
         FAssertLF(id < blockHeader->numberOfLeavesInBlock);
@@ -330,6 +338,11 @@ public:
         }
     }
 
+    /** Return the number of particles in the leaf at position id of this block */
+    FSize getNbParticlesInLeaf(int id) const{
+        FAssertLF(id < blockHeader->numberOfLeavesInBlock);
+        return leafHeader[id].nbParticles;
+    }
 
     /** Return the address of the leaf if it exists (or NULL) */
     template<class ParticlesAttachedClass>
diff --git a/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp
index 017dc3e8f..46c4f0287 100644
--- a/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp
+++ b/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp
@@ -2,6 +2,15 @@
 #ifndef FGROUPTASKSTARPUALGORITHM_HPP
 #define FGROUPTASKSTARPUALGORITHM_HPP
 
+// @FUSE_STARPU
+// @FUSE_MPI
+
+
+#include <vector>
+#include <list>
+#include <memory>
+#include <iostream>
+
 #include "../../Utils/FGlobal.hpp"
 #include "../../Core/FCoreCommon.hpp"
 #include "../../Utils/FQuickSort.hpp"
@@ -13,8 +22,7 @@
 
 #include "FOutOfBlockInteraction.hpp"
 
-#include <vector>
-#include <memory>
+
 #ifdef SCALFMM_USE_STARPU_EXTRACT
 #include <list>
 #endif
@@ -52,9 +60,8 @@
 #endif
 
 #include "Containers/FBoolArray.hpp"
-#include <iostream>
-#include <vector>
-using namespace std;
+
+//using namespace std;
 
 //#define STARPU_USE_REDUX
 template <class OctreeClass, class CellContainerClass, class KernelClass, class ParticleGroupClass, class StarPUCpuWrapperClass
@@ -68,2506 +75,2978 @@ template <class OctreeClass, class CellContainerClass, class KernelClass, class
           >
 class FGroupTaskStarPUImplicitAlgorithm : public FAbstractAlgorithm {
 protected:
-    typedef FGroupTaskStarPUImplicitAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass
+  typedef FGroupTaskStarPUImplicitAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-    , StarPUCudaWrapperClass
+  , StarPUCudaWrapperClass
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-    , StarPUOpenClWrapperClass
-#endif
-    > ThisClass;
-
-    template <class OtherBlockClass>
-    struct BlockInteractions{
-        OtherBlockClass* otherBlock;
-        int otherBlockId;
-        std::vector<OutOfBlockInteraction> interactions;
-    };
-
-    struct CellHandles{
-        starpu_data_handle_t symb;
-        starpu_data_handle_t up;
-        starpu_data_handle_t down;
-        int intervalSize;
-    };
-
-    struct ParticleHandles{
-        starpu_data_handle_t symb;
-        starpu_data_handle_t down;
-        int intervalSize;
-    };
-
-    std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel;
+  , StarPUOpenClWrapperClass
+#endif
+  > ThisClass;
+
+  template <class OtherBlockClass>
+  struct BlockInteractions{
+    OtherBlockClass*                   otherBlock;      //< The other block of the interactions
+    int                                otherBlockId;    //< Index of that other block (presumably in its level - TODO confirm)
+    std::vector<OutOfBlockInteraction> interactions;    //< Out-of-block interactions with that block
+  };
+
+  struct CellHandles{
+    starpu_data_handle_t symb;      //< Symbolic part of a cell
+    starpu_data_handle_t up;        //< Multipole expansion of a cell
+    starpu_data_handle_t down;      //< Local expansion of a cell
+    int intervalSize;
+    int groupID  ;                   //< Group Id
+  };
+
+  struct ParticleHandles{
+    starpu_data_handle_t symb;        //< Symbolic part of a leaf
+    starpu_data_handle_t down;        //< Down part of a leaf (NOTE(review): exact content to be confirmed)
+    int intervalSize;
+  };
+
+  std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel;
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-    std::vector< std::vector< std::vector<std::vector<int>>>> externalInteractionsAllLevelInnerIndexes;
-    std::vector< std::vector< std::vector<std::vector<int>>>> externalInteractionsAllLevelOuterIndexes;
+  std::vector< std::vector< std::vector<std::vector<int>>>> externalInteractionsAllLevelInnerIndexes;
+  std::vector< std::vector< std::vector<std::vector<int>>>> externalInteractionsAllLevelOuterIndexes;
 #endif
-    std::vector< std::vector<BlockInteractions<ParticleGroupClass>>> externalInteractionsLeafLevel;
+  std::vector< std::vector<BlockInteractions<ParticleGroupClass>>> externalInteractionsLeafLevel;
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-    std::vector< std::vector<std::vector<int>>> externalInteractionsLeafLevelOuter;
-    std::vector< std::vector<std::vector<int>>> externalInteractionsLeafLevelInner;
+  std::vector< std::vector<std::vector<int>>> externalInteractionsLeafLevelOuter;
+  std::vector< std::vector<std::vector<int>>> externalInteractionsLeafLevelInner;
 #endif
-    std::list<const std::vector<OutOfBlockInteraction>*> externalInteractionsLeafLevelOpposite;
-
-    OctreeClass*const tree;       //< The Tree
-    KernelClass*const originalCpuKernel;
+  std::list<const std::vector<OutOfBlockInteraction>*> externalInteractionsLeafLevelOpposite;
 
-    std::vector<CellHandles>* cellHandles;
-    std::vector<ParticleHandles> particleHandles;
+  OctreeClass*const tree;                          //< A pointer on the Tree either duplicated or the let tree
+  KernelClass*const originalCpuKernel;             //<
 
-    starpu_codelet p2m_cl;
-    starpu_codelet m2m_cl;
-    starpu_codelet l2l_cl;
-    starpu_codelet l2l_cl_nocommute;
-    starpu_codelet l2p_cl;
+  std::vector<CellHandles>*    cellHandles;           //< Pointer on the vector of cell handle
+  std::vector<ParticleHandles> particleHandles;       //<
+  int nb_block;                                       //< Number of block ??
+  starpu_codelet p2m_cl;
+  starpu_codelet m2m_cl;
+  starpu_codelet l2l_cl;
+  starpu_codelet l2l_cl_nocommute;
+  starpu_codelet l2p_cl;
 
-    starpu_codelet m2l_cl_in;
-    starpu_codelet m2l_cl_inout;
-    starpu_codelet m2l_cl_inout_mpi;
+  starpu_codelet m2l_cl_in;
+  starpu_codelet m2l_cl_inout;
+  starpu_codelet m2l_cl_inout_mpi;
 
-    starpu_codelet p2p_cl_in;
-    starpu_codelet p2p_cl_inout;
-    starpu_codelet p2p_cl_inout_mpi;
+  starpu_codelet p2p_cl_in;
+  starpu_codelet p2p_cl_inout;
+  starpu_codelet p2p_cl_inout_mpi;
 
 #ifdef STARPU_USE_REDUX
-    starpu_codelet p2p_redux_init;
-    starpu_codelet p2p_redux_perform;
-    starpu_codelet p2p_redux_read;
+  starpu_codelet p2p_redux_init;
+  starpu_codelet p2p_redux_perform;
+  starpu_codelet p2p_redux_read;
 #endif
 
-    const bool noCommuteAtLastLevel;
-    const bool noCommuteBetweenLevel;
+  const bool noCommuteAtLastLevel;
+  const bool noCommuteBetweenLevel;
 
 #ifdef STARPU_USE_CPU
-    StarPUCpuWrapperClass cpuWrapper;
+  StarPUCpuWrapperClass cpuWrapper;
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-    StarPUCudaWrapperClass cudaWrapper;
+  StarPUCudaWrapperClass cudaWrapper;
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-    StarPUOpenClWrapperClass openclWrapper;
+  StarPUOpenClWrapperClass openclWrapper;
 #endif
 
-    FStarPUPtrInterface wrappers;
-    FStarPUPtrInterface* wrapperptr;
+  FStarPUPtrInterface wrappers;
+  FStarPUPtrInterface* wrapperptr;
 
 #ifdef STARPU_SUPPORT_ARBITER
-    starpu_arbiter_t arbiterGlobal;
+  starpu_arbiter_t arbiterGlobal;
 #endif
 
 #ifdef STARPU_USE_TASK_NAME
 #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-    std::vector<std::unique_ptr<char[]>> m2mTaskNames;
-    std::vector<std::unique_ptr<char[]>> m2lTaskNames;
-    std::vector<std::unique_ptr<char[]>> m2lOuterTaskNames;
-    std::vector<std::unique_ptr<char[]>> l2lTaskNames;
-    std::unique_ptr<char[]> p2mTaskNames;
-    std::unique_ptr<char[]> l2pTaskNames;
-    std::unique_ptr<char[]> p2pTaskNames;
-    std::unique_ptr<char[]> p2pOuterTaskNames;
+  std::vector<std::unique_ptr<char[]>> m2mTaskNames;
+  std::vector<std::unique_ptr<char[]>> m2lTaskNames;
+  std::vector<std::unique_ptr<char[]>> m2lOuterTaskNames;
+  std::vector<std::unique_ptr<char[]>> l2lTaskNames;
+  std::unique_ptr<char[]> p2mTaskNames;
+  std::unique_ptr<char[]> l2pTaskNames;
+  std::unique_ptr<char[]> p2pTaskNames;
+  std::unique_ptr<char[]> p2pOuterTaskNames;
 #else
-    FStarPUTaskNameParams* taskNames = nullptr;
+  FStarPUTaskNameParams* taskNames = nullptr;
 #endif
 #endif
 #ifdef SCALFMM_STARPU_USE_PRIO
-    typedef FStarPUFmmPrioritiesV2 PrioClass;// FStarPUFmmPriorities
+  typedef FStarPUFmmPrioritiesV2 PrioClass;// FStarPUFmmPriorities
 #endif
-    int mpi_rank, nproc;
-    std::vector<std::vector<std::vector<MortonIndex>>> nodeRepartition;
-
+  //
+  //  Parallel data
+  int mpi_rank ;              //< rank of the current processus
+  int nproc;                  //< number of  mpi processes
+  std::vector< std::vector< std::vector<MortonIndex>>> _nodeRepartition;  //< The morton index distributions level by level
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-    struct ParticleExtractedHandles{
-        starpu_data_handle_t symb;
-        size_t size;
-        std::unique_ptr<unsigned char[]> data;
-        std::vector<int> leavesToExtract;
-    };
+  struct ParticleExtractedHandles{
+    starpu_data_handle_t symb;
+    size_t size;
+    std::unique_ptr<unsigned char[]> data;
+    std::vector<int> leavesToExtract;
+  };
 
-    std::list<ParticleExtractedHandles> extractedParticlesBuffer;
+  std::list<ParticleExtractedHandles> extractedParticlesBuffer;
 
-    struct DuplicatedParticlesHandle{
-        starpu_data_handle_t symb;
-        size_t size;
-        unsigned char* data; // Never delete it, we reuse already allocate memory here
-    };
+  struct DuplicatedParticlesHandle{
+    starpu_data_handle_t symb;
+    size_t size;
+    unsigned char* data; // Never delete it, we reuse already allocate memory here
+  };
 
-    std::list<DuplicatedParticlesHandle> duplicatedParticlesBuffer;
+  std::list<DuplicatedParticlesHandle> duplicatedParticlesBuffer;
 
-    starpu_codelet p2p_extract;
-    starpu_codelet p2p_insert;
-    starpu_codelet p2p_insert_bis;
+  starpu_codelet p2p_extract;
+  starpu_codelet p2p_insert;
+  starpu_codelet p2p_insert_bis;
 
-    struct CellExtractedHandles{
-        starpu_data_handle_t all;
-        size_t size;
-        std::unique_ptr<unsigned char[]> data;
-        std::vector<int> cellsToExtract;
-    };
+  struct CellExtractedHandles{
+    starpu_data_handle_t all;
+    size_t size;
+    std::unique_ptr<unsigned char[]> data;
+    std::vector<int> cellsToExtract;
+  };
 
-    std::list<CellExtractedHandles> extractedCellBuffer;
+  std::list<CellExtractedHandles> extractedCellBuffer;
 
-    struct DuplicatedCellHandle{
-        starpu_data_handle_t symb;
-        size_t sizeSymb;
-        unsigned char* dataSymb; // Never delete it, we reuse already allocate memory here
-        starpu_data_handle_t other;
-        size_t sizeOther;
-        unsigned char* dataOther; // Never delete it, we reuse already allocate memory here
+  struct DuplicatedCellHandle{
+    starpu_data_handle_t symb;
+    size_t sizeSymb;
+    unsigned char* dataSymb; // Never delete it, we reuse already allocate memory here
+    starpu_data_handle_t other;
+    size_t sizeOther;
+    unsigned char* dataOther; // Never delete it, we reuse already allocate memory here
 
-        std::unique_ptr<unsigned char[]> dataSymbPtr;
-        std::unique_ptr<unsigned char[]> dataOtherPtr;
-    };
+    std::unique_ptr<unsigned char[]> dataSymbPtr;
+    std::unique_ptr<unsigned char[]> dataOtherPtr;
+  };
 
-    std::list<DuplicatedCellHandle> duplicatedCellBuffer;
+  std::list<DuplicatedCellHandle> duplicatedCellBuffer;
 
-    starpu_codelet cell_extract_up;
-    starpu_codelet cell_insert_up;
-    starpu_codelet cell_insert_up_bis;
+  starpu_codelet cell_extract_up;
+  starpu_codelet cell_insert_up;
+  starpu_codelet cell_insert_up_bis;
 #endif
 
 public:
-    FGroupTaskStarPUImplicitAlgorithm(OctreeClass*const inTree, KernelClass* inKernels, std::vector<MortonIndex>& distributedMortonIndex)
-        : tree(inTree), originalCpuKernel(inKernels),
-          cellHandles(nullptr),
-          noCommuteAtLastLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_LAST_L2L", true)),
-          noCommuteBetweenLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_M2L_L2L", false)),
-      #ifdef STARPU_USE_CPU
-          cpuWrapper(tree->getHeight()),
-      #endif
-      #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-          cudaWrapper(tree->getHeight()),
-      #endif
-      #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-          openclWrapper(tree->getHeight()),
-      #endif
-          wrapperptr(&wrappers){
-        FAssertLF(tree, "tree cannot be null");
-        FAssertLF(inKernels, "kernels cannot be null");
-
-        FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());
-        struct starpu_conf conf;
-        FAssertLF(starpu_conf_init(&conf) == 0);
+  FGroupTaskStarPUImplicitAlgorithm(OctreeClass*const inTree, KernelClass* inKernels,
+                                    std::vector<MortonIndex>& distributedMortonIndex,
+                                    const int nb_block_in = -1)
+    : tree(inTree), nb_block(nb_block_in), originalCpuKernel(inKernels),
+      cellHandles(nullptr),
+      noCommuteAtLastLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_LAST_L2L", true)),
+      noCommuteBetweenLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_M2L_L2L", false)),
+    #ifdef STARPU_USE_CPU
+      cpuWrapper(tree->getHeight()),
+    #endif
+    #ifdef SCALFMM_ENABLE_CUDA_KERNEL
+      cudaWrapper(tree->getHeight()),
+    #endif
+    #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
+      openclWrapper(tree->getHeight()),
+    #endif
+      wrapperptr(&wrappers){
+    FAssertLF(tree, "tree cannot be null");
+    FAssertLF(inKernels, "kernels cannot be null");
+
+    FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight());
+    struct starpu_conf conf;
+    FAssertLF(starpu_conf_init(&conf) == 0);
 #ifdef SCALFMM_STARPU_USE_PRIO
-        PrioClass::Controller().init(&conf, tree->getHeight(), inKernels);
+    PrioClass::Controller().init(&conf, tree->getHeight(), inKernels);
 #endif
-        FAssertLF(starpu_init(&conf) == 0);
-        FAssertLF(starpu_mpi_init ( 0, 0, 0 ) == 0);
-        MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank);
-        MPI_Comm_size(MPI_COMM_WORLD,&nproc);
+    FAssertLF(starpu_init(&conf) == 0);
+    FAssertLF(starpu_mpi_init ( 0, 0, 0 ) == 0);
+    MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank);
+    MPI_Comm_size(MPI_COMM_WORLD,&nproc);
 #ifdef STARPU_USE_TASK_NAME
 #ifdef SCALFMM_SIMGRID_TASKNAMEPARAMS
-        taskNames = new FStarPUTaskNameParams(mpi_rank, nproc);
+    taskNames = new FStarPUTaskNameParams(mpi_rank, nproc);
 #endif
 #endif
-        starpu_malloc_set_align(32);
+    starpu_malloc_set_align(32);
 
-        starpu_pthread_mutex_t initMutex;
-        starpu_pthread_mutex_init(&initMutex, NULL);
+    starpu_pthread_mutex_t initMutex;
+    starpu_pthread_mutex_init(&initMutex, NULL);
 #ifdef STARPU_USE_CPU
-        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
-            starpu_pthread_mutex_lock(&initMutex);
-            cpuWrapper.initKernel(starpu_worker_get_id(), inKernels);
-            starpu_pthread_mutex_unlock(&initMutex);
-        });
-        wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
+    FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
+        starpu_pthread_mutex_lock(&initMutex);
+        cpuWrapper.initKernel(starpu_worker_get_id(), inKernels);
+        starpu_pthread_mutex_unlock(&initMutex);
+      });
+    wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
-            starpu_pthread_mutex_lock(&initMutex);
-            cudaWrapper.initKernel(starpu_worker_get_id(), inKernels);
-            starpu_pthread_mutex_unlock(&initMutex);
-        });
-        wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
+    FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
+        starpu_pthread_mutex_lock(&initMutex);
+        cudaWrapper.initKernel(starpu_worker_get_id(), inKernels);
+        starpu_pthread_mutex_unlock(&initMutex);
+      });
+    wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
-            starpu_pthread_mutex_lock(&initMutex);
-            openclWrapper.initKernel(starpu_worker_get_id(), inKernels);
-            starpu_pthread_mutex_unlock(&initMutex);
-        });
-        wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
+    FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
+        starpu_pthread_mutex_lock(&initMutex);
+        openclWrapper.initKernel(starpu_worker_get_id(), inKernels);
+        starpu_pthread_mutex_unlock(&initMutex);
+      });
+    wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
 #endif
-        starpu_pthread_mutex_destroy(&initMutex);
+    starpu_pthread_mutex_destroy(&initMutex);
 
-        starpu_pause();
+    starpu_pause();
 
-        cellHandles   = new std::vector<CellHandles>[tree->getHeight()];
+    cellHandles   = new std::vector<CellHandles>[tree->getHeight()];
 
 #ifdef STARPU_SUPPORT_ARBITER
-        arbiterGlobal = starpu_arbiter_create();
+    arbiterGlobal = starpu_arbiter_create();
 #endif
 
-        initCodelet();
-        initCodeletMpi();
-        createMachinChose(distributedMortonIndex);
-        rebuildInteractions();
+    initCodelet();
+    initCodeletMpi();
+    //
+    std::cout << "    ------------------ inside the constructor of the Algorithm ------------------"
+              << distributedMortonIndex.size() <<std::endl;
+    setMortonDistribution(distributedMortonIndex);
+    this->printDataDistributionInfo();
+
+    this->rebuildInteractions();
 
-        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n");
+    FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n");
 #ifdef STARPU_USE_CPU
-        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n");
+    FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n");
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n");
+    FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n");
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CUDA " << starpu_cuda_worker_get_count() << ")\n");
+    FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CUDA " << starpu_cuda_worker_get_count() << ")\n");
 #endif
-        FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_LAST_L2L " << noCommuteAtLastLevel << "\n");
-        FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_M2L_L2L " << noCommuteBetweenLevel << "\n");
+    FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_LAST_L2L " << noCommuteAtLastLevel << "\n");
+    FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_M2L_L2L " << noCommuteBetweenLevel << "\n");
 
-        buildTaskNames();
-    }
+    this->buildTaskNames();
+  }
 
-    void buildTaskNames(){
+  void buildTaskNames(){
 #ifdef STARPU_USE_TASK_NAME
 #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-        const int namesLength = 128;
-        m2mTaskNames.resize(tree->getHeight());
-        m2lTaskNames.resize(tree->getHeight());
-        m2lOuterTaskNames.resize(tree->getHeight());
-        l2lTaskNames.resize(tree->getHeight());
-        for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
-            m2mTaskNames[idxLevel].reset(new char[namesLength]);
-            snprintf(m2mTaskNames[idxLevel].get(), namesLength, "M2M-level-%d", idxLevel);
-            m2lTaskNames[idxLevel].reset(new char[namesLength]);
-            snprintf(m2lTaskNames[idxLevel].get(), namesLength, "M2L-level-%d", idxLevel);
-            m2lOuterTaskNames[idxLevel].reset(new char[namesLength]);
-            snprintf(m2lOuterTaskNames[idxLevel].get(), namesLength, "M2L-out-level-%d", idxLevel);
-            l2lTaskNames[idxLevel].reset(new char[namesLength]);
-            snprintf(l2lTaskNames[idxLevel].get(), namesLength, "L2L-level-%d", idxLevel);
-        }
-
-        p2mTaskNames.reset(new char[namesLength]);
-        snprintf(p2mTaskNames.get(), namesLength, "P2M");
-        l2pTaskNames.reset(new char[namesLength]);
-        snprintf(l2pTaskNames.get(), namesLength, "L2P");
-        p2pTaskNames.reset(new char[namesLength]);
-        snprintf(p2pTaskNames.get(), namesLength, "P2P");
-        p2pOuterTaskNames.reset(new char[namesLength]);
-        snprintf(p2pOuterTaskNames.get(), namesLength, "P2P-out");
-#endif
-#endif
-    }
-
-    void syncData(){
-        for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
-            for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){
-                if(isDataOwnedBerenger(tree->getCellGroup(idxLevel, idxHandle)->getStartingIndex(), idxLevel)) {//Clean only our data handle
-                    starpu_data_acquire(cellHandles[idxLevel][idxHandle].symb, STARPU_R);
-                    starpu_data_release(cellHandles[idxLevel][idxHandle].symb);
-                    starpu_data_acquire(cellHandles[idxLevel][idxHandle].up, STARPU_R);
-                    starpu_data_release(cellHandles[idxLevel][idxHandle].up);
-                    starpu_data_acquire(cellHandles[idxLevel][idxHandle].down, STARPU_R);
-                    starpu_data_release(cellHandles[idxLevel][idxHandle].down);
-                }
-            }
-        }
-        {
-            for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){
-                if(isDataOwnedBerenger(tree->getCellGroup(tree->getHeight()-1, idxHandle)->getStartingIndex(), tree->getHeight()-1)) {//Clean only our data handle
-                    starpu_data_acquire(particleHandles[idxHandle].symb, STARPU_R);
-                    starpu_data_release(particleHandles[idxHandle].symb);
-                    starpu_data_acquire(particleHandles[idxHandle].down, STARPU_R);
-                    starpu_data_release(particleHandles[idxHandle].down);
-                }
+    const int namesLength = 128;
+    m2mTaskNames.resize(tree->getHeight());
+    m2lTaskNames.resize(tree->getHeight());
+    m2lOuterTaskNames.resize(tree->getHeight());
+    l2lTaskNames.resize(tree->getHeight());
+    for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
+        m2mTaskNames[idxLevel].reset(new char[namesLength]);
+        snprintf(m2mTaskNames[idxLevel].get(), namesLength, "M2M-level-%d", idxLevel);
+        m2lTaskNames[idxLevel].reset(new char[namesLength]);
+        snprintf(m2lTaskNames[idxLevel].get(), namesLength, "M2L-level-%d", idxLevel);
+        m2lOuterTaskNames[idxLevel].reset(new char[namesLength]);
+        snprintf(m2lOuterTaskNames[idxLevel].get(), namesLength, "M2L-out-level-%d", idxLevel);
+        l2lTaskNames[idxLevel].reset(new char[namesLength]);
+        snprintf(l2lTaskNames[idxLevel].get(), namesLength, "L2L-level-%d", idxLevel);
+      }
+
+    p2mTaskNames.reset(new char[namesLength]);
+    snprintf(p2mTaskNames.get(), namesLength, "P2M");
+    l2pTaskNames.reset(new char[namesLength]);
+    snprintf(l2pTaskNames.get(), namesLength, "L2P");
+    p2pTaskNames.reset(new char[namesLength]);
+    snprintf(p2pTaskNames.get(), namesLength, "P2P");
+    p2pOuterTaskNames.reset(new char[namesLength]);
+    snprintf(p2pOuterTaskNames.get(), namesLength, "P2P-out");
+#endif
+#endif
+  }
+
+  void syncData(){
+    for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
+        for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){
+            if(isDataOwnedBerenger(tree->getCellGroup(idxLevel, idxHandle)->getStartingIndex(), idxLevel)) {//Clean only our data handle
+                starpu_data_acquire(cellHandles[idxLevel][idxHandle].symb, STARPU_R);
+                starpu_data_release(cellHandles[idxLevel][idxHandle].symb);
+                starpu_data_acquire(cellHandles[idxLevel][idxHandle].up, STARPU_R);
+                starpu_data_release(cellHandles[idxLevel][idxHandle].up);
+                starpu_data_acquire(cellHandles[idxLevel][idxHandle].down, STARPU_R);
+                starpu_data_release(cellHandles[idxLevel][idxHandle].down);
+              }
+          }
+      }
+    {
+      for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){
+          if(isDataOwnedBerenger(tree->getCellGroup(tree->getHeight()-1, idxHandle)->getStartingIndex(), tree->getHeight()-1)) {//Clean only our data handle
+              starpu_data_acquire(particleHandles[idxHandle].symb, STARPU_R);
+              starpu_data_release(particleHandles[idxHandle].symb);
+              starpu_data_acquire(particleHandles[idxHandle].down, STARPU_R);
+              starpu_data_release(particleHandles[idxHandle].down);
             }
         }
     }
+  }
 
-    ~FGroupTaskStarPUImplicitAlgorithm(){
-        starpu_resume();
+  ~FGroupTaskStarPUImplicitAlgorithm(){
+    starpu_resume();
 
-        cleanHandle();
-        delete[] cellHandles;
+    cleanHandle();
+    delete[] cellHandles;
 
-        starpu_pthread_mutex_t releaseMutex;
-        starpu_pthread_mutex_init(&releaseMutex, NULL);
+    starpu_pthread_mutex_t releaseMutex;
+    starpu_pthread_mutex_init(&releaseMutex, NULL);
 #ifdef STARPU_USE_CPU
-        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
-            starpu_pthread_mutex_lock(&releaseMutex);
-            cpuWrapper.releaseKernel(starpu_worker_get_id());
-            starpu_pthread_mutex_unlock(&releaseMutex);
-        });
-        wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
+    FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
+        starpu_pthread_mutex_lock(&releaseMutex);
+        cpuWrapper.releaseKernel(starpu_worker_get_id());
+        starpu_pthread_mutex_unlock(&releaseMutex);
+      });
+    wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper);
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
-            starpu_pthread_mutex_lock(&releaseMutex);
-            cudaWrapper.releaseKernel(starpu_worker_get_id());
-            starpu_pthread_mutex_unlock(&releaseMutex);
-        });
-        wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
+    FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
+        starpu_pthread_mutex_lock(&releaseMutex);
+        cudaWrapper.releaseKernel(starpu_worker_get_id());
+        starpu_pthread_mutex_unlock(&releaseMutex);
+      });
+    wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper);
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
-            starpu_pthread_mutex_lock(&releaseMutex);
-            openclWrapper.releaseKernel(starpu_worker_get_id());
-            starpu_pthread_mutex_unlock(&releaseMutex);
-        });
-        wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
+    FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
+        starpu_pthread_mutex_lock(&releaseMutex);
+        openclWrapper.releaseKernel(starpu_worker_get_id());
+        starpu_pthread_mutex_unlock(&releaseMutex);
+      });
+    wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper);
 #endif
-        starpu_pthread_mutex_destroy(&releaseMutex);
+    starpu_pthread_mutex_destroy(&releaseMutex);
 
 
 #ifdef STARPU_SUPPORT_ARBITER
-        starpu_arbiter_destroy(arbiterGlobal);
+    starpu_arbiter_destroy(arbiterGlobal);
 #endif
 
-        for(auto externalInteraction : externalInteractionsLeafLevelOpposite)
-            delete externalInteraction;
+    for(auto externalInteraction : externalInteractionsLeafLevelOpposite)
+      delete externalInteraction;
 
-        starpu_mpi_shutdown();
-        starpu_shutdown();
-    }
+    starpu_mpi_shutdown();
+    starpu_shutdown();
+  }
 
-    void rebuildInteractions(){
-        FAssertLF(getenv("OMP_WAIT_POLICY") == nullptr
-                  || strcmp(getenv("OMP_WAIT_POLICY"), "PASSIVE") == 0
-                  || strcmp(getenv("OMP_WAIT_POLICY"), "passive") == 0);
+  void rebuildInteractions(){
+    std::cout << " begin rebuildInteractions " << std::endl;
+    FAssertLF(getenv("OMP_WAIT_POLICY") == nullptr
+              || strcmp(getenv("OMP_WAIT_POLICY"), "PASSIVE") == 0
+              || strcmp(getenv("OMP_WAIT_POLICY"), "passive") == 0);
 
 #pragma omp parallel
 #pragma omp single
-        buildExternalInteractionVecs();
-
+    buildExternalInteractionVecs();
+    if(this->nb_block <= 0){
+        std::cout << " buildHandles " <<std::endl;
         buildHandles();
-    }
+      }
+    else {
+        std::cout << " buildDistributedHandles " <<std::endl;
+        buildDistributedHandles();
+      }
+    std::cout << " end rebuildInteractions " << std::endl;
+    this->printCellHandels ();
+  }
 
 
 #ifdef STARPU_USE_CPU
-    void forEachCpuWorker(std::function<void(void)> func){
-        starpu_resume();
-        FStarPUUtils::ExecOnWorkers(STARPU_CPU, func);
-        starpu_pause();
-    }
-
-    void forEachCpuWorker(std::function<void(KernelClass*)> func){
-        starpu_resume();
-        FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
-            func(cpuWrapper.getKernel(starpu_worker_get_id()));
-        });
-        starpu_pause();
-    }
+  void forEachCpuWorker(std::function<void(void)> func){
+    starpu_resume();
+    FStarPUUtils::ExecOnWorkers(STARPU_CPU, func);
+    starpu_pause();
+  }
+
+  void forEachCpuWorker(std::function<void(KernelClass*)> func){
+    starpu_resume();
+    FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){
+        func(cpuWrapper.getKernel(starpu_worker_get_id()));
+      });
+    starpu_pause();
+  }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-    void forEachCudaWorker(std::function<void(void)> func){
-        starpu_resume();
-        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, func);
-        starpu_pause();
-    }
-    void forEachCudaWorker(std::function<void(void*)> func){
-        starpu_resume();
-        FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
-            func(cudaWrapper.getKernel(starpu_worker_get_id()));
-        });
-        starpu_pause();
-    }
+  void forEachCudaWorker(std::function<void(void)> func){
+    starpu_resume();
+    FStarPUUtils::ExecOnWorkers(STARPU_CUDA, func);
+    starpu_pause();
+  }
+  void forEachCudaWorker(std::function<void(void*)> func){
+    starpu_resume();
+    FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){
+        func(cudaWrapper.getKernel(starpu_worker_get_id()));
+      });
+    starpu_pause();
+  }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-    void forEachOpenCLWorker(std::function<void(void)> func){
-        starpu_resume();
-        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, func);
-        starpu_pause();
-    }
-    void forEachOpenCLWorker(std::function<void(void*)> func){
-        starpu_resume();
-        FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
-            func(openclWrapper.getKernel(starpu_worker_get_id()));
-        });
-        starpu_pause();
-    }
-#endif
-
-    int getRank(void) const {
-        return mpi_rank;
-    }
-    int getNProc(void) const {
-        return nproc;
-    }
-    bool isDataOwnedBerenger(MortonIndex const idx, int const idxLevel) const {
-        return dataMappingBerenger(idx, idxLevel) == mpi_rank;
-    }
-    void createMachinChose(std::vector<MortonIndex> distributedMortonIndex) {
-        nodeRepartition.resize(tree->getHeight(), std::vector<std::vector<MortonIndex>>(nproc, std::vector<MortonIndex>(2)));
-        for(int node_id = 0; node_id < nproc; ++node_id){
-            nodeRepartition[tree->getHeight()-1][node_id][0] = distributedMortonIndex[node_id*2];
-            nodeRepartition[tree->getHeight()-1][node_id][1] = distributedMortonIndex[node_id*2+1];
-        }
-        for(int idxLevel = tree->getHeight() - 2; idxLevel >= 0  ; --idxLevel){
-            nodeRepartition[idxLevel][0][0] = nodeRepartition[idxLevel+1][0][0] >> 3;
-            nodeRepartition[idxLevel][0][1] = nodeRepartition[idxLevel+1][0][1] >> 3;
-            for(int node_id = 1; node_id < nproc; ++node_id){
-                nodeRepartition[idxLevel][node_id][0] = FMath::Max(nodeRepartition[idxLevel+1][node_id][0] >> 3, nodeRepartition[idxLevel][node_id-1][0]+1); //Berenger phd :)
-                nodeRepartition[idxLevel][node_id][1] = nodeRepartition[idxLevel+1][node_id][1] >> 3;
-            }
-        }
-    }
-    int getOppositeInterIndex(const int index) const {
-        // ((( (xdiff+3) * 7) + (ydiff+3))) * 7 + zdiff + 3
-        return 343-index-1;
-    }
+  void forEachOpenCLWorker(std::function<void(void)> func){
+    starpu_resume();
+    FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, func);
+    starpu_pause();
+  }
+  void forEachOpenCLWorker(std::function<void(void*)> func){
+    starpu_resume();
+    FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){
+        func(openclWrapper.getKernel(starpu_worker_get_id()));
+      });
+    starpu_pause();
+  }
+#endif
+
+  int getRank(void) const {
+    return mpi_rank;
+  }
+  int getNProc(void) const {
+    return nproc;
+  }
+
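+  // \brief Tell whether the data at a Morton index/level is mapped to this MPI rank
+  //
+  // @param[in] idx      Morton index forwarded to dataMappingBerenger
+  // @param[in] idxLevel tree level forwarded to dataMappingBerenger
+  //
+  // @return true when dataMappingBerenger(idx, idxLevel) equals mpi_rank
+  //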
+  bool isDataOwnedBerenger(MortonIndex const idx, int const idxLevel) const {
+    return dataMappingBerenger(idx, idxLevel) == mpi_rank;
+  }
+
+  // \brief construct the distribution of the cells and the leaves
+  //
+  // @param[in] distributedMortonIndex the Morton index distribution at the leaf level (two entries per process)
+  void setMortonDistribution(const std::vector<MortonIndex> &distributedMortonIndex) {
+    //
+    std::cout << "setMortonDistribution: " <<std::endl
+              <<  "  input distribution: " << distributedMortonIndex.size() << std::endl << "    ";
+    for (auto v : distributedMortonIndex) {
+        std::cout << "   "  <<  v;
+      }
+    std::cout <<            std::endl ;
+    _nodeRepartition.resize(tree->getHeight(),
+                            std::vector<std::vector<MortonIndex>>(nproc,
+                                                                  std::vector<MortonIndex>(2)));
+    for(int node_id = 0; node_id < nproc; ++node_id){
+        _nodeRepartition[tree->getHeight()-1][node_id][0] = distributedMortonIndex[node_id*2];
+        _nodeRepartition[tree->getHeight()-1][node_id][1] = distributedMortonIndex[node_id*2+1];
+      }
+    for(int idxLevel = tree->getHeight() - 2; idxLevel >= 0  ; --idxLevel){
+        _nodeRepartition[idxLevel][0][0] = _nodeRepartition[idxLevel+1][0][0] >> 3;
+        _nodeRepartition[idxLevel][0][1] = _nodeRepartition[idxLevel+1][0][1] >> 3;
+        for(int node_id = 1; node_id < nproc; ++node_id){
+            _nodeRepartition[idxLevel][node_id][0] = FMath::Max(_nodeRepartition[idxLevel+1][node_id][0] >> 3, _nodeRepartition[idxLevel][node_id-1][0]+1); //Berenger phd :)
+            _nodeRepartition[idxLevel][node_id][1] = _nodeRepartition[idxLevel+1][node_id][1] >> 3;
+          }
+      }
+    this->printDataDistributionInfo();
+  }
+  //
+  // \brief print the local morton index distribution on the current node
+  void printDataDistributionInfo(){
+    std::cout << "Group Tree information on node "<< mpi_rank << "\n";
+    std::cout << "\t Group Size = " << -1 << "\n";
+    std::cout << "\t Tree height = " << tree->getHeight()  << "\n";
+    for(int idxLevel = 1 ; idxLevel < tree->getHeight() ; ++idxLevel){
+        std::cout << "Level "<< idxLevel <<std::endl;
+        std::cout << "\t Starting Index = " << _nodeRepartition[idxLevel][mpi_rank][0]
+                  << "\t Ending Index = " << _nodeRepartition[idxLevel][mpi_rank][1]
+                  << " Number of group "<<-1 << std::endl;
+      }
+  }
+  //
+  // \brief Mirror an interaction index: returns 343 - index - 1
+  //
+  int getOppositeInterIndex(const int index) const {
+    // ((( (xdiff+3) * 7) + (ydiff+3))) * 7 + zdiff + 3
+    return 343-index-1;
+  }
 protected:
-    /**
+  /**
       * Runs the complete algorithm.
       */
-    void executeCore(const unsigned operationsToProceed) override {
-        FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" );
-        const bool directOnly = (tree->getHeight() <= 2);
+  void executeCore(const unsigned operationsToProceed) override {
+    FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" );
+    const bool directOnly = (tree->getHeight() <= 2);
+    std::cout << " Algo executeCore  " <<std::endl
+              << "       upperWorkingLevel: " << FAbstractAlgorithm::upperWorkingLevel
+              << "       lowerWorkingLevel: " <<  FAbstractAlgorithm::lowerWorkingLevel <<std::endl
+              << "       Operation to proceed: "<<  FFmmOperations_string(operationsToProceed) << std::endl;
 
 #ifdef STARPU_USE_CPU
-        FTIME_TASKS(cpuWrapper.taskTimeRecorder.start());
-#endif
-        starpu_resume();
-        FLOG( FTic timerSoumission; );
-
-        if( operationsToProceed & FFmmP2P ) directPass();
-
-        if(operationsToProceed & FFmmP2M && !directOnly) bottomPass();
-
-        if(operationsToProceed & FFmmM2M && !directOnly) upwardPass();
-
-        if(operationsToProceed & FFmmM2L && !directOnly) transferPass(FAbstractAlgorithm::upperWorkingLevel, FAbstractAlgorithm::lowerWorkingLevel-1 , true, true);
-
-        if(operationsToProceed & FFmmL2L && !directOnly) downardPass();
-
-        if(operationsToProceed & FFmmM2L && !directOnly) transferPass(FAbstractAlgorithm::lowerWorkingLevel-1, FAbstractAlgorithm::lowerWorkingLevel, true, true);
-
-        if( operationsToProceed & FFmmL2P && !directOnly) mergePass();
+    FTIME_TASKS(cpuWrapper.taskTimeRecorder.start());
+#endif
+    starpu_resume();
+    FLOG( FTic timerSoumission; );
+
+    if( operationsToProceed & FFmmP2P ) {
+        this->directPass(); }
+
+    if(operationsToProceed & FFmmP2M && !directOnly) this->bottomPass();
+
+    if(operationsToProceed & FFmmM2M && !directOnly){
+        if(this->nb_block > 0 && nproc > 1 ){
+            std::cout << "    upwardPassNoDuplicate"<<std::endl;
+            upwardPassNoDuplicate();
+          } else {
+            std::cout << "    upwardPassDuplicate" <<std::endl;
+            upwardPassDuplicate();
+          }
+      }
+    //
+    if(operationsToProceed & FFmmM2L && !directOnly)  {
+        transferPass(FAbstractAlgorithm::upperWorkingLevel,
+                     FAbstractAlgorithm::lowerWorkingLevel-1 , true, true);
+      }
+
+    if(operationsToProceed & FFmmL2L && !directOnly){
+        if(this->nb_block > 0 && nproc > 1){
+            std::cout << "    downardPassNoDuplicate " <<std::endl;
+            this->downardPassNoDuplicate();
+          } else {
+            std::cout << "    downardPassDuplicate " <<std::endl;
+            //downardPass() ;
+            this->downardPassDuplicate();
+          }
+      }
+    //if(operationsToProceed & FFmmL2L && !directOnly) this->downardPassDuplicate();
+
+    if(operationsToProceed & FFmmM2L && !directOnly) transferPass(FAbstractAlgorithm::lowerWorkingLevel-1, FAbstractAlgorithm::lowerWorkingLevel, true, true);
+
+    if( operationsToProceed & FFmmL2P && !directOnly) mergePass();
 #ifdef STARPU_USE_REDUX
-        if( operationsToProceed & FFmmL2P && !directOnly) readParticle();
+    if( operationsToProceed & FFmmL2P && !directOnly) readParticle();
 #endif
 
-        FLOG( FLog::Controller << "\t\t Submitting the tasks took " << timerSoumission.tacAndElapsed() << "s\n" );
+    FLOG( FLog::Controller << "\t\t Submitting the tasks took " << timerSoumission.tacAndElapsed() << "s\n" );
 
-        starpu_task_wait_for_all();
+    starpu_task_wait_for_all();
 
-        FLOG( FTic timerSync; );
-        syncData();
-        FLOG( FLog::Controller << "\t\t Moving data to the host took " << timerSync.tacAndElapsed() << "s\n" );
+    FLOG( FTic timerSync; );
+    syncData();
+    FLOG( FLog::Controller << "\t\t Moving data to the host took " << timerSync.tacAndElapsed() << "s\n" );
 
-        starpu_pause();
+    starpu_pause();
 
 #ifdef STARPU_USE_CPU
-        FTIME_TASKS(cpuWrapper.taskTimeRecorder.end());
-        FTIME_TASKS(cpuWrapper.taskTimeRecorder.saveToDisk("/tmp/taskstime-FGroupTaskStarPUAlgorithm.txt"));
+    FTIME_TASKS(cpuWrapper.taskTimeRecorder.end());
+    FTIME_TASKS(cpuWrapper.taskTimeRecorder.saveToDisk("/tmp/taskstime-FGroupTaskStarPUAlgorithm.txt"));
 #endif
-    }
+  }
 
 
-    void initCodelet(){
-        memset(&p2m_cl, 0, sizeof(p2m_cl));
+  void initCodelet(){
+    memset(&p2m_cl, 0, sizeof(p2m_cl));
 #ifdef STARPU_USE_CPU
-        if(originalCpuKernel->supportP2M(FSTARPU_CPU_IDX)){
-            p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
-            p2m_cl.where |= STARPU_CPU;
-        }
+    if(originalCpuKernel->supportP2M(FSTARPU_CPU_IDX)){
+        p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback;
+        p2m_cl.where |= STARPU_CPU;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        if(originalCpuKernel->supportP2M(FSTARPU_CUDA_IDX)){
-            p2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::bottomPassCallback;
-            p2m_cl.where |= STARPU_CUDA;
-        }
+    if(originalCpuKernel->supportP2M(FSTARPU_CUDA_IDX)){
+        p2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::bottomPassCallback;
+        p2m_cl.where |= STARPU_CUDA;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        if(originalCpuKernel->supportP2M(FSTARPU_OPENCL_IDX)){
-            p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback;
-            p2m_cl.where |= STARPU_OPENCL;
-        }
-#endif
-        p2m_cl.nbuffers = 3;
-        p2m_cl.modes[0] = STARPU_R;
-        p2m_cl.modes[1] = STARPU_RW;
-        p2m_cl.modes[2] = STARPU_R;
-        p2m_cl.name = "p2m_cl";
-
-        memset(&m2m_cl, 0, sizeof(m2m_cl));
+    if(originalCpuKernel->supportP2M(FSTARPU_OPENCL_IDX)){
+        p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback;
+        p2m_cl.where |= STARPU_OPENCL;
+      }
+#endif
+    p2m_cl.nbuffers = 3;
+    p2m_cl.modes[0] = STARPU_R;
+    p2m_cl.modes[1] = STARPU_RW;
+    p2m_cl.modes[2] = STARPU_R;
+    p2m_cl.name = "p2m_cl";
+
+    memset(&m2m_cl, 0, sizeof(m2m_cl));
 #ifdef STARPU_USE_CPU
-        if(originalCpuKernel->supportM2M(FSTARPU_CPU_IDX)){
-            m2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
-            m2m_cl.where |= STARPU_CPU;
-        }
+    if(originalCpuKernel->supportM2M(FSTARPU_CPU_IDX)){
+        m2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback;
+        m2m_cl.where |= STARPU_CPU;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        if(originalCpuKernel->supportM2M(FSTARPU_CUDA_IDX)){
-            m2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::upwardPassCallback;
-            m2m_cl.where |= STARPU_CUDA;
-        }
+    if(originalCpuKernel->supportM2M(FSTARPU_CUDA_IDX)){
+        m2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::upwardPassCallback;
+        m2m_cl.where |= STARPU_CUDA;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        if(originalCpuKernel->supportM2M(FSTARPU_OPENCL_IDX)){
-            m2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback;
-            m2m_cl.where |= STARPU_OPENCL;
-        }
-#endif
-        m2m_cl.nbuffers = 4;
-        m2m_cl.dyn_modes = (starpu_data_access_mode*)malloc(m2m_cl.nbuffers*sizeof(starpu_data_access_mode));
-        m2m_cl.dyn_modes[0] = STARPU_R;
-        m2m_cl.dyn_modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
-        m2m_cl.name = "m2m_cl";
-        m2m_cl.dyn_modes[2] = STARPU_R;
-        m2m_cl.dyn_modes[3] = STARPU_R;
-
-        memset(&l2l_cl, 0, sizeof(l2l_cl));
+    if(originalCpuKernel->supportM2M(FSTARPU_OPENCL_IDX)){
+        m2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback;
+        m2m_cl.where |= STARPU_OPENCL;
+      }
+#endif
+    m2m_cl.nbuffers = 4;
+    m2m_cl.dyn_modes = (starpu_data_access_mode*)malloc(m2m_cl.nbuffers*sizeof(starpu_data_access_mode));
+    m2m_cl.dyn_modes[0] = STARPU_R;
+    m2m_cl.dyn_modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
+    m2m_cl.name = "m2m_cl";
+    m2m_cl.dyn_modes[2] = STARPU_R;
+    m2m_cl.dyn_modes[3] = STARPU_R;
+
+    memset(&l2l_cl, 0, sizeof(l2l_cl));
 #ifdef STARPU_USE_CPU
-        if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){
-            l2l_cl.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
-            l2l_cl.where |= STARPU_CPU;
-        }
+    if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){
+        l2l_cl.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
+        l2l_cl.where |= STARPU_CPU;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){
-            l2l_cl.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
-            l2l_cl.where |= STARPU_CUDA;
-        }
+    if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){
+        l2l_cl.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
+        l2l_cl.where |= STARPU_CUDA;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
-            l2l_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
-            l2l_cl.where |= STARPU_OPENCL;
-        }
-#endif
-        l2l_cl.nbuffers = 4;
-        l2l_cl.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl.nbuffers*sizeof(starpu_data_access_mode));
-        l2l_cl.dyn_modes[0] = STARPU_R;
-        l2l_cl.dyn_modes[1] = STARPU_R;
-        l2l_cl.name = "l2l_cl";
-        l2l_cl.dyn_modes[2] = STARPU_R;
-        l2l_cl.dyn_modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
-
-        memset(&l2l_cl_nocommute, 0, sizeof(l2l_cl_nocommute));
+    if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
+        l2l_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
+        l2l_cl.where |= STARPU_OPENCL;
+      }
+#endif
+    l2l_cl.nbuffers = 4;
+    l2l_cl.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl.nbuffers*sizeof(starpu_data_access_mode));
+    l2l_cl.dyn_modes[0] = STARPU_R;
+    l2l_cl.dyn_modes[1] = STARPU_R;
+    l2l_cl.name = "l2l_cl";
+    l2l_cl.dyn_modes[2] = STARPU_R;
+    l2l_cl.dyn_modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
+
+    memset(&l2l_cl_nocommute, 0, sizeof(l2l_cl_nocommute));
 #ifdef STARPU_USE_CPU
-        if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){
-            l2l_cl_nocommute.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
-            l2l_cl_nocommute.where |= STARPU_CPU;
-        }
+    if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){
+        l2l_cl_nocommute.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback;
+        l2l_cl_nocommute.where |= STARPU_CPU;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){
-            l2l_cl_nocommute.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
-            l2l_cl_nocommute.where |= STARPU_CUDA;
-        }
+    if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){
+        l2l_cl_nocommute.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback;
+        l2l_cl_nocommute.where |= STARPU_CUDA;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
-            l2l_cl_nocommute.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
-            l2l_cl_nocommute.where |= STARPU_OPENCL;
-        }
-#endif
-        l2l_cl_nocommute.nbuffers = 4;
-        l2l_cl_nocommute.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl_nocommute.nbuffers*sizeof(starpu_data_access_mode));
-        l2l_cl_nocommute.dyn_modes[0] = STARPU_R;
-        l2l_cl_nocommute.dyn_modes[1] = STARPU_R;
-        l2l_cl_nocommute.name = "l2l_cl";
-        l2l_cl_nocommute.dyn_modes[2] = STARPU_R;
-        l2l_cl_nocommute.dyn_modes[3] = STARPU_RW;
-
-        memset(&l2p_cl, 0, sizeof(l2p_cl));
+    if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){
+        l2l_cl_nocommute.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback;
+        l2l_cl_nocommute.where |= STARPU_OPENCL;
+      }
+#endif
+    l2l_cl_nocommute.nbuffers = 4;
+    l2l_cl_nocommute.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl_nocommute.nbuffers*sizeof(starpu_data_access_mode));
+    l2l_cl_nocommute.dyn_modes[0] = STARPU_R;
+    l2l_cl_nocommute.dyn_modes[1] = STARPU_R;
+    l2l_cl_nocommute.name = "l2l_cl";
+    l2l_cl_nocommute.dyn_modes[2] = STARPU_R;
+    l2l_cl_nocommute.dyn_modes[3] = STARPU_RW;
+
+    memset(&l2p_cl, 0, sizeof(l2p_cl));
 #ifdef STARPU_USE_CPU
-        if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){
-            l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
-            l2p_cl.where |= STARPU_CPU;
-        }
+    if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){
+        l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback;
+        l2p_cl.where |= STARPU_CPU;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){
-            l2p_cl.cuda_funcs[0] = StarPUCudaWrapperClass::mergePassCallback;
-            l2p_cl.where |= STARPU_CUDA;
-        }
+    if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){
+        l2p_cl.cuda_funcs[0] = StarPUCudaWrapperClass::mergePassCallback;
+        l2p_cl.where |= STARPU_CUDA;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
-            l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback;
-            l2p_cl.where |= STARPU_OPENCL;
-        }
-#endif
-        l2p_cl.nbuffers = 4;
-        l2p_cl.modes[0] = STARPU_R;
-        l2p_cl.modes[1] = STARPU_R;
-        l2p_cl.modes[2] = STARPU_R;
+    if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
+        l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback;
+        l2p_cl.where |= STARPU_OPENCL;
+      }
+#endif
+    l2p_cl.nbuffers = 4;
+    l2p_cl.modes[0] = STARPU_R;
+    l2p_cl.modes[1] = STARPU_R;
+    l2p_cl.modes[2] = STARPU_R;
 #ifdef STARPU_USE_REDUX
-        l2p_cl.modes[3] = STARPU_REDUX;
+    l2p_cl.modes[3] = STARPU_REDUX;
 #else
-        l2p_cl.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
+    l2p_cl.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
 #endif
-        l2p_cl.name = "l2p_cl";
+    l2p_cl.name = "l2p_cl";
 
-        memset(&p2p_cl_in, 0, sizeof(p2p_cl_in));
+    memset(&p2p_cl_in, 0, sizeof(p2p_cl_in));
 #ifdef STARPU_USE_CPU
-        if(originalCpuKernel->supportP2P(FSTARPU_CPU_IDX)){
-            p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
-            p2p_cl_in.where |= STARPU_CPU;
-        }
+    if(originalCpuKernel->supportP2P(FSTARPU_CPU_IDX)){
+        p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback;
+        p2p_cl_in.where |= STARPU_CPU;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        if(originalCpuKernel->supportP2P(FSTARPU_CUDA_IDX)){
-            p2p_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::directInPassCallback;
-            p2p_cl_in.where |= STARPU_CUDA;
-        }
+    if(originalCpuKernel->supportP2P(FSTARPU_CUDA_IDX)){
+        p2p_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::directInPassCallback;
+        p2p_cl_in.where |= STARPU_CUDA;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){
-            p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback;
-            p2p_cl_in.where |= STARPU_OPENCL;
-        }
+    if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){
+        p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback;
+        p2p_cl_in.where |= STARPU_OPENCL;
+      }
 #endif
-        p2p_cl_in.nbuffers = 2;
-        p2p_cl_in.modes[0] = STARPU_R;
+    p2p_cl_in.nbuffers = 2;
+    p2p_cl_in.modes[0] = STARPU_R;
 #ifdef STARPU_USE_REDUX
-        p2p_cl_in.modes[1] = STARPU_REDUX;
+    p2p_cl_in.modes[1] = STARPU_REDUX;
 #else
-        p2p_cl_in.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
+    p2p_cl_in.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
 #endif
-        p2p_cl_in.name = "p2p_cl_in";
-        memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout));
+    p2p_cl_in.name = "p2p_cl_in";
+    memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout));
 #ifdef STARPU_USE_CPU
-        if(originalCpuKernel->supportP2PExtern(FSTARPU_CPU_IDX)){
-            p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
-            p2p_cl_inout.where |= STARPU_CPU;
-        }
+    if(originalCpuKernel->supportP2PExtern(FSTARPU_CPU_IDX)){
+        p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback;
+        p2p_cl_inout.where |= STARPU_CPU;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        if(originalCpuKernel->supportP2PExtern(FSTARPU_CUDA_IDX)){
-            p2p_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallback;
-            p2p_cl_inout.where |= STARPU_CUDA;
-        }
+    if(originalCpuKernel->supportP2PExtern(FSTARPU_CUDA_IDX)){
+        p2p_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallback;
+        p2p_cl_inout.where |= STARPU_CUDA;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        if(originalCpuKernel->supportP2PExtern(FSTARPU_OPENCL_IDX)){
-            p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback;
-            p2p_cl_inout.where |= STARPU_OPENCL;
-        }
+    if(originalCpuKernel->supportP2PExtern(FSTARPU_OPENCL_IDX)){
+        p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback;
+        p2p_cl_inout.where |= STARPU_OPENCL;
+      }
 #endif
-        p2p_cl_inout.nbuffers = 4;
-        p2p_cl_inout.modes[0] = STARPU_R;
+    p2p_cl_inout.nbuffers = 4;
+    p2p_cl_inout.modes[0] = STARPU_R;
 #ifdef STARPU_USE_REDUX
-        p2p_cl_inout.modes[1] = STARPU_REDUX;
+    p2p_cl_inout.modes[1] = STARPU_REDUX;
 #else
-        p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED);
+    p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED);
 #endif
-        p2p_cl_inout.modes[2] = STARPU_R;
+    p2p_cl_inout.modes[2] = STARPU_R;
 #ifdef STARPU_USE_REDUX
-        p2p_cl_inout.modes[3] = STARPU_REDUX;
+    p2p_cl_inout.modes[3] = STARPU_REDUX;
 #else
-        p2p_cl_inout.modes[3] = starpu_data_access_mode(STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED);
+    p2p_cl_inout.modes[3] = starpu_data_access_mode(STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED);
 #endif
-        p2p_cl_inout.name = "p2p_cl_inout";
+    p2p_cl_inout.name = "p2p_cl_inout";
 
-        memset(&m2l_cl_in, 0, sizeof(m2l_cl_in));
+    memset(&m2l_cl_in, 0, sizeof(m2l_cl_in));
 #ifdef STARPU_USE_CPU
-        if(originalCpuKernel->supportM2L(FSTARPU_CPU_IDX)){
-            m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
-            m2l_cl_in.where |= STARPU_CPU;
-        }
+    if(originalCpuKernel->supportM2L(FSTARPU_CPU_IDX)){
+        m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback;
+        m2l_cl_in.where |= STARPU_CPU;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        if(originalCpuKernel->supportM2L(FSTARPU_CUDA_IDX)){
-            m2l_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::transferInPassCallback;
-            m2l_cl_in.where |= STARPU_CUDA;
-        }
+    if(originalCpuKernel->supportM2L(FSTARPU_CUDA_IDX)){
+        m2l_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::transferInPassCallback;
+        m2l_cl_in.where |= STARPU_CUDA;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
-            m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback;
-            m2l_cl_in.where |= STARPU_OPENCL;
-        }
-#endif
-        m2l_cl_in.nbuffers = 3;
-        m2l_cl_in.modes[0] = STARPU_R;
-        m2l_cl_in.modes[1] = STARPU_R;
-        m2l_cl_in.modes[2] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
-        m2l_cl_in.name = "m2l_cl_in";
-
-        memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout));
+    if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){
+        m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback;
+        m2l_cl_in.where |= STARPU_OPENCL;
+      }
+#endif
+    m2l_cl_in.nbuffers = 3;
+    m2l_cl_in.modes[0] = STARPU_R;
+    m2l_cl_in.modes[1] = STARPU_R;
+    m2l_cl_in.modes[2] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
+    m2l_cl_in.name = "m2l_cl_in";
+
+    memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout));
 #ifdef STARPU_USE_CPU
-        if(originalCpuKernel->supportM2LExtern(FSTARPU_CPU_IDX)){
-            m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
-            m2l_cl_inout.where |= STARPU_CPU;
-        }
+    if(originalCpuKernel->supportM2LExtern(FSTARPU_CPU_IDX)){
+        m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback;
+        m2l_cl_inout.where |= STARPU_CPU;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        if(originalCpuKernel->supportM2LExtern(FSTARPU_CUDA_IDX)){
-            m2l_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallback;
-            m2l_cl_inout.where |= STARPU_CUDA;
-        }
+    if(originalCpuKernel->supportM2LExtern(FSTARPU_CUDA_IDX)){  //M2L method between two blocks
+        m2l_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallback;
+        m2l_cl_inout.where |= STARPU_CUDA;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        if(originalCpuKernel->supportM2LExtern(FSTARPU_OPENCL_IDX)){
-            m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback;
-            m2l_cl_inout.where |= STARPU_OPENCL;
-        }
-#endif
-        m2l_cl_inout.nbuffers = 4;
-        m2l_cl_inout.modes[0] = STARPU_R;
-        m2l_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
-        m2l_cl_inout.modes[2] = STARPU_R;
-        m2l_cl_inout.modes[3] = STARPU_R;
-        m2l_cl_inout.name = "m2l_cl_inout";
+    if(originalCpuKernel->supportM2LExtern(FSTARPU_OPENCL_IDX)){
+        m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback;
+        m2l_cl_inout.where |= STARPU_OPENCL;
+      }
+#endif
+    m2l_cl_inout.nbuffers = 4;
+    m2l_cl_inout.modes[0] = STARPU_R;
+    m2l_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
+    m2l_cl_inout.modes[2] = STARPU_R;
+    m2l_cl_inout.modes[3] = STARPU_R;
+    m2l_cl_inout.name = "m2l_cl_inout";
 
 #ifdef STARPU_USE_REDUX
-        memset(&p2p_redux_init, 0, sizeof(p2p_redux_init));
+    memset(&p2p_redux_init, 0, sizeof(p2p_redux_init));
 #ifdef STARPU_USE_CPU
-        p2p_redux_init.cpu_funcs[0] = FStarPUReduxCpu::InitData<typename ParticleGroupClass::ParticleDataType>;
-        p2p_redux_init.where |= STARPU_CPU;
+    p2p_redux_init.cpu_funcs[0] = FStarPUReduxCpu::InitData<typename ParticleGroupClass::ParticleDataType>;
+    p2p_redux_init.where |= STARPU_CPU;
 #endif
-        p2p_redux_init.nbuffers = 1;
-        p2p_redux_init.modes[0] = STARPU_RW;
-        p2p_redux_init.name = "p2p_redux_init";
+    p2p_redux_init.nbuffers = 1;
+    p2p_redux_init.modes[0] = STARPU_RW;
+    p2p_redux_init.name = "p2p_redux_init";
 
-        memset(&p2p_redux_perform, 0, sizeof(p2p_redux_perform));
+    memset(&p2p_redux_perform, 0, sizeof(p2p_redux_perform));
 #ifdef STARPU_USE_CPU
-        p2p_redux_perform.cpu_funcs[0] = FStarPUReduxCpu::ReduceData<typename ParticleGroupClass::ParticleDataType>;
-        p2p_redux_perform.where |= STARPU_CPU;
+    p2p_redux_perform.cpu_funcs[0] = FStarPUReduxCpu::ReduceData<typename ParticleGroupClass::ParticleDataType>;
+    p2p_redux_perform.where |= STARPU_CPU;
 #endif
-        p2p_redux_perform.nbuffers = 2;
-        p2p_redux_perform.modes[0] = STARPU_RW;
-        p2p_redux_perform.modes[1] = STARPU_R;
-        p2p_redux_perform.name = "p2p_redux_perform";
+    p2p_redux_perform.nbuffers = 2;
+    p2p_redux_perform.modes[0] = STARPU_RW;
+    p2p_redux_perform.modes[1] = STARPU_R;
+    p2p_redux_perform.name = "p2p_redux_perform";
 
-        memset(&p2p_redux_read, 0, sizeof(p2p_redux_read));
+    memset(&p2p_redux_read, 0, sizeof(p2p_redux_read));
 #ifdef STARPU_USE_CPU
-        if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){
-            p2p_redux_read.cpu_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>;
-            p2p_redux_read.where |= STARPU_CPU;
-        }
+    if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){
+        p2p_redux_read.cpu_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>;
+        p2p_redux_read.where |= STARPU_CPU;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){
-            p2p_redux_read.cuda_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>;
-            p2p_redux_read.where |= STARPU_CUDA;
-        }
+    if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){
+        p2p_redux_read.cuda_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>;
+        p2p_redux_read.where |= STARPU_CUDA;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
-            p2p_redux_read.opencl_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>;
-            p2p_redux_read.where |= STARPU_OPENCL;
-        }
+    if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){
+        p2p_redux_read.opencl_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>;
+        p2p_redux_read.where |= STARPU_OPENCL;
+      }
 #endif
-        p2p_redux_read.nbuffers = 1;
-        p2p_redux_read.modes[0] = STARPU_R;
-        p2p_redux_read.name = "p2p_redux_read";
+    p2p_redux_read.nbuffers = 1;
+    p2p_redux_read.modes[0] = STARPU_R;
+    p2p_redux_read.name = "p2p_redux_read";
 #endif
 
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-        memset(&p2p_extract, 0, sizeof(p2p_extract));
-        p2p_extract.nbuffers = 2;
-        p2p_extract.modes[0] = STARPU_R;
-        p2p_extract.modes[1] = STARPU_RW;
-        p2p_extract.name = "p2p_extract";
-        p2p_extract.cpu_funcs[0] = ThisClass::ExtractP2P;
-        p2p_extract.where |= STARPU_CPU;
-
-        memset(&p2p_insert, 0, sizeof(p2p_insert));
-        p2p_insert.nbuffers = 2;
-        p2p_insert.modes[0] = STARPU_R;
-        p2p_insert.modes[1] = STARPU_RW;
-        p2p_insert.name = "p2p_insert";
-        p2p_insert.cpu_funcs[0] = ThisClass::InsertP2P;
-        p2p_insert.where |= STARPU_CPU;
-
-        memset(&p2p_insert_bis, 0, sizeof(p2p_insert_bis));
-        p2p_insert_bis.nbuffers = 2;
-        p2p_insert_bis.modes[0] = STARPU_R;
-        p2p_insert_bis.modes[1] = STARPU_RW;
-        p2p_insert_bis.name = "p2p_insert_bis";
-        p2p_insert_bis.cpu_funcs[0] = ThisClass::InsertP2PBis;
-        p2p_insert_bis.where |= STARPU_CPU;
-
-        memset(&cell_extract_up, 0, sizeof(cell_extract_up));
-        cell_extract_up.nbuffers = 3;
-        cell_extract_up.modes[0] = STARPU_R;
-        cell_extract_up.modes[1] = STARPU_R;
-        cell_extract_up.modes[2] = STARPU_RW;
-        cell_extract_up.name = "cell_extract_up";
-        cell_extract_up.cpu_funcs[0] = ThisClass::ExtractCellUp;
-        cell_extract_up.where |= STARPU_CPU;
-
-        memset(&cell_insert_up, 0, sizeof(cell_insert_up));
-        cell_insert_up.nbuffers = 3;
-        cell_insert_up.modes[0] = STARPU_R;
-        cell_insert_up.modes[1] = STARPU_RW;
-        cell_insert_up.modes[2] = STARPU_RW;
-        cell_insert_up.name = "cell_insert_up";
-        cell_insert_up.cpu_funcs[0] = ThisClass::InsertCellUp;
-        cell_insert_up.where |= STARPU_CPU;
-
-
-        memset(&cell_insert_up_bis, 0, sizeof(cell_insert_up_bis));
-        cell_insert_up_bis.nbuffers = 3;
-        cell_insert_up_bis.modes[0] = STARPU_R;
-        cell_insert_up_bis.modes[1] = STARPU_RW;
-        cell_insert_up_bis.modes[2] = STARPU_RW;
-        cell_insert_up_bis.name = "cell_insert_up_bis";
-        cell_insert_up_bis.cpu_funcs[0] = ThisClass::InsertCellUpBis;
-        cell_insert_up_bis.where |= STARPU_CPU;
-#endif
-    }
+    memset(&p2p_extract, 0, sizeof(p2p_extract));
+    p2p_extract.nbuffers = 2;
+    p2p_extract.modes[0] = STARPU_R;
+    p2p_extract.modes[1] = STARPU_RW;
+    p2p_extract.name = "p2p_extract";
+    p2p_extract.cpu_funcs[0] = ThisClass::ExtractP2P;
+    p2p_extract.where |= STARPU_CPU;
+
+    memset(&p2p_insert, 0, sizeof(p2p_insert));
+    p2p_insert.nbuffers = 2;
+    p2p_insert.modes[0] = STARPU_R;
+    p2p_insert.modes[1] = STARPU_RW;
+    p2p_insert.name = "p2p_insert";
+    p2p_insert.cpu_funcs[0] = ThisClass::InsertP2P;
+    p2p_insert.where |= STARPU_CPU;
+
+    memset(&p2p_insert_bis, 0, sizeof(p2p_insert_bis));
+    p2p_insert_bis.nbuffers = 2;
+    p2p_insert_bis.modes[0] = STARPU_R;
+    p2p_insert_bis.modes[1] = STARPU_RW;
+    p2p_insert_bis.name = "p2p_insert_bis";
+    p2p_insert_bis.cpu_funcs[0] = ThisClass::InsertP2PBis;
+    p2p_insert_bis.where |= STARPU_CPU;
+
+    memset(&cell_extract_up, 0, sizeof(cell_extract_up));
+    cell_extract_up.nbuffers = 3;
+    cell_extract_up.modes[0] = STARPU_R;
+    cell_extract_up.modes[1] = STARPU_R;
+    cell_extract_up.modes[2] = STARPU_RW;
+    cell_extract_up.name = "cell_extract_up";
+    cell_extract_up.cpu_funcs[0] = ThisClass::ExtractCellUp;
+    cell_extract_up.where |= STARPU_CPU;
+
+    memset(&cell_insert_up, 0, sizeof(cell_insert_up));
+    cell_insert_up.nbuffers = 3;
+    cell_insert_up.modes[0] = STARPU_R;
+    cell_insert_up.modes[1] = STARPU_RW;
+    cell_insert_up.modes[2] = STARPU_RW;
+    cell_insert_up.name = "cell_insert_up";
+    cell_insert_up.cpu_funcs[0] = ThisClass::InsertCellUp;
+    cell_insert_up.where |= STARPU_CPU;
+
+
+    memset(&cell_insert_up_bis, 0, sizeof(cell_insert_up_bis));
+    cell_insert_up_bis.nbuffers = 3;
+    cell_insert_up_bis.modes[0] = STARPU_R;
+    cell_insert_up_bis.modes[1] = STARPU_RW;
+    cell_insert_up_bis.modes[2] = STARPU_RW;
+    cell_insert_up_bis.name = "cell_insert_up_bis";
+    cell_insert_up_bis.cpu_funcs[0] = ThisClass::InsertCellUpBis;
+    cell_insert_up_bis.where |= STARPU_CPU;
+#endif
+  }
 
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-    static void InsertP2P(void *buffers[], void *cl_arg){
-        ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]),
-                                      STARPU_VECTOR_GET_NX(buffers[1]),
-                                      nullptr);
+  static void InsertP2P(void *buffers[], void *cl_arg){ // CPU function of the p2p_insert codelet (buffers[0] declared STARPU_R, buffers[1] STARPU_RW)
+    ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), // group object built over the memory of the second vector buffer
+        STARPU_VECTOR_GET_NX(buffers[1]), // element count of that vector, passed as the group size
+        nullptr); // third constructor argument is left null here
 
-        ParticleExtractedHandles* interactionBufferPtr;
-        starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr);
+    ParticleExtractedHandles* interactionBufferPtr; // filled from the task argument on the next line
+    starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr); // the packed task argument holds a single pointer
 
-        containers.restoreData(interactionBufferPtr->leavesToExtract,
-                               (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]),
-                                STARPU_VECTOR_GET_NX(buffers[0]));
-    }
+    containers.restoreData(interactionBufferPtr->leavesToExtract, // presumably writes the listed leaves back into the group -- confirm against restoreData
+                           (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), // source bytes: first (read-only) vector buffer
+        STARPU_VECTOR_GET_NX(buffers[0])); // element count of the source vector
+  }
 
-    static void InsertP2PBis(void *buffers[], void *cl_arg){
-        ParticleExtractedHandles* interactionBufferPtr;
-        const unsigned char* dataPtr;
-        size_t datasize;
-        starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr, &dataPtr, &datasize);
+  static void InsertP2PBis(void *buffers[], void *cl_arg){ // CPU function of the p2p_insert_bis codelet (buffers[0] declared STARPU_R, buffers[1] STARPU_RW)
+    ParticleExtractedHandles* interactionBufferPtr; // first value unpacked from the task argument
+    const unsigned char* dataPtr; // second unpacked value: source of the memcpy below
+    size_t datasize; // third unpacked value: number of bytes copied by the memcpy below
+    starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr, &dataPtr, &datasize); // values must have been packed in this same order by the task submitter
 
-        memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), dataPtr, datasize);
+    memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), dataPtr, datasize); // NOTE(review): datasize is not checked against STARPU_VECTOR_GET_NX(buffers[1])
 
-        ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]),
-                                      STARPU_VECTOR_GET_NX(buffers[1]),
-                                      nullptr);
+    ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), // group object built over the bytes just copied into buffers[1]
+        STARPU_VECTOR_GET_NX(buffers[1]), // element count of that vector, passed as the group size
+        nullptr); // third constructor argument is left null here
 
 
-        containers.restoreData(interactionBufferPtr->leavesToExtract,
-                               (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]),
-                                STARPU_VECTOR_GET_NX(buffers[0]));
-    }
+    containers.restoreData(interactionBufferPtr->leavesToExtract, // presumably writes the listed leaves back into the group -- confirm against restoreData
+                           (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), // source bytes: first (read-only) vector buffer
+        STARPU_VECTOR_GET_NX(buffers[0])); // element count of the source vector
+  }
 
-    static void ExtractP2P(void *buffers[], void *cl_arg){
-        ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]),
-                                      STARPU_VECTOR_GET_NX(buffers[0]),
-                                      nullptr);
+  static void ExtractP2P(void *buffers[], void *cl_arg){ // CPU function of the p2p_extract codelet (buffers[0] declared STARPU_R, buffers[1] STARPU_RW)
+    ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), // group object built over the first (read-only) vector buffer
+        STARPU_VECTOR_GET_NX(buffers[0]), // element count of that vector, passed as the group size
+        nullptr); // third constructor argument is left null here
 
-        ParticleExtractedHandles* interactionBufferPtr;
-        starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr);
+    ParticleExtractedHandles* interactionBufferPtr; // filled from the task argument on the next line
+    starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr); // the packed task argument holds a single pointer
 
-        containers.extractData(interactionBufferPtr->leavesToExtract,
-                               (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]),
-                               STARPU_VECTOR_GET_NX(buffers[1]));
-    }
+    containers.extractData(interactionBufferPtr->leavesToExtract, // presumably copies the listed leaves out of the group -- confirm against extractData
+                           (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), // destination: second (read-write) vector buffer
+        STARPU_VECTOR_GET_NX(buffers[1])); // element count of the destination vector
+  }
 
-    static void InsertCellUp(void *buffers[], void *cl_arg){
-        CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]),
-                                        STARPU_VECTOR_GET_NX(buffers[1]),
-                                        (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]),
-                                        nullptr);
+  static void InsertCellUp(void *buffers[], void *cl_arg){ // CPU function of the cell_insert_up codelet (modes: STARPU_R, STARPU_RW, STARPU_RW)
+    CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), // cell group built over the second vector buffer
+        STARPU_VECTOR_GET_NX(buffers[1]), // element count of that vector
+        (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), // third vector buffer; presumably the "up" (multipole) part -- confirm against CellContainerClass
+        nullptr); // last constructor argument is left null here
 
-        CellExtractedHandles* interactionBufferPtr;
-        starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr);
+    CellExtractedHandles* interactionBufferPtr; // filled from the task argument on the next line
+    starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr); // the packed task argument holds a single pointer
 
-        currentCells.restoreDataUp(interactionBufferPtr->cellsToExtract,
-                                   (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]),
-                                   STARPU_VECTOR_GET_NX(buffers[0]));
-    }
+    currentCells.restoreDataUp(interactionBufferPtr->cellsToExtract, // presumably writes the listed cells back into the group -- confirm against restoreDataUp
+                               (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), // source bytes: first (read-only) vector buffer
+        STARPU_VECTOR_GET_NX(buffers[0])); // element count of the source vector
+  }
 
-    static void InsertCellUpBis(void *buffers[], void *cl_arg){
-        unsigned char* ptr1;
-        size_t size1;
-        unsigned char* ptr2;
-        size_t size2;
-        CellExtractedHandles* interactionBufferPtr;
-        starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr, &ptr1, &size1, &ptr2, &size2);
+  static void InsertCellUpBis(void *buffers[], void *cl_arg){ // CPU function of the cell_insert_up_bis codelet (modes: STARPU_R, STARPU_RW, STARPU_RW)
+    unsigned char* ptr1; // source copied into buffers[1] below
+    size_t size1; // byte count of the copy into buffers[1]
+    unsigned char* ptr2; // source copied into buffers[2] below
+    size_t size2; // byte count of the copy into buffers[2]
+    CellExtractedHandles* interactionBufferPtr; // first value unpacked from the task argument
+    starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr, &ptr1, &size1, &ptr2, &size2); // values must have been packed in this same order by the task submitter
 
-        memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), ptr1, size1);
-        memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), ptr2, size2);
+    memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), ptr1, size1); // NOTE(review): size1 is not checked against the size of buffers[1]
+    memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), ptr2, size2); // NOTE(review): size2 is not checked against the size of buffers[2]
 
-        CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]),
-                                        STARPU_VECTOR_GET_NX(buffers[1]),
-                                        (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]),
-                                        nullptr);
+    CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), // cell group built over the bytes just copied into buffers[1]
+        STARPU_VECTOR_GET_NX(buffers[1]), // element count of that vector
+        (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), // bytes just copied into buffers[2]
+        nullptr); // last constructor argument is left null here
 
 
-        currentCells.restoreDataUp(interactionBufferPtr->cellsToExtract,
-                                   (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]),
-                                   STARPU_VECTOR_GET_NX(buffers[0]));
-    }
+    currentCells.restoreDataUp(interactionBufferPtr->cellsToExtract, // presumably writes the listed cells back into the group -- confirm against restoreDataUp
+                               (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), // source bytes: first (read-only) vector buffer
+        STARPU_VECTOR_GET_NX(buffers[0])); // element count of the source vector
+  }
 
-    static void ExtractCellUp(void *buffers[], void *cl_arg){
-        CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]),
-                                        STARPU_VECTOR_GET_NX(buffers[0]),
-                                        (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]),
-                                        nullptr);
+  static void ExtractCellUp(void *buffers[], void *cl_arg){ // CPU function of the cell_extract_up codelet (modes: STARPU_R, STARPU_R, STARPU_RW)
+    CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), // cell group built over the first (read-only) vector buffer
+        STARPU_VECTOR_GET_NX(buffers[0]), // element count of that vector
+        (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), // second (read-only) vector buffer; presumably the "up" (multipole) part -- confirm against CellContainerClass
+        nullptr); // last constructor argument is left null here
 
-        CellExtractedHandles* interactionBufferPtr;
-        starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr);
+    CellExtractedHandles* interactionBufferPtr; // filled from the task argument on the next line
+    starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr); // the packed task argument holds a single pointer
 
-        currentCells.extractDataUp(interactionBufferPtr->cellsToExtract,
-                                   (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]),
-                                   STARPU_VECTOR_GET_NX(buffers[2]));
-    }
+    currentCells.extractDataUp(interactionBufferPtr->cellsToExtract, // presumably copies the listed cells out of the group -- confirm against extractDataUp
+                               (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), // destination: third (read-write) vector buffer
+        STARPU_VECTOR_GET_NX(buffers[2])); // element count of the destination vector
+  }
 #endif
 
-    void initCodeletMpi(){
-        memset(&p2p_cl_inout_mpi, 0, sizeof(p2p_cl_inout_mpi));
+  void initCodeletMpi(){ // fills the two codelets p2p_cl_inout_mpi and m2l_cl_inout_mpi
+    memset(&p2p_cl_inout_mpi, 0, sizeof(p2p_cl_inout_mpi)); // start from an all-zero codelet so that `where` can be built up with |=
 #ifdef STARPU_USE_CPU
-        if(originalCpuKernel->supportP2PMpi(FSTARPU_CPU_IDX)){
-            p2p_cl_inout_mpi.where |= STARPU_CPU;
-            p2p_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallbackMpi;
-        }
+    if(originalCpuKernel->supportP2PMpi(FSTARPU_CPU_IDX)){ // CPU implementation is added only when the kernel reports support for it
+        p2p_cl_inout_mpi.where |= STARPU_CPU;
+        p2p_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallbackMpi;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        if(originalCpuKernel->supportP2PMpi(FSTARPU_CUDA_IDX)){
-            p2p_cl_inout_mpi.where |= STARPU_CUDA;
-            p2p_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallbackMpi;
-        }
+    if(originalCpuKernel->supportP2PMpi(FSTARPU_CUDA_IDX)){ // same test for the CUDA implementation
+        p2p_cl_inout_mpi.where |= STARPU_CUDA;
+        p2p_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallbackMpi;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        if(originalCpuKernel->supportP2PMpi(FSTARPU_OPENCL_IDX)){
-            p2p_cl_inout_mpi.where |= STARPU_OPENCL;
-            p2p_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallbackMpi;
-        }
-#endif
-        p2p_cl_inout_mpi.nbuffers = 3;
-        p2p_cl_inout_mpi.modes[0] = STARPU_R;
-        p2p_cl_inout_mpi.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
-        p2p_cl_inout_mpi.modes[2] = STARPU_R;
-        p2p_cl_inout_mpi.name = "p2p_cl_inout_mpi";
-
-        memset(&m2l_cl_inout_mpi, 0, sizeof(m2l_cl_inout_mpi));
+    if(originalCpuKernel->supportP2PMpi(FSTARPU_OPENCL_IDX)){ // same test for the OpenCL implementation
+        p2p_cl_inout_mpi.where |= STARPU_OPENCL;
+        p2p_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallbackMpi;
+      }
+#endif
+    p2p_cl_inout_mpi.nbuffers = 3; // three data buffers, access modes set just below
+    p2p_cl_inout_mpi.modes[0] = STARPU_R;
+    p2p_cl_inout_mpi.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); // only buffer 1 is written by this codelet
+    p2p_cl_inout_mpi.modes[2] = STARPU_R;
+    p2p_cl_inout_mpi.name = "p2p_cl_inout_mpi";
+
+    memset(&m2l_cl_inout_mpi, 0, sizeof(m2l_cl_inout_mpi)); // second codelet, also zeroed before being filled
 #ifdef STARPU_USE_CPU
-        if(originalCpuKernel->supportM2LMpi(FSTARPU_CPU_IDX)){
-            m2l_cl_inout_mpi.where |= STARPU_CPU;
-            m2l_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallbackMpi;
-        }
+    if(originalCpuKernel->supportM2LMpi(FSTARPU_CPU_IDX)){ // CPU implementation is added only when the kernel reports support for it
+        m2l_cl_inout_mpi.where |= STARPU_CPU;
+        m2l_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallbackMpi;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_CUDA_KERNEL
-        if(originalCpuKernel->supportM2LMpi(FSTARPU_CUDA_IDX)){
-            m2l_cl_inout_mpi.where |= STARPU_CUDA;
-            m2l_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallbackMpi;
-        }
+    if(originalCpuKernel->supportM2LMpi(FSTARPU_CUDA_IDX)){ // same test for the CUDA implementation
+        m2l_cl_inout_mpi.where |= STARPU_CUDA;
+        m2l_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallbackMpi;
+      }
 #endif
 #ifdef SCALFMM_ENABLE_OPENCL_KERNEL
-        if(originalCpuKernel->supportM2LMpi(FSTARPU_OPENCL_IDX)){
-            m2l_cl_inout_mpi.where |= STARPU_OPENCL;
-            m2l_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallbackMpi;
-        }
-#endif
-        m2l_cl_inout_mpi.nbuffers = 4;
-        m2l_cl_inout_mpi.modes[0] = STARPU_R;
-        m2l_cl_inout_mpi.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED);
-        m2l_cl_inout_mpi.modes[2] = STARPU_R;
-        m2l_cl_inout_mpi.modes[3] = STARPU_R;
-        m2l_cl_inout_mpi.name = "m2l_cl_inout_mpi";
-    }
-
-    /** dealloc in a starpu way all the defined handles */
-    void cleanHandle(){
-        for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
-            for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){
-                if(isDataOwnedBerenger(tree->getCellGroup(idxLevel, idxHandle)->getStartingIndex(), idxLevel))//Clean only our data handle
-                {
-                    starpu_data_unregister(cellHandles[idxLevel][idxHandle].symb);
-                    starpu_data_unregister(cellHandles[idxLevel][idxHandle].up);
-                    starpu_data_unregister(cellHandles[idxLevel][idxHandle].down);
-                }
+    if(originalCpuKernel->supportM2LMpi(FSTARPU_OPENCL_IDX)){ // same test for the OpenCL implementation
+        m2l_cl_inout_mpi.where |= STARPU_OPENCL;
+        m2l_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallbackMpi;
+      }
+#endif
+    m2l_cl_inout_mpi.nbuffers = 4; // four data buffers, access modes set just below
+    m2l_cl_inout_mpi.modes[0] = STARPU_R;
+    m2l_cl_inout_mpi.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); // only buffer 1 is written by this codelet
+    m2l_cl_inout_mpi.modes[2] = STARPU_R;
+    m2l_cl_inout_mpi.modes[3] = STARPU_R;
+    m2l_cl_inout_mpi.name = "m2l_cl_inout_mpi";
+  }
+
+  /** Unregister from StarPU the cell/particle handles selected by isDataOwnedBerenger(), then clear the handle containers. */
+  void cleanHandle(){
+    for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){
+        for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){
+            if(isDataOwnedBerenger(tree->getCellGroup(idxLevel, idxHandle)->getStartingIndex(), idxLevel))// Unregister only the handles of the groups that pass the ownership test
+              {
+                starpu_data_unregister(cellHandles[idxLevel][idxHandle].symb);
+                starpu_data_unregister(cellHandles[idxLevel][idxHandle].up);
+                starpu_data_unregister(cellHandles[idxLevel][idxHandle].down);
+              }
+          }
+        cellHandles[idxLevel].clear(); // every entry is dropped here. NOTE(review): entries that failed the test above are cleared without starpu_data_unregister - confirm intended
+      }
+    {
+      for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){
+          if(isDataOwnedBerenger(tree->getCellGroup(tree->getHeight()-1, idxHandle)->getStartingIndex(), tree->getHeight()-1))// Ownership is tested on the cell group of level getHeight()-1 that has the same index
+            {
+              starpu_data_unregister(particleHandles[idxHandle].symb);
+              starpu_data_unregister(particleHandles[idxHandle].down);
             }
-            cellHandles[idxLevel].clear();
-        }
-        {
-            for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){
-                if(isDataOwnedBerenger(tree->getCellGroup(tree->getHeight()-1, idxHandle)->getStartingIndex(), tree->getHeight()-1))//Clean only our data handle
-                {
-                    starpu_data_unregister(particleHandles[idxHandle].symb);
-                    starpu_data_unregister(particleHandles[idxHandle].down);
-                }
-            }
-            particleHandles.clear();
-        }
-#ifdef SCALFMM_USE_STARPU_EXTRACT
-        for(auto& iter : extractedParticlesBuffer){
-            starpu_data_unregister(iter.symb);
-        }
-        for(auto& iter : duplicatedParticlesBuffer){
-            starpu_data_unregister(iter.symb);
-        }
-        for(auto& iter : extractedCellBuffer){
-            starpu_data_unregister(iter.all);
-        }
-        for(auto& iter : duplicatedCellBuffer){
-            starpu_data_unregister(iter.symb);
         }
-#endif
+      particleHandles.clear();
     }
-
-    /** Reset the handles array and create new ones to define
+#ifdef SCALFMM_USE_STARPU_EXTRACT
+    for(auto& iter : extractedParticlesBuffer){ // the four extract/duplicate buffers are unregistered without any ownership test
+        starpu_data_unregister(iter.symb);
+      }
+    for(auto& iter : duplicatedParticlesBuffer){
+        starpu_data_unregister(iter.symb);
+      }
+    for(auto& iter : extractedCellBuffer){
+        starpu_data_unregister(iter.all); // this buffer exposes a single handle named "all" instead of "symb"
+      }
+    for(auto& iter : duplicatedCellBuffer){
+        starpu_data_unregister(iter.symb);
+      }
+#endif
+  }
+
+  /** buildHandles(): reset the handles array and create new ones to define
      * in a starpu way each block of data
      */
-    int tag;
-    void buildHandles(){
-        cleanHandle();
-        tag = 0;
-        int where;
-        for(int idxLevel = 2 ; idxLevel < tree->getHeight() ; ++idxLevel){
-            cellHandles[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
-            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
-                const CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
-                int registeringNode = dataMappingBerenger(currentCells->getStartingIndex(), idxLevel);
-
-                where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
-                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].symb, where,
-                                              (uintptr_t)currentCells->getRawBuffer(), currentCells->getBufferSizeInByte());
-                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].up, where,
-                                              (uintptr_t)currentCells->getRawMultipoleBuffer(), currentCells->getMultipoleBufferSizeInByte());
-                starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, where,
-                                              (uintptr_t)currentCells->getRawLocalBuffer(), currentCells->getLocalBufferSizeInByte());
-
-                starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].symb, tag++, registeringNode);
-                starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].up, tag++, registeringNode);
-                starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].down, tag++, registeringNode);
-                cellHandles[idxLevel][idxGroup].intervalSize = int(currentCells->getNumberOfCellsInBlock());
+  /** Debug dump on std::cout of the cell handles stored for levels 2 .. tree->getHeight()-1. */
+  void printCellHandels (){
+    std::cout << "  print  Cell Handles() " << std::endl;
+    for(int level = 2 ; level < tree->getHeight() ; ++level){
+        const auto& handlesAtLevel = cellHandles[level];
+        std::cout << "   Level  " << level << " nb handles: " << handlesAtLevel.size() <<std::endl;
+        for(std::size_t groupPos = 0 ; groupPos < handlesAtLevel.size()  ; ++groupPos){
+            const auto& handle = handlesAtLevel[groupPos];  // symb / up / down handles of one cell group
+            std::cout << "idxG " << groupPos << " sym " << handle.symb << " mult "<< handle.up << " loc " <<  handle.down
+                      << " size "<< handle.intervalSize << " grouID " << handle.groupID  <<std::endl;
+          }
+      }
+  }
+  void buildHandles(){ // Drops the current handles (cleanHandle), then registers every cell group of levels >= 2 and every particle group with StarPU and StarPU-MPI
+    std::cout << "  BEGIN buildHandles() " << std::endl;
+    cleanHandle();
+    int tag = 0; // next MPI tag, incremented once per registered handle. NOTE(review): replaces the former member "int tag" - confirm nothing else reads it
+    int where; // home node given to StarPU: STARPU_MAIN_RAM when this rank is the registering node, -1 otherwise
+    for(int idxLevel = 2 ; idxLevel < tree->getHeight() ; ++idxLevel){
+        std::cout << "   Level  " << idxLevel << std::endl;
+        cellHandles[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
+        for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
+            const CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
+            int registeringNode = dataMappingBerenger(currentCells->getStartingIndex(), idxLevel);
+            int idx_global      = currentCells->getIdxGlobal(); // only used by the trace printed below
+            where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
+            std::cout << "       idxGroup " << idxGroup << " registeringNode " << registeringNode
+                      << " idx_global "<< idx_global << "  where "<< where << std::endl<<std::flush;
+            starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].symb, where,
+                                          (uintptr_t)currentCells->getRawBuffer(), currentCells->getBufferSizeInByte());
+            starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].up, where,
+                                          (uintptr_t)currentCells->getRawMultipoleBuffer(), currentCells->getMultipoleBufferSizeInByte());
+            starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, where,
+                                          (uintptr_t)currentCells->getRawLocalBuffer(), currentCells->getLocalBufferSizeInByte());
+
+            starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].symb, tag++, registeringNode); // tags follow loop order; presumably every rank must walk the same groups in the same order - TODO confirm
+            starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].up, tag++, registeringNode);
+            starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].down, tag++, registeringNode);
+            cellHandles[idxLevel][idxGroup].intervalSize = int(currentCells->getNumberOfCellsInBlock());
+
+            cellHandles[idxLevel][idxGroup].groupID = idxGroup; // position of the group inside its level
 #ifdef STARPU_SUPPORT_ARBITER
-                starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].up, arbiterGlobal);
-                starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].down, arbiterGlobal);
-#endif
-            }
-        }
-        {
-            particleHandles.resize(tree->getNbParticleGroup());
-            for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
-                int registeringNode = dataMappingBerenger(tree->getCellGroup(tree->getHeight()-1, idxGroup)->getStartingIndex(), tree->getHeight()-1);
-                where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
-                ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);
-                starpu_variable_data_register(&particleHandles[idxGroup].symb, where,
-                                              (uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte());
-                starpu_variable_data_register(&particleHandles[idxGroup].down, where,
-                                              (uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte());
-
-                starpu_mpi_data_register(particleHandles[idxGroup].symb, tag++, registeringNode);
-                starpu_mpi_data_register(particleHandles[idxGroup].down, tag++, registeringNode);
+            starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].up, arbiterGlobal);
+            starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].down, arbiterGlobal);
+#endif
+          }
+      }
+    {
+      particleHandles.resize(tree->getNbParticleGroup());
+      for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
+          int registeringNode = dataMappingBerenger(tree->getCellGroup(tree->getHeight()-1, idxGroup)->getStartingIndex(), tree->getHeight()-1); // rank is derived from the cell group of level getHeight()-1 with the same index
+          where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
+          ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);
+          starpu_variable_data_register(&particleHandles[idxGroup].symb, where,
+                                        (uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte());
+          starpu_variable_data_register(&particleHandles[idxGroup].down, where,
+                                        (uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte());
+
+          starpu_mpi_data_register(particleHandles[idxGroup].symb, tag++, registeringNode); // particle tags continue the counter used for the cell handles
+          starpu_mpi_data_register(particleHandles[idxGroup].down, tag++, registeringNode);
 #ifdef STARPU_USE_REDUX
-                starpu_data_set_reduction_methods(particleHandles[idxGroup].down, &p2p_redux_perform,
-                                                  &p2p_redux_init);
+          starpu_data_set_reduction_methods(particleHandles[idxGroup].down, &p2p_redux_perform,
+                                            &p2p_redux_init);
 #else
 #ifdef STARPU_SUPPORT_ARBITER
-                starpu_data_assign_arbiter(particleHandles[idxGroup].down, arbiterGlobal);
+          starpu_data_assign_arbiter(particleHandles[idxGroup].down, arbiterGlobal);
 #endif // STARPU_SUPPORT_ARBITER
 #endif // STARPU_USE_REDUX
-                particleHandles[idxGroup].intervalSize = int(containers->getNumberOfLeavesInBlock());
-            }
+          particleHandles[idxGroup].intervalSize = int(containers->getNumberOfLeavesInBlock());
         }
     }
-    int dataMappingBerenger(MortonIndex const idx, int const idxLevel) const {
-        for(int i = 0; i < nproc; ++i)
-            if(nodeRepartition[idxLevel][i][0] <= nodeRepartition[idxLevel][i][1] && idx >= nodeRepartition[idxLevel][i][0] && idx <= nodeRepartition[idxLevel][i][1])
-                return i;
-        if(mpi_rank == 0)
-            cout << "[scalfmm][map error] idx " << idx << " on level " << idxLevel << " isn't mapped on any proccess. (Default set to 0)." << endl;
-        return nproc-1;
+    std::cout << "  END buildHandles() " << std::endl;
+  }
+  /*
+     * Registers the StarPU handles of every block when the tree is distributed (LET).
+     * The MPI tag of each handle is derived from the global index of its block.
+     */
+  void buildDistributedHandles(){
+    std::cout << "  BEGIN buildDistributedHandles() " << std::endl;
+    cleanHandle();
+    int where; // home node given to StarPU: STARPU_MAIN_RAM when this rank is the registering node, -1 otherwise
+    for(int idxLevel = 2 ; idxLevel < tree->getHeight() ; ++idxLevel){
+        std::cout << "   Level  " << idxLevel << std::endl;
+
+        cellHandles[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
+        for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
+            const CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
+            int registeringNode = dataMappingBerenger(currentCells->getStartingIndex(), idxLevel);
+            int idx_global      = currentCells->getIdxGlobal(); // global index of the block, base of its three MPI tags
+            where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
+            std::cout << "       idxGroup " << idxGroup << " registeringNode " << registeringNode
+                      << " idx_global "<< idx_global << "  where "<< where << std::endl<<std::flush;
+            starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].symb, where,
+                                          (uintptr_t)currentCells->getRawBuffer(),
+                                          currentCells->getBufferSizeInByte());
+            // multipole buffer -> "up" handle
+            starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].up, where,
+                                          (uintptr_t)currentCells->getRawMultipoleBuffer(),
+                                          currentCells->getMultipoleBufferSizeInByte());
+            // local buffer -> "down" handle
+            starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, where,
+                                          (uintptr_t)currentCells->getRawLocalBuffer(),
+                                          currentCells->getLocalBufferSizeInByte());
+            // MPI tags: symb = idx_global, up = idx_global + nb_block, down = idx_global + 2*nb_block
+
+            starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].symb, idx_global, registeringNode);
+            // "up" handle: tag shifted by nb_block
+            starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].up, idx_global+this->nb_block, registeringNode);
+            // "down" handle: tag shifted by 2*nb_block
+            starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].down, idx_global+this->nb_block*2, registeringNode);
+            // NOTE(review): buildHandles() also sets groupID for each entry; it is not assigned here - confirm intended
+            cellHandles[idxLevel][idxGroup].intervalSize = static_cast<int>(currentCells->getNumberOfCellsInBlock());
+#ifdef STARPU_SUPPORT_ARBITER
+            starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].up,   arbiterGlobal);
+            starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].down, arbiterGlobal);
+#endif
+          }
+      }
+    {
+      particleHandles.resize(tree->getNbParticleGroup());
+      for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
+          int registeringNode = dataMappingBerenger(tree->getCellGroup(tree->getHeight()-1, idxGroup)->getStartingIndex(), tree->getHeight()-1); // rank is derived from the cell group of level getHeight()-1 with the same index
+          where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
+          ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);
+          int idx_global = containers->getIdxGlobal();
+
+          starpu_variable_data_register(&particleHandles[idxGroup].symb, where,
+                                        (uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte());
+          starpu_variable_data_register(&particleHandles[idxGroup].down, where,
+                                        (uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte());
+
+          starpu_mpi_data_register(particleHandles[idxGroup].symb, idx_global, registeringNode); // NOTE(review): same tag offsets (0 and nb_block) as the cell symb/up handles above - confirm particle and cell global indices never coincide
+          starpu_mpi_data_register(particleHandles[idxGroup].down, idx_global+this->nb_block, registeringNode);
+#ifdef STARPU_USE_REDUX
+          starpu_data_set_reduction_methods(particleHandles[idxGroup].down, &p2p_redux_perform,
+                                            &p2p_redux_init);
+#else
+#ifdef STARPU_SUPPORT_ARBITER
+          starpu_data_assign_arbiter(particleHandles[idxGroup].down, arbiterGlobal);
+#endif // STARPU_SUPPORT_ARBITER
+#endif // STARPU_USE_REDUX
+          particleHandles[idxGroup].intervalSize = int(containers->getNumberOfLeavesInBlock());
+        }
     }
-    /**
+    std::cout << "  END buildDistributedHandles() " << std::endl;
+
+  }
+
+  /** Rank whose non-empty inclusive interval of _nodeRepartition[idxLevel] contains idx; nproc-1 when no rank matches. */
+  int dataMappingBerenger(MortonIndex const idx, int const idxLevel) const {
+    for(int i = 0; i < nproc; ++i)  // an entry whose [0] bound exceeds its [1] bound is treated as empty and skipped
+      if(_nodeRepartition[idxLevel][i][0] <= _nodeRepartition[idxLevel][i][1] && idx >= _nodeRepartition[idxLevel][i][0] && idx <= _nodeRepartition[idxLevel][i][1]) return i;
+    if(mpi_rank == 0)  // only rank 0 prints the diagnostic
+      std::cout << "[scalfmm][map error] idx " << idx << " on level " << idxLevel << " isn't mapped on any proccess. (Default set to " << (nproc-1) << ")." << std::endl;
+    return nproc-1;  // fallback is the last rank; the message now reports this value instead of a hard-coded 0
+  }
+  /////////////////////////////////////////////////////////////////////////////////////////////
+  /**
      * This function is creating the interactions vector between blocks.
      * It fills externalInteractionsAllLevel and externalInteractionsLeafLevel.
      * Warning, the omp task for now are using the class attributes!
      *
      */
-    void buildExternalInteractionVecs(){
-        FLOG( FTic timer; FTic leafTimer; FTic cellTimer; );
-        // Reset interactions
-        externalInteractionsAllLevel.clear();
+  void buildExternalInteractionVecs(){
+    FLOG( FTic timer; FTic leafTimer; FTic cellTimer; );
+    // Reset interactions
+    externalInteractionsAllLevel.clear();
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-        externalInteractionsAllLevelInnerIndexes.clear();
-        externalInteractionsAllLevelOuterIndexes.clear();
+    externalInteractionsAllLevelInnerIndexes.clear();
+    externalInteractionsAllLevelOuterIndexes.clear();
 #endif
-        externalInteractionsLeafLevel.clear();
+    externalInteractionsLeafLevel.clear();
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-        externalInteractionsLeafLevelOuter.clear();
-        externalInteractionsLeafLevelInner.clear();
+    externalInteractionsLeafLevelOuter.clear();
+    externalInteractionsLeafLevelInner.clear();
 #endif
-        // One per level + leaf level
-        externalInteractionsAllLevel.resize(tree->getHeight());
+    // One per level + leaf level
+    externalInteractionsAllLevel.resize(tree->getHeight());
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-        externalInteractionsAllLevelInnerIndexes.resize(tree->getHeight());
-        externalInteractionsAllLevelOuterIndexes.resize(tree->getHeight());
+    externalInteractionsAllLevelInnerIndexes.resize(tree->getHeight());
+    externalInteractionsAllLevelOuterIndexes.resize(tree->getHeight());
 #endif
 
-        // First leaf level
-        {
-            // We create one big vector per block
-            externalInteractionsLeafLevel.resize(tree->getNbParticleGroup());
+    // First leaf level
+    {
+      // We create one big vector per block
+      externalInteractionsLeafLevel.resize(tree->getNbParticleGroup());
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-            externalInteractionsLeafLevelOuter.resize(tree->getNbParticleGroup());
-            externalInteractionsLeafLevelInner.resize(tree->getNbParticleGroup());
+      externalInteractionsLeafLevelOuter.resize(tree->getNbParticleGroup());
+      externalInteractionsLeafLevelInner.resize(tree->getNbParticleGroup());
 #endif
 
-            for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
-                // Create the vector
-                ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);
+      for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
+          // Create the vector
+          ParticleGroupClass* containers = tree->getParticleGroup(idxGroup);
 
-                std::vector<BlockInteractions<ParticleGroupClass>>* externalInteractions = &externalInteractionsLeafLevel[idxGroup];
+          std::vector<BlockInteractions<ParticleGroupClass>>* externalInteractions = &externalInteractionsLeafLevel[idxGroup];
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-                std::vector<std::vector<int>>* externalInteractionsOuter = &externalInteractionsLeafLevelOuter[idxGroup];
-                std::vector<std::vector<int>>* externalInteractionsInner = &externalInteractionsLeafLevelInner[idxGroup];
+          std::vector<std::vector<int>>* externalInteractionsOuter = &externalInteractionsLeafLevelOuter[idxGroup];
+          std::vector<std::vector<int>>* externalInteractionsInner = &externalInteractionsLeafLevelInner[idxGroup];
 #endif
 
 #ifdef SCALFMM_USE_STARPU_EXTRACT
 #pragma omp task default(none) firstprivate(idxGroup, containers, externalInteractions, externalInteractionsOuter, externalInteractionsInner)
 #else
-                #pragma omp task default(none) firstprivate(idxGroup, containers, externalInteractions)
-#endif
-                { // Can be a task(inout:iterCells)
-                    std::vector<OutOfBlockInteraction> outsideInteractions;
-                    const MortonIndex blockStartIdx = containers->getStartingIndex();
-                    const MortonIndex blockEndIdx   = containers->getEndingIndex();
-
-                    for(int leafIdx = 0 ; leafIdx < containers->getNumberOfLeavesInBlock() ; ++leafIdx){
-                        const MortonIndex mindex = containers->getLeafMortonIndex(leafIdx);
-                        // ParticleContainerClass particles = containers->template getLeaf<ParticleContainerClass>(leafIdx);
-
-                        MortonIndex interactionsIndexes[26];
-                        int interactionsPosition[26];
-                        FTreeCoordinate coord(mindex);
-                        int counter = coord.getNeighborsIndexes(tree->getHeight(),interactionsIndexes,interactionsPosition);
-
-                        for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
-                            if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
-                                // Inside block interaction, do nothing
-                            }
-                            else if(interactionsIndexes[idxInter] < mindex){
-                                OutOfBlockInteraction property;
-                                property.insideIndex = mindex;
-                                property.outIndex    = interactionsIndexes[idxInter];
-                                property.relativeOutPosition = interactionsPosition[idxInter];
-                                property.insideIdxInBlock = leafIdx;
-                                property.outsideIdxInBlock = -1;
-                                outsideInteractions.push_back(property);
-                            }
-                        }
-                    }
-
-                    // Sort to match external order
-                    FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size()));
-
-                    int currentOutInteraction = 0;
-                    for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){
-                        ParticleGroupClass* leftContainers = tree->getParticleGroup(idxLeftGroup);
-                        const MortonIndex blockStartIdxOther    = leftContainers->getStartingIndex();
-                        const MortonIndex blockEndIdxOther      = leftContainers->getEndingIndex();
-
-                        while(currentOutInteraction < int(outsideInteractions.size())
-                              && (outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther
-                                  || leftContainers->getLeafIndex(outsideInteractions[currentOutInteraction].outIndex) == -1)
-                              && outsideInteractions[currentOutInteraction].outIndex < blockEndIdxOther){
-                            currentOutInteraction += 1;
-                        }
-
-                        int lastOutInteraction = currentOutInteraction;
-                        int copyExistingInteraction = currentOutInteraction;
-                        while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){
-                            const int leafPos = leftContainers->getLeafIndex(outsideInteractions[lastOutInteraction].outIndex);
-                            if(leafPos != -1){
-                                if(copyExistingInteraction != lastOutInteraction){
-                                    outsideInteractions[copyExistingInteraction] = outsideInteractions[lastOutInteraction];
-                                }
-                                outsideInteractions[copyExistingInteraction].outsideIdxInBlock = leafPos;
-                                copyExistingInteraction += 1;
-                            }
-                            lastOutInteraction += 1;
-                        }
-
-                        const int nbInteractionsBetweenBlocks = (copyExistingInteraction-currentOutInteraction);
-                        if(nbInteractionsBetweenBlocks){
-                            externalInteractions->emplace_back();
-                            BlockInteractions<ParticleGroupClass>* interactions = &externalInteractions->back();
-                            interactions->otherBlock = leftContainers;
-                            interactions->otherBlockId = idxLeftGroup;
-                            interactions->interactions.resize(nbInteractionsBetweenBlocks);
-                            std::copy(outsideInteractions.begin() + currentOutInteraction,
-                                      outsideInteractions.begin() + copyExistingInteraction,
-                                      interactions->interactions.begin());
+#pragma omp task default(none) firstprivate(idxGroup, containers, externalInteractions)
+#endif
+          { // Can be a task(inout:iterCells)
+            std::vector<OutOfBlockInteraction> outsideInteractions;
+            const MortonIndex blockStartIdx = containers->getStartingIndex();
+            const MortonIndex blockEndIdx   = containers->getEndingIndex();
+
+            for(int leafIdx = 0 ; leafIdx < containers->getNumberOfLeavesInBlock() ; ++leafIdx){
+                const MortonIndex mindex = containers->getLeafMortonIndex(leafIdx);
+                // ParticleContainerClass particles = containers->template getLeaf<ParticleContainerClass>(leafIdx);
+
+                MortonIndex interactionsIndexes[26];
+                int interactionsPosition[26];
+                FTreeCoordinate coord(mindex);
+                int counter = coord.getNeighborsIndexes(tree->getHeight(),interactionsIndexes,interactionsPosition);
+
+                for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
+                    if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
+                        // Inside block interaction, do nothing
+                      }
+                    else if(interactionsIndexes[idxInter] < mindex){
+                        OutOfBlockInteraction property;
+                        property.insideIndex = mindex;
+                        property.outIndex    = interactionsIndexes[idxInter];
+                        property.relativeOutPosition = interactionsPosition[idxInter];
+                        property.insideIdxInBlock = leafIdx;
+                        property.outsideIdxInBlock = -1;
+                        outsideInteractions.push_back(property);
+                      }
+                  }
+              }
+
+            // Sort to match external order
+            FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size()));
+
+            int currentOutInteraction = 0;
+            for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){
+                ParticleGroupClass* leftContainers = tree->getParticleGroup(idxLeftGroup);
+                const MortonIndex blockStartIdxOther    = leftContainers->getStartingIndex();
+                const MortonIndex blockEndIdxOther      = leftContainers->getEndingIndex();
+
+                while(currentOutInteraction < int(outsideInteractions.size())
+                      && (outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther
+                          || leftContainers->getLeafIndex(outsideInteractions[currentOutInteraction].outIndex) == -1)
+                      && outsideInteractions[currentOutInteraction].outIndex < blockEndIdxOther){
+                    currentOutInteraction += 1;
+                  }
+
+                int lastOutInteraction = currentOutInteraction;
+                int copyExistingInteraction = currentOutInteraction;
+                while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){
+                    const int leafPos = leftContainers->getLeafIndex(outsideInteractions[lastOutInteraction].outIndex);
+                    if(leafPos != -1){
+                        if(copyExistingInteraction != lastOutInteraction){
+                            outsideInteractions[copyExistingInteraction] = outsideInteractions[lastOutInteraction];
+                          }
+                        outsideInteractions[copyExistingInteraction].outsideIdxInBlock = leafPos;
+                        copyExistingInteraction += 1;
+                      }
+                    lastOutInteraction += 1;
+                  }
+
+                const int nbInteractionsBetweenBlocks = (copyExistingInteraction-currentOutInteraction);
+                if(nbInteractionsBetweenBlocks){
+                    externalInteractions->emplace_back();
+                    BlockInteractions<ParticleGroupClass>* interactions = &externalInteractions->back();
+                    interactions->otherBlock = leftContainers;
+                    interactions->otherBlockId = idxLeftGroup;
+                    interactions->interactions.resize(nbInteractionsBetweenBlocks);
+                    std::copy(outsideInteractions.begin() + currentOutInteraction,
+                              outsideInteractions.begin() + copyExistingInteraction,
+                              interactions->interactions.begin());
 
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-                            externalInteractionsOuter->emplace_back();
-                            externalInteractionsInner->emplace_back();
+                    externalInteractionsOuter->emplace_back();
+                    externalInteractionsInner->emplace_back();
 
-                            std::vector<int>* interactionsOuter = &externalInteractionsOuter->back();
-                            std::vector<int>* interactionsInner = &externalInteractionsInner->back();
+                    std::vector<int>* interactionsOuter = &externalInteractionsOuter->back();
+                    std::vector<int>* interactionsInner = &externalInteractionsInner->back();
 
-                            for(int idxUnique = 0 ; idxUnique < interactions->interactions.size() ; ++idxUnique){
-                                interactionsOuter->push_back(interactions->interactions[idxUnique].outsideIdxInBlock);
-                                interactionsInner->push_back(interactions->interactions[idxUnique].insideIdxInBlock);
-                            }
-                            FQuickSort<int, int>::QsSequential(interactionsOuter->data(),int(interactionsOuter->size()));
-                            FQuickSort<int, int>::QsSequential(interactionsInner->data(),int(interactionsInner->size()));
+                    for(std::size_t idxUnique = 0 ; idxUnique < interactions->interactions.size() ; ++idxUnique){
+                        interactionsOuter->push_back(interactions->interactions[idxUnique].outsideIdxInBlock);
+                        interactionsInner->push_back(interactions->interactions[idxUnique].insideIdxInBlock);
+                      }
+                    FQuickSort<int, int>::QsSequential(interactionsOuter->data(),int(interactionsOuter->size()));
+                    FQuickSort<int, int>::QsSequential(interactionsInner->data(),int(interactionsInner->size()));
 
-                            interactionsOuter->erase(std::unique(interactionsOuter->begin(), interactionsOuter->end()), interactionsOuter->end());
-                            interactionsInner->erase(std::unique(interactionsInner->begin(), interactionsInner->end()), interactionsInner->end());
+                    interactionsOuter->erase(std::unique(interactionsOuter->begin(), interactionsOuter->end()), interactionsOuter->end());
+                    interactionsInner->erase(std::unique(interactionsInner->begin(), interactionsInner->end()), interactionsInner->end());
 #endif
-                        }
+                  }
 
-                        currentOutInteraction = lastOutInteraction;
-                    }
-                }
-            }
+                currentOutInteraction = lastOutInteraction;
+              }
+          }
         }
-        FLOG( leafTimer.tac(); );
-        FLOG( cellTimer.tic(); );
-        {
-            for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){
-                externalInteractionsAllLevel[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
+    }
+    FLOG( leafTimer.tac(); );
+    FLOG( cellTimer.tic(); );
+    {
+      for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){
+          externalInteractionsAllLevel[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-                externalInteractionsAllLevelInnerIndexes[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
-                externalInteractionsAllLevelOuterIndexes[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
+          externalInteractionsAllLevelInnerIndexes[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
+          externalInteractionsAllLevelOuterIndexes[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel));
 #endif
-                for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
-                    CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
+          for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
+              CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup);
 
-                    std::vector<BlockInteractions<CellContainerClass>>* externalInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup];
+              std::vector<BlockInteractions<CellContainerClass>>* externalInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup];
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-                    std::vector<std::vector<int>>* externalInteractionsInner = &externalInteractionsAllLevelInnerIndexes[idxLevel][idxGroup];
-                    std::vector<std::vector<int>>* externalInteractionsOuter = &externalInteractionsAllLevelOuterIndexes[idxLevel][idxGroup];
+              std::vector<std::vector<int>>* externalInteractionsInner = &externalInteractionsAllLevelInnerIndexes[idxLevel][idxGroup];
+              std::vector<std::vector<int>>* externalInteractionsOuter = &externalInteractionsAllLevelOuterIndexes[idxLevel][idxGroup];
 #endif
 
 #ifdef SCALFMM_USE_STARPU_EXTRACT
 #pragma omp task default(none) firstprivate(idxGroup, currentCells, idxLevel, externalInteractions, externalInteractionsInner, externalInteractionsOuter)
 #else
-                    #pragma omp task default(none) firstprivate(idxGroup, currentCells, idxLevel, externalInteractions)
-#endif
-                    {
-                        std::vector<OutOfBlockInteraction> outsideInteractions;
-                        const MortonIndex blockStartIdx = currentCells->getStartingIndex();
-                        const MortonIndex blockEndIdx   = currentCells->getEndingIndex();
-
-                        for(int cellIdx = 0 ; cellIdx < currentCells->getNumberOfCellsInBlock() ; ++cellIdx){
-                            const MortonIndex mindex = currentCells->getCellMortonIndex(cellIdx);
-
-                            MortonIndex interactionsIndexes[189];
-                            int interactionsPosition[189];
-                            const FTreeCoordinate coord(mindex);
-                            int counter = coord.getInteractionNeighbors(idxLevel,interactionsIndexes,interactionsPosition);
-
-                            for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
-                                if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
-                                    // Nothing to do
-                                }
-                                else if(interactionsIndexes[idxInter] < mindex){
-                                    OutOfBlockInteraction property;
-                                    property.insideIndex = mindex;
-                                    property.outIndex    = interactionsIndexes[idxInter];
-                                    property.relativeOutPosition = interactionsPosition[idxInter];
-                                    property.insideIdxInBlock = cellIdx;
-                                    property.outsideIdxInBlock = -1;
-                                    outsideInteractions.push_back(property);
-                                }
-                            }
-                        }
-
-                        // Manage outofblock interaction
-                        FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size()));
-
-                        int currentOutInteraction = 0;
-                        for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){
-                            CellContainerClass* leftCells   = tree->getCellGroup(idxLevel, idxLeftGroup);
-                            const MortonIndex blockStartIdxOther = leftCells->getStartingIndex();
-                            const MortonIndex blockEndIdxOther   = leftCells->getEndingIndex();
-
-                            while(currentOutInteraction < int(outsideInteractions.size())
-                                  && (outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther
-                                      || leftCells->getCellIndex(outsideInteractions[currentOutInteraction].outIndex) == -1)
-                                  && outsideInteractions[currentOutInteraction].outIndex < blockEndIdxOther){
-                                currentOutInteraction += 1;
-                            }
-
-                            int lastOutInteraction = currentOutInteraction;
-                            int copyExistingInteraction = currentOutInteraction;
-                            while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){
-                                const int cellPos = leftCells->getCellIndex(outsideInteractions[lastOutInteraction].outIndex);
-                                if(cellPos != -1){
-                                    if(copyExistingInteraction != lastOutInteraction){
-                                        outsideInteractions[copyExistingInteraction] = outsideInteractions[lastOutInteraction];
-                                    }
-                                    outsideInteractions[copyExistingInteraction].outsideIdxInBlock = cellPos;
-                                    copyExistingInteraction += 1;
-                                }
-                                lastOutInteraction += 1;
-                            }
-
-                            // Create interactions
-                            const int nbInteractionsBetweenBlocks = (copyExistingInteraction-currentOutInteraction);
-                            if(nbInteractionsBetweenBlocks){
-                                externalInteractions->emplace_back();
-                                BlockInteractions<CellContainerClass>* interactions = &externalInteractions->back();
-                                interactions->otherBlock = leftCells;
-                                interactions->otherBlockId = idxLeftGroup;
-                                interactions->interactions.resize(nbInteractionsBetweenBlocks);
-                                std::copy(outsideInteractions.begin() + currentOutInteraction,
-                                          outsideInteractions.begin() + copyExistingInteraction,
-                                          interactions->interactions.begin());
+#pragma omp task default(none) firstprivate(idxGroup, currentCells, idxLevel, externalInteractions)
+#endif
+              {
+                std::vector<OutOfBlockInteraction> outsideInteractions;
+                const MortonIndex blockStartIdx = currentCells->getStartingIndex();
+                const MortonIndex blockEndIdx   = currentCells->getEndingIndex();
+
+                for(int cellIdx = 0 ; cellIdx < currentCells->getNumberOfCellsInBlock() ; ++cellIdx){
+                    const MortonIndex mindex = currentCells->getCellMortonIndex(cellIdx);
+
+                    MortonIndex interactionsIndexes[189];
+                    int interactionsPosition[189];
+                    const FTreeCoordinate coord(mindex);
+                    int counter = coord.getInteractionNeighbors(idxLevel,interactionsIndexes,interactionsPosition);
+
+                    for(int idxInter = 0 ; idxInter < counter ; ++idxInter){
+                        if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){
+                            // Nothing to do
+                          }
+                        else if(interactionsIndexes[idxInter] < mindex){
+                            OutOfBlockInteraction property;
+                            property.insideIndex = mindex;
+                            property.outIndex    = interactionsIndexes[idxInter];
+                            property.relativeOutPosition = interactionsPosition[idxInter];
+                            property.insideIdxInBlock = cellIdx;
+                            property.outsideIdxInBlock = -1;
+                            outsideInteractions.push_back(property);
+                          }
+                      }
+                  }
+
+                // Manage outofblock interaction
+                FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size()));
+
+                int currentOutInteraction = 0;
+                for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){
+                    CellContainerClass* leftCells   = tree->getCellGroup(idxLevel, idxLeftGroup);
+                    const MortonIndex blockStartIdxOther = leftCells->getStartingIndex();
+                    const MortonIndex blockEndIdxOther   = leftCells->getEndingIndex();
+
+                    while(currentOutInteraction < int(outsideInteractions.size())
+                          && (outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther
+                              || leftCells->getCellIndex(outsideInteractions[currentOutInteraction].outIndex) == -1)
+                          && outsideInteractions[currentOutInteraction].outIndex < blockEndIdxOther){
+                        currentOutInteraction += 1;
+                      }
+
+                    int lastOutInteraction = currentOutInteraction;
+                    int copyExistingInteraction = currentOutInteraction;
+                    while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){
+                        const int cellPos = leftCells->getCellIndex(outsideInteractions[lastOutInteraction].outIndex);
+                        if(cellPos != -1){
+                            if(copyExistingInteraction != lastOutInteraction){
+                                outsideInteractions[copyExistingInteraction] = outsideInteractions[lastOutInteraction];
+                              }
+                            outsideInteractions[copyExistingInteraction].outsideIdxInBlock = cellPos;
+                            copyExistingInteraction += 1;
+                          }
+                        lastOutInteraction += 1;
+                      }
+
+                    // Create interactions
+                    const int nbInteractionsBetweenBlocks = (copyExistingInteraction-currentOutInteraction);
+                    if(nbInteractionsBetweenBlocks){
+                        externalInteractions->emplace_back();
+                        BlockInteractions<CellContainerClass>* interactions = &externalInteractions->back();
+                        interactions->otherBlock = leftCells;
+                        interactions->otherBlockId = idxLeftGroup;
+                        interactions->interactions.resize(nbInteractionsBetweenBlocks);
+                        std::copy(outsideInteractions.begin() + currentOutInteraction,
+                                  outsideInteractions.begin() + copyExistingInteraction,
+                                  interactions->interactions.begin());
 
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-                                externalInteractionsInner->emplace_back();
-                                std::vector<int>* interactionsInnerIndexes = &externalInteractionsInner->back();
-                                externalInteractionsOuter->emplace_back();
-                                std::vector<int>* interactionsOuterIndexes = &externalInteractionsOuter->back();
-
-                                for(int idxUnique = 0 ; idxUnique < interactions->interactions.size() ; ++idxUnique){
-                                    interactionsOuterIndexes->push_back(interactions->interactions[idxUnique].outsideIdxInBlock);
-                                    interactionsInnerIndexes->push_back(interactions->interactions[idxUnique].insideIdxInBlock);
-                                }
-
-                                FQuickSort<int, int>::QsSequential(interactionsOuterIndexes->data(),int(interactionsOuterIndexes->size()));
-                                interactionsOuterIndexes->erase(std::unique(interactionsOuterIndexes->begin(), interactionsOuterIndexes->end()),
-                                                                interactionsOuterIndexes->end());
-                                FQuickSort<int, int>::QsSequential(interactionsInnerIndexes->data(),int(interactionsInnerIndexes->size()));
-                                interactionsInnerIndexes->erase(std::unique(interactionsInnerIndexes->begin(), interactionsInnerIndexes->end()),
-                                                                interactionsInnerIndexes->end());
-#endif
-                            }
-
-                            currentOutInteraction = lastOutInteraction;
-                        }
-                    }
-                }
+                        externalInteractionsInner->emplace_back();
+                        std::vector<int>* interactionsInnerIndexes = &externalInteractionsInner->back();
+                        externalInteractionsOuter->emplace_back();
+                        std::vector<int>* interactionsOuterIndexes = &externalInteractionsOuter->back();
+
+                        for(std::size_t  idxUnique = 0 ; idxUnique < interactions->interactions.size() ; ++idxUnique){
+                            interactionsOuterIndexes->push_back(interactions->interactions[idxUnique].outsideIdxInBlock);
+                            interactionsInnerIndexes->push_back(interactions->interactions[idxUnique].insideIdxInBlock);
+                          }
+
+                        FQuickSort<int, int>::QsSequential(interactionsOuterIndexes->data(),int(interactionsOuterIndexes->size()));
+                        interactionsOuterIndexes->erase(std::unique(interactionsOuterIndexes->begin(), interactionsOuterIndexes->end()),
+                                                        interactionsOuterIndexes->end());
+                        FQuickSort<int, int>::QsSequential(interactionsInnerIndexes->data(),int(interactionsInnerIndexes->size()));
+                        interactionsInnerIndexes->erase(std::unique(interactionsInnerIndexes->begin(), interactionsInnerIndexes->end()),
+                                                        interactionsInnerIndexes->end());
+#endif
+                      }
+
+                    currentOutInteraction = lastOutInteraction;
+                  }
+              }
             }
         }
-        FLOG( cellTimer.tac(); );
-
-#pragma omp taskwait
-
-        FLOG( FLog::Controller << "\t\t Prepare in " << timer.tacAndElapsed() << "s\n" );
-        FLOG( FLog::Controller << "\t\t\t Prepare at leaf level in   " << leafTimer.elapsed() << "s\n" );
-        FLOG( FLog::Controller << "\t\t\t Prepare at other levels in " << cellTimer.elapsed() << "s\n" );
     }
+    FLOG( cellTimer.tac(); );
 
-    /////////////////////////////////////////////////////////////////////////////////////
-    /// Bottom Pass
-    /////////////////////////////////////////////////////////////////////////////////////
-
-    void bottomPass(){
-        FLOG( FTic timer; );
-
-        FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size());
-
-        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
-            starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                   &p2m_cl,
-                                   STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                   STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int),
-        #ifdef SCALFMM_STARPU_USE_PRIO
-                    STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2M(),
-        #endif
-                    STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb,
-                    STARPU_RW, cellHandles[tree->getHeight()-1][idxGroup].up,
-                    STARPU_R, particleHandles[idxGroup].symb,
-        #ifdef STARPU_USE_TASK_NAME
-        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                    STARPU_NAME, p2mTaskNames.get(),
-        #else
-                    //"P2M-nb_i_p"
-                    STARPU_NAME, taskNames->print("P2M", "%d, %lld, %lld, %lld, %lld, %d\n",
-                                                  tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(),
-                                                  tree->getCellGroup(tree->getHeight()-1,idxGroup)->getSizeOfInterval(),
-                                                  tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(),
-                                                  tree->getParticleGroup(idxGroup)->getStartingIndex(),
-                                                  tree->getParticleGroup(idxGroup)->getEndingIndex(),
-                                                  starpu_mpi_data_get_rank(cellHandles[tree->getHeight()-1][idxGroup].up)),
-        #endif
-        #endif
-                    0);
-        }
-
-        FLOG( FLog::Controller << "\t\t bottomPass in " << timer.tacAndElapsed() << "s\n" );
-    }
-
-    /////////////////////////////////////////////////////////////////////////////////////
-    /// Upward Pass
-    /////////////////////////////////////////////////////////////////////////////////////
-
-    void upwardPass(){
-        FLOG( FTic timer; );
-        for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){
-            int idxSubGroup = 0;
-
-            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
-                CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup);
-
-                // Skip current group if needed
-                if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){
-                    ++idxSubGroup;
-                    FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) );
-                    FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() );
-                }
+#pragma omp taskwait
 
-                // Copy at max 8 groups
-                {
-                    starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                           &m2m_cl,
-                                           STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                           STARPU_VALUE, &idxLevel, sizeof(idxLevel),
-                                           STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+    FLOG( FLog::Controller << "\t\t Prepare in " << timer.tacAndElapsed() << "s\n" );
+    FLOG( FLog::Controller << "\t\t\t Prepare at leaf level in   " << leafTimer.elapsed() << "s\n" );
+    FLOG( FLog::Controller << "\t\t\t Prepare at other levels in " << cellTimer.elapsed() << "s\n" );
+  }
+
+  /////////////////////////////////////////////////////////////////////////////////////
+  /// Bottom Pass
+  /////////////////////////////////////////////////////////////////////////////////////
+
+  void bottomPass(){ // Inserts one p2m_cl task per particle group through starpu_mpi_insert_task.
+    FLOG( FTic timer; ); // Timer read by the FLOG report at the end of this method.
+
+    FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size()); // The loop below indexes both containers with the same idxGroup.
+
+    for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
+        starpu_mpi_insert_task(MPI_COMM_WORLD,
+                               &p2m_cl,
+                               STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                               STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int),
+    #ifdef SCALFMM_STARPU_USE_PRIO
+            STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2M(),
+    #endif
+            STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb, // read-only: symb handle of the cell group at level height-1
+            STARPU_RW, cellHandles[tree->getHeight()-1][idxGroup].up, // read/write: up handle of that same cell group
+            STARPU_R, particleHandles[idxGroup].symb, // read-only: symb handle of the particle group with the same index
+    #ifdef STARPU_USE_TASK_NAME
+    #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+            STARPU_NAME, p2mTaskNames.get(),
+    #else
+            //"P2M-nb_i_p" -- when SCALFMM_SIMGRID_TASKNAMEPARAMS is defined, the task name is built from the values formatted below
+            STARPU_NAME, taskNames->print("P2M", "%d, %lld, %lld, %lld, %lld, %d\n",
+                                          tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(),
+                                          tree->getCellGroup(tree->getHeight()-1,idxGroup)->getSizeOfInterval(),
+                                          tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(), // NOTE(review): same getter as the first (%d) value but paired with %lld here -- confirm the intended value and type
+                                          tree->getParticleGroup(idxGroup)->getStartingIndex(),
+                                          tree->getParticleGroup(idxGroup)->getEndingIndex(),
+                                          starpu_mpi_data_get_rank(cellHandles[tree->getHeight()-1][idxGroup].up)),
+    #endif
+    #endif
+            0); // presumably the 0 that ends the variadic argument list, per the StarPU insert-task convention -- confirm
+      }
+
+    FLOG( FLog::Controller << "\t\t bottomPass in " << timer.tacAndElapsed() << "s\n" ); // Reports the time spent in this method (task insertion only, as far as this code shows).
+  }
+
+  /////////////////////////////////////////////////////////////////////////////////////
+  /// Upward Pass
+  /////////////////////////////////////////////////////////////////////////////////////
+
+  void upwardPassDuplicate(){
+    FLOG( FTic timer; );
+    for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){
+        int idxSubGroup = 0;
+
+        for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
+            CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup);
+
+            // Skip current group if needed
+            if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){
+                ++idxSubGroup;
+                FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) );
+                FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() );
+              }
+
+            // Copy at max 8 groups
+            {
+              starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                     &m2m_cl,
+                                     STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                     STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                     STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                           STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2M(idxLevel),
+                                     STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2M(idxLevel),
                        #endif
-                                           STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly
-                                           (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].up, //The remaining, read/write
-                                           STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly
-                            STARPU_R, cellHandles[idxLevel+1][idxSubGroup].up, //level d'avant readonly
-        #ifdef STARPU_USE_TASK_NAME
-        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                            STARPU_NAME, m2mTaskNames[idxLevel].get(),
-        #else
-                            //"M2M-l_nb_i_nbc_ic_s"
-                            STARPU_NAME, taskNames->print("M2M", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
-                                                          idxLevel,
-                                                          tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
-                                                          tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
-                                                          tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
-                                                          tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
-                                                          FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
-                                                          FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
-                                                          tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
-                                                          tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
-                                                          tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
-                                                          tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
-                                                          starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].up)),
-        #endif
-        #endif
-                            0);
+                                     STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly
+                                     (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].up, //The remaining, read/write
+                                     STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly
+                  STARPU_R, cellHandles[idxLevel+1][idxSubGroup].up, //level d'avant readonly
+    #ifdef STARPU_USE_TASK_NAME
+    #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                  STARPU_NAME, m2mTaskNames[idxLevel].get(),
+    #else
+                  //"M2M-l_nb_i_nbc_ic_s"
+                  STARPU_NAME, taskNames->print("M2M", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
+                                                idxLevel,
+                                                tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
+                                                tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
+                                                FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
+                                                FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
+                                                tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
+                                                tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
+                                                starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].up)),
+    #endif
+    #endif
+                  0);
 
-                }
+            }
 
-                while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7)
-                      && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1)
-                      && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){
-                    idxSubGroup += 1;
+            while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7)
+                  && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1)
+                  && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){
+                idxSubGroup += 1;
 
-                    starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                           &m2m_cl,
-                                           STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                           STARPU_VALUE, &idxLevel, sizeof(idxLevel),
-                                           STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                       &m2m_cl,
+                                       STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                       STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                       STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                           STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2M(idxLevel),
+                                       STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2M(idxLevel),
                        #endif
-                                           STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly
-                                           (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].up, //The remaining, read/write
-                                           STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly
-                            STARPU_R, cellHandles[idxLevel+1][idxSubGroup].up, //level d'avant readonly
-        #ifdef STARPU_USE_TASK_NAME
-        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                            STARPU_NAME, m2mTaskNames[idxLevel].get(),
-        #else
-                            //M2M-l_nb_i_nbc_ic_s
-                            STARPU_NAME, taskNames->print("M2M", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
-                                                          idxLevel,
-                                                          tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
-                                                          tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
-                                                          tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
-                                                          tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
-                                                          FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
-                                                          FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
-                                                          tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
-                                                          tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
-                                                          tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
-                                                          tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
-                                                          starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].up)),
-        #endif
-        #endif
-                            0);
-                }
-
+                                       STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly
+                                       (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].up, //The remaining, read/write
+                                       STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly
+                    STARPU_R, cellHandles[idxLevel+1][idxSubGroup].up, //level d'avant readonly
+    #ifdef STARPU_USE_TASK_NAME
+    #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                    STARPU_NAME, m2mTaskNames[idxLevel].get(),
+    #else
+                    //M2M-l_nb_i_nbc_ic_s
+                    STARPU_NAME, taskNames->print("M2M", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
+                                                  idxLevel,
+                                                  tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                  tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                  tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
+                                                  tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
+                                                  FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
+                                                  FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
+                                                  tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                  tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                  tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
+                                                  tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
+                                                  starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].up)),
+    #endif
+    #endif
+                    0);
+              }
+
+          }
+      }
+    FLOG( FLog::Controller << "\t\t upwardPass in " << timer.tacAndElapsed() << "s\n" );
+  }
+
+  void upwardPassNoDuplicate(){
+    FLOG( FTic timer; );
+    // Visit the working levels from Min(height-2, lowerWorkingLevel-1) down to upperWorkingLevel (inclusive)
+    for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >=
+        FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){
+        // For each group at idxLevel, insert one M2M task per group of level idxLevel+1 whose index range overlaps its children
+        // iterate over every group of cells at the current level
+        for(int idxGroup = 0 ;
+            idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ;
+            ++idxGroup)
+          {
+            int idxSubGroup = 0;
+            // current (parent) group of cells
+            CellContainerClass*const currentCells =
+                tree->getCellGroup(idxLevel, idxGroup);
+
+            // Skip the sub-groups that end before the first possible child of the current group,
+            // i.e. while the sub-group ending index is <= (starting index of the current group << 3)
+            while(
+                  (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex() << 3)) )
+              //&& (!tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() && !currentCells->isMine()))
+              {
+                ++idxSubGroup;
+                // no sub-group left: stop scanning
+                if(idxSubGroup == tree->getNbCellGroupAtLevel(idxLevel+1))
+                  break;
+              }
+            // no sub-group left: leave the loop over the groups of this level
+            if(idxSubGroup == tree->getNbCellGroupAtLevel(idxLevel+1))
+              break;
+            // if the current block has a Morton index that is too small (check currently disabled)
+            //if(tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex() > currentCells->getEndingIndex() << 3 ){
+            //    break;
+            //}
+            FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) );
+            // Insert the M2M task for the first sub-group kept by the scan above
+            {
+              //    if(!currentCells->isMine() && tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() || !tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() && currentCells->isMine()  )
+              //std::cout<<currentCells->getIdxGlobal()<<" "<<currentCells->isMine() << " ----|> " << tree->getCellGroup(idxLevel+1,idxSubGroup)->getIdxGlobal()  << " "<< tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() <<std::endl;
+
+              starpu_mpi_insert_task(
+                    MPI_COMM_WORLD,
+                    &m2m_cl,
+                    STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                    STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                    STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+      #ifdef SCALFMM_STARPU_USE_PRIO
+                    STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2M(idxLevel),
+      #endif
+                    STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolic data of the current group, read-only
+                    (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].up, //"up" data of the current group, read/write
+                    STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolic data of the sub-group, read-only
+                  STARPU_R, cellHandles[idxLevel+1][idxSubGroup].up, //"up" data of the sub-group (level idxLevel+1), read-only
+    #ifdef STARPU_USE_TASK_NAME
+                  STARPU_NAME, m2mTaskNames[idxLevel].get(),
+    #endif
+                  0
+                  );
             }
-        }
-        FLOG( FLog::Controller << "\t\t upwardPass in " << timer.tacAndElapsed() << "s\n" );
-    }
 
-    /////////////////////////////////////////////////////////////////////////////////////
-    /// Transfer Pass
-    /////////////////////////////////////////////////////////////////////////////////////
-
-    void transferPass(const int fromLevel, const int toLevel, const bool inner, const bool outer){
-        FLOG( FTic timer; );
-        FLOG( FTic timerInBlock; FTic timerOutBlock; );
-        for(int idxLevel = fromLevel ; idxLevel < toLevel ; ++idxLevel){
-            if(inner){
-                FLOG( timerInBlock.tic() );
-                for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
-                    starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                           &m2l_cl_in,
-                                           STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                           STARPU_VALUE, &idxLevel, sizeof(idxLevel),
-                                           STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+            while(
+                  // ending index of the current sub-group
+                  tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()
+                  // does not go past the last possible child index of the current group
+                  <= (((currentCells->getEndingIndex()-1)<<3)+7)
+                  // a next sub-group exists at level idxLevel+1
+                  && (idxSubGroup+1) !=
+                  tree->getNbCellGroupAtLevel(idxLevel+1)
+                  &&
+                  // the next sub-group starts at or before the last possible child index of the current group
+                  tree->getCellGroup(idxLevel+1,idxSubGroup+1)->getStartingIndex()
+                  <= ((currentCells->getEndingIndex()-1)<<3)+7 )
+              {
+                idxSubGroup += 1;
+                //      if(!currentCells->isMine() && tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() || !tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() && currentCells->isMine()  )
+                //  std::cout << currentCells->getIdxGlobal() << " " << currentCells->isMine() << " ----> " << tree->getCellGroup(idxLevel+1,idxSubGroup)->getIdxGlobal()  << " "<< tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() <<std::endl;
+
+                //    if(tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() || currentCells->isMine()){
+                starpu_mpi_insert_task(
+                      MPI_COMM_WORLD,
+                      &m2m_cl,
+                      STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                      STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                      STARPU_VALUE,     &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+      #ifdef SCALFMM_STARPU_USE_PRIO
+                      STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2M(idxLevel),
+      #endif
+                      STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolic data of the current group, read-only
+                      (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].up, //"up" data of the current group, read/write
+                      STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolic data of the sub-group, read-only
+                    STARPU_R, cellHandles[idxLevel+1][idxSubGroup].up, //"up" data of the sub-group (level idxLevel+1), read-only
+    #ifdef STARPU_USE_TASK_NAME
+                    STARPU_NAME, m2mTaskNames[idxLevel].get(),
+    #endif
+                    0
+                    );
+                //}
+              }
+          }
+      }
+    FLOG( FLog::Controller << "\t\t upwardPass in " << timer.tacAndElapsed() << "s\n" );
+  }
+  /////////////////////////////////////////////////////////////////////////////////////
+  /// Transfer Pass
+  /////////////////////////////////////////////////////////////////////////////////////
+
+  void transferPass(const int fromLevel, const int toLevel, const bool inner, const bool outer){
+    FLOG( FTic timer; );
+    FLOG( FTic timerInBlock; FTic timerOutBlock; );
+    for(int idxLevel = fromLevel ; idxLevel < toLevel ; ++idxLevel){
+        if(inner){// compute the interactions inside a group
+            // we compute the interaction list for each element on the fly
+            //  and we find the Morton indexes that are included in the group
+
+            FLOG( timerInBlock.tic() );
+            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
+                starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                       &m2l_cl_in,
+                                       STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                       STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                       STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                           STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2L(idxLevel),
+                                       STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2L(idxLevel),
                        #endif
-                                           STARPU_R, cellHandles[idxLevel][idxGroup].symb,
-                                           STARPU_R, cellHandles[idxLevel][idxGroup].up,
-                                           (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down,
+                                       STARPU_R, cellHandles[idxLevel][idxGroup].symb,
+                                       STARPU_R, cellHandles[idxLevel][idxGroup].up,
+                                       (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down,
                        #ifdef STARPU_USE_TASK_NAME
                        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                           STARPU_NAME, m2lTaskNames[idxLevel].get(),
+                                       STARPU_NAME, m2lTaskNames[idxLevel].get(),
                        #else
-                                           //"M2L-l_nb_i"
-                                           STARPU_NAME, taskNames->print("M2L", "%d, %d, %lld, %lld, %lld, %lld, %lld, %d\n",
-                                                                         idxLevel,
-                                                                         tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
-                                                                         tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
-                                                                         tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
-                                                                         tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
-                                                                         tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
-                                                                         tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
-                                                                         starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].down)),
+                                       //"M2L-l_nb_i"
+                                       STARPU_NAME, taskNames->print("M2L", "%d, %d, %lld, %lld, %lld, %lld, %lld, %d\n",
+                                                                     idxLevel,
+                                                                     tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                                     tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                                     tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                                     tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                                     tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                                     tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                                     starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].down)),
                        #endif
                        #endif
-                                           0);
-                }
-                FLOG( timerInBlock.tac() );
-            }
-            if(outer){
-                FLOG( timerOutBlock.tic() );
-                for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
-                    for(int idxInteraction = 0; idxInteraction < int(externalInteractionsAllLevel[idxLevel][idxGroup].size()) ; ++idxInteraction){
-                        const int interactionid = externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].otherBlockId;
-                        const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].interactions;
-#ifdef SCALFMM_USE_STARPU_EXTRACT
-                        // On the same node -- do as usual
-                        if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb)){
-#endif
-                            int mode = 1;
-                            starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                                   &m2l_cl_inout,
-                                                   STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                                   STARPU_VALUE, &idxLevel, sizeof(idxLevel),
-                                                   STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
-                                                   STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
-                                                   STARPU_VALUE, &mode, sizeof(int),
-                           #ifdef SCALFMM_STARPU_USE_PRIO
-                                                   STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
-                           #endif
-                                                   STARPU_R, cellHandles[idxLevel][idxGroup].symb,
-                                                   (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down,
-                                                   STARPU_R, cellHandles[idxLevel][interactionid].symb,
-                                                   STARPU_R, cellHandles[idxLevel][interactionid].up,
-                           #ifdef STARPU_USE_TASK_NAME
-                           #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                                   STARPU_NAME, m2lOuterTaskNames[idxLevel].get(),
-                           #else
-                                                   //"M2L_out-l_nb_i_nb_i_s
-                                                   STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
-                                                                                 idxLevel,
-                                                                                 tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
-                                                                                 tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
-                                                                                 tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(),
-                                                                                 tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(),
-                                                                                 outsideInteractions->size(),
-                                                                                 tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
-                                                                                 tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
-                                                                                 tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(),
-                                                                                 tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(),
-                                                                                 starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].down)),
-                           #endif
-                           #endif
-                                                   0);
-
-                            mode = 2;
-                            starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                                   &m2l_cl_inout,
-                                                   STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                                   STARPU_VALUE, &idxLevel, sizeof(idxLevel),
-                                                   STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
-                                                   STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
-                                                   STARPU_VALUE, &mode, sizeof(int),
-                           #ifdef SCALFMM_STARPU_USE_PRIO
-                                                   STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
-                           #endif
-                                                   STARPU_R, cellHandles[idxLevel][interactionid].symb,
-                                                   (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][interactionid].down,
-                                                   STARPU_R, cellHandles[idxLevel][idxGroup].symb,
-                                                   STARPU_R, cellHandles[idxLevel][idxGroup].up,
-                           #ifdef STARPU_USE_TASK_NAME
-                           #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                                   STARPU_NAME, m2lOuterTaskNames[idxLevel].get(),
-                           #else
-                                                   //"M2L_out-l_nb_i_nb_i_s"
-                                                   STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
-                                                                                 idxLevel,
-                                                                                 tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
-                                                                                 tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
-                                                                                 tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(),
-                                                                                 tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(),
-                                                                                 outsideInteractions->size(),
-                                                                                 tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(),
-                                                                                 tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(),
-                                                                                 tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
-                                                                                 tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
-                                                                                 starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].down)),
-                           #endif
-                           #endif
-                                                   0);
-
+                                       0);
+              }
+            FLOG( timerInBlock.tac() );
+          }
+        if(outer){// compute the interactions between groups
+            // we need to store the interactions that each group has with the others,
+            // we call the list of interactions between two groups the interactions table.
+            //  This table tells where the elements are located in the groups,
+            // it gives their Morton indexes and the relative position of each interaction which
+            // is a value between 0 and 342 for the M2L ([−3; +3] × D) or 0 and 26 for the P2P ([−1; +1] × D).
+            FLOG( timerOutBlock.tic() );
+            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
+                for(int idxInteraction = 0; idxInteraction < int(externalInteractionsAllLevel[idxLevel][idxGroup].size()) ; ++idxInteraction){
+                    const int interactionid = externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].otherBlockId;
+                    const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].interactions;
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-                        }
-                        else{
-                                {
-
-                                    // Extract data from second group for the first one
-                                    // That is copy B to B'
-                                    extractedCellBuffer.emplace_back();
-                                    CellExtractedHandles& interactionBuffer = extractedCellBuffer.back();
-                                    interactionBuffer.cellsToExtract = externalInteractionsAllLevelOuterIndexes[idxLevel][idxGroup][idxInteraction];
-                                    interactionBuffer.size = tree->getCellGroup(idxLevel,interactionid)->extractGetSizeSymbUp(interactionBuffer.cellsToExtract);
-                                    // I allocate only if I will use it to extract
-                                    if(starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb) == mpi_rank){
-                                        interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]);
-                                        FAssertLF(interactionBuffer.data);
-                                    }
-                                    else{
-                                        interactionBuffer.data.reset(nullptr);
-                                    }
-                                    int registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb);
-                                    int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
-                                    starpu_variable_data_register(&interactionBuffer.all, where,
-                                                                  (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size);
-                                    starpu_mpi_data_register(interactionBuffer.all, tag++, registeringNode);
-
-                                    CellExtractedHandles* interactionBufferPtr = &interactionBuffer;
-                                    starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                                           &cell_extract_up,
-                                                           STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*),
-                                   #ifdef SCALFMM_STARPU_USE_PRIO
-                                                           STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
-                                   #endif
-                                                           STARPU_R, cellHandles[idxLevel][interactionid].symb,
-                                                           STARPU_R, cellHandles[idxLevel][interactionid].up,
-                                                           STARPU_RW, interactionBuffer.all, 0);
-
-                                    // Move to a new memory block that is on the same node as A
-                                    // B' to B'''
-                                    duplicatedCellBuffer.emplace_back();
-                                    DuplicatedCellHandle& duplicateB = duplicatedCellBuffer.back();
-                                    duplicateB.sizeSymb = tree->getCellGroup(idxLevel,interactionid)->getBufferSizeInByte();
-                                    duplicateB.sizeOther = tree->getCellGroup(idxLevel,interactionid)->getMultipoleBufferSizeInByte();
-                                    if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == mpi_rank){
-                                        // Reuse block but just to perform the send
-                                        duplicateB.dataSymbPtr.reset(new unsigned char[duplicateB.sizeSymb]);// = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawBuffer());
-                                        duplicateB.dataOtherPtr.reset(new unsigned char[duplicateB.sizeOther]);// = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawMultipoleBuffer());
-                                    }
-                                    duplicateB.dataSymb = nullptr;
-                                    duplicateB.dataOther = nullptr;
-
-                                    registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb);
-                                    where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
-                                    starpu_variable_data_register(&duplicateB.symb, where,
-                                                                  (uintptr_t)duplicateB.dataSymbPtr.get(), duplicateB.sizeSymb);
-                                    starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode);
-                                    starpu_variable_data_register(&duplicateB.other, where,
-                                                                  (uintptr_t)duplicateB.dataOtherPtr.get(), duplicateB.sizeOther);
-                                    starpu_mpi_data_register(duplicateB.other, tag++, registeringNode);
-
-                                    const unsigned char* ptr1 = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawBuffer());
-                                    size_t size1 = duplicateB.sizeSymb;
-                                    const unsigned char* ptr2 = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawMultipoleBuffer());
-                                    size_t size2 = duplicateB.sizeOther;
-
-                                    starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                                           &cell_insert_up_bis,
-                                                           STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*),
-                                                           STARPU_VALUE, &ptr1, sizeof(ptr1),
-                                                           STARPU_VALUE, &size1, sizeof(size1),
-                                                           STARPU_VALUE, &ptr2, sizeof(ptr2),
-                                                           STARPU_VALUE, &size2, sizeof(size2),
-                                   #ifdef SCALFMM_STARPU_USE_PRIO
-                                                           STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
-                                   #endif
-                                                           STARPU_R, interactionBuffer.all,
-                                                           STARPU_RW, duplicateB.symb,
-                                                           STARPU_RW, duplicateB.other, 0);
-
-
-                                int mode = 1;
-                                starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                                       &m2l_cl_inout,
-                                                       STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                                       STARPU_VALUE, &idxLevel, sizeof(idxLevel),
-                                                       STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
-                                                       STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
-                                                       STARPU_VALUE, &mode, sizeof(int),
-                               #ifdef SCALFMM_STARPU_USE_PRIO
-                                                       STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
-                               #endif
-                                                       STARPU_R, cellHandles[idxLevel][idxGroup].symb,
-                                                       (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down,
-                                                       STARPU_R, duplicateB.symb,
-                                                       STARPU_R, duplicateB.other,
-                               #ifdef STARPU_USE_TASK_NAME
-                               #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                                       STARPU_NAME, m2lOuterTaskNames[idxLevel].get(),
-                               #else
-                                                       //"M2L_out-l_nb_i_nb_i_s
-                                                       STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
-                                                                                     idxLevel,
-                                                                                     tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
-                                                                                     tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
-                                                                                     tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(),
-                                                                                     tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(),
-                                                                                     outsideInteractions->size(),
-                                                                                     tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
-                                                                                     tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
-                                                                                     tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(),
-                                                                                     tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(),
-                                                                                     starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].down)),
-                               #endif
-                               #endif
-                                                       0);
-                            }
-                            {
-                                // Extract data from second group for the first one
-                                // That is copy A to A'
-                                extractedCellBuffer.emplace_back();
-                                CellExtractedHandles& interactionBuffer = extractedCellBuffer.back();
-                                interactionBuffer.cellsToExtract = externalInteractionsAllLevelInnerIndexes[idxLevel][idxGroup][idxInteraction];
-                                interactionBuffer.size = tree->getCellGroup(idxLevel,idxGroup)->extractGetSizeSymbUp(interactionBuffer.cellsToExtract);
-                                // I allocate only if I will use it to extract
-                                if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == mpi_rank){
-                                    interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]);
-                                }
-                                else{
-                                    interactionBuffer.data.reset(nullptr);
-                                }
-                                int registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb);
-                                int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
-                                starpu_variable_data_register(&interactionBuffer.all, where,
-                                                              (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size);
-                                starpu_mpi_data_register(interactionBuffer.all, tag++, registeringNode);
-
-                                CellExtractedHandles* interactionBufferPtr = &interactionBuffer;
-                                starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                                       &cell_extract_up,
-                                                       STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*),
-                               #ifdef SCALFMM_STARPU_USE_PRIO
-                                                       STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
-                               #endif
-                                                       STARPU_R, cellHandles[idxLevel][idxGroup].symb,
-                                                       STARPU_R, cellHandles[idxLevel][idxGroup].up,
-                                                       STARPU_RW, interactionBuffer.all, 0);
-
-                                // Move to a new memory block that is on the same node as A
-                                // B' to B'''
-                                duplicatedCellBuffer.emplace_back();
-                                DuplicatedCellHandle& duplicateB = duplicatedCellBuffer.back();
-                                duplicateB.sizeSymb = tree->getCellGroup(idxLevel,idxGroup)->getBufferSizeInByte();
-                                duplicateB.sizeOther = tree->getCellGroup(idxLevel,idxGroup)->getMultipoleBufferSizeInByte();
-                                if(starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb) == mpi_rank){
-                                    // Reuse block but just to perform the send
-                                    duplicateB.dataSymbPtr.reset(new unsigned char[duplicateB.sizeSymb]);//const_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawBuffer());
-                                    //memcpy(duplicateB.dataSymbPtr.get(), tree->getCellGroup(idxLevel,idxGroup)->getRawBuffer(), duplicateB.sizeSymb);
-                                    duplicateB.dataOtherPtr.reset(new unsigned char[duplicateB.sizeOther]);//reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawMultipoleBuffer());
-                                    //memcpy(duplicateB.dataOtherPtr.get(), tree->getCellGroup(idxLevel,idxGroup)->getRawMultipoleBuffer(), duplicateB.sizeOther);
-                                }
-                                duplicateB.dataSymb = nullptr;
-                                duplicateB.dataOther = nullptr;
-
-                                registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb);
-                                where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
-                                starpu_variable_data_register(&duplicateB.symb, where,
-                                                              (uintptr_t)duplicateB.dataSymbPtr.get(), duplicateB.sizeSymb);
-                                starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode);
-                                starpu_variable_data_register(&duplicateB.other, where,
-                                                              (uintptr_t)duplicateB.dataOtherPtr.get(), duplicateB.sizeOther);
-                                starpu_mpi_data_register(duplicateB.other, tag++, registeringNode);
-
-                                const unsigned char* ptr1 = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawBuffer());
-                                size_t size1 = duplicateB.sizeSymb;
-                                const unsigned char* ptr2 = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawMultipoleBuffer());
-                                size_t size2 = duplicateB.sizeOther;
-                                starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                                       &cell_insert_up_bis,
-                                                       STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*),
-                                                       STARPU_VALUE, &ptr1, sizeof(ptr1),
-                                                       STARPU_VALUE, &size1, sizeof(size1),
-                                                       STARPU_VALUE, &ptr2, sizeof(ptr2),
-                                                       STARPU_VALUE, &size2, sizeof(size2),
-                               #ifdef SCALFMM_STARPU_USE_PRIO
-                                                       STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
-                               #endif
-                                                       STARPU_R, interactionBuffer.all,
-                                                       STARPU_RW, duplicateB.symb,
-                                                       STARPU_RW, duplicateB.other, 0);
-
-                                int mode = 2;
-                                starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                                       &m2l_cl_inout,
-                                                       STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                                       STARPU_VALUE, &idxLevel, sizeof(idxLevel),
-                                                       STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
-                                                       STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
-                                                       STARPU_VALUE, &mode, sizeof(int),
-                               #ifdef SCALFMM_STARPU_USE_PRIO
-                                                       STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
-                               #endif
-                                                       STARPU_R, cellHandles[idxLevel][interactionid].symb,
-                                                       (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][interactionid].down,
-                                                       STARPU_R, duplicateB.symb,
-                                                       STARPU_R, duplicateB.other,
-                               #ifdef STARPU_USE_TASK_NAME
-                               #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                                       STARPU_NAME, m2lOuterTaskNames[idxLevel].get(),
-                               #else
-                                                       //"M2L_out-l_nb_i_nb_i_s"
-                                                       STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
-                                                                                     idxLevel,
-                                                                                     tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
-                                                                                     tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
-                                                                                     tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(),
-                                                                                     tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(),
-                                                                                     outsideInteractions->size(),
-                                                                                     tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(),
-                                                                                     tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(),
-                                                                                     tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
-                                                                                     tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
-                                                                                     starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].down)),
-                               #endif
-                               #endif
-                                                       0);
-                            }
-                        }
+                    // Both groups are owned by the same MPI node -- insert the two M2L tasks directly on their handles
+                    if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb)){
 #endif
-                    }
-                }
-                FLOG( timerOutBlock.tac() );
-            }
-        }
-        FLOG( FLog::Controller << "\t\t transferPass in " << timer.tacAndElapsed() << "s\n" );
-        FLOG( FLog::Controller << "\t\t\t inblock in  " << timerInBlock.elapsed() << "s\n" );
-        FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" );
-    }
-
-    /////////////////////////////////////////////////////////////////////////////////////
-    /// Downard Pass
-    /////////////////////////////////////////////////////////////////////////////////////
-
-    void downardPass(){
-        FLOG( FTic timer; );
-        for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){
-            int idxSubGroup = 0;
-
-            for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
-                CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup);
-
-                // Skip current group if needed
-                if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){
-                    ++idxSubGroup;
-                    FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) );
-                    FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() );
-                }
-                // Copy at max 8 groups
-                {
-                    // put the right codelet
-                    if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){
+                        int mode = 1;
                         starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                               &l2l_cl_nocommute,
+                                               &m2l_cl_inout,
                                                STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                                                STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                               STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
                                                STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                                               STARPU_VALUE, &mode, sizeof(int),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel),
+                                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
                        #endif
-                                               STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly
-                                               STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write
-                                               STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly
-                                STARPU_RW, cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly
-        #ifdef STARPU_USE_TASK_NAME
-        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                STARPU_NAME, l2lTaskNames[idxLevel].get(),
-        #else
-                                //"L2L-l_nb_i_nbc_ic_s"
-                                STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
-                                                              idxLevel,
-                                                              tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
-                                                              tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
-                                                              tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
-                                                              tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
-                                                              FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
-                                                              FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
-                                                              tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
-                                                              tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
-                                                              tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
-                                                              tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
-                                                              starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)),
-        #endif
-        #endif
-                                0);
-                    }
-                    else{
+                                               STARPU_R, cellHandles[idxLevel][idxGroup].symb,
+                                               (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down,
+                                               STARPU_R, cellHandles[idxLevel][interactionid].symb,
+                                               STARPU_R, cellHandles[idxLevel][interactionid].up,
+                       #ifdef STARPU_USE_TASK_NAME
+                       #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                                               STARPU_NAME, m2lOuterTaskNames[idxLevel].get(),
+                       #else
+                                               //"M2L_out-l_nb_i_nb_i_s"
+                                               STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
+                                                                             idxLevel,
+                                                                             tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                                             tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                                             tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(),
+                                                                             tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(),
+                                                                             outsideInteractions->size(),
+                                                                             tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                                             tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                                             tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(),
+                                                                             tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(),
+                                                                             starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].down)),
+                       #endif
+                       #endif
+                                               0);
+
+                        mode = 2;
                         starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                               &l2l_cl,
+                                               &m2l_cl_inout,
                                                STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
                                                STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                               STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
                                                STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                                               STARPU_VALUE, &mode, sizeof(int),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel),
+                                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
                        #endif
-                                               STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly
-                                               STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write
-                                               STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly
-                                (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly
-        #ifdef STARPU_USE_TASK_NAME
-        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                STARPU_NAME, l2lTaskNames[idxLevel].get(),
-        #else
-                                //"L2L-l_nb_i_nbc_ic_s"
-                                STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
-                                                              idxLevel,
-                                                              tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
-                                                              tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
-                                                              tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
-                                                              tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
-                                                              FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
-                                                              FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
-                                                              tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
-                                                              tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
-                                                              tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
-                                                              tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
-                                                              starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)),
-        #endif
-        #endif
-                                0);
-                    }
+                                               STARPU_R, cellHandles[idxLevel][interactionid].symb,
+                                               (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][interactionid].down,
+                                               STARPU_R, cellHandles[idxLevel][idxGroup].symb,
+                                               STARPU_R, cellHandles[idxLevel][idxGroup].up,
+                       #ifdef STARPU_USE_TASK_NAME
+                       #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                                               STARPU_NAME, m2lOuterTaskNames[idxLevel].get(),
+                       #else
+                                               //"M2L_out-l_nb_i_nb_i_s"
+                                               STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
+                                                                             idxLevel,
+                                                                             tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                                             tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                                             tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(),
+                                                                             tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(),
+                                                                             outsideInteractions->size(),
+                                                                             tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(),
+                                                                             tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(),
+                                                                             tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                                             tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                                             starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].down)),
+                       #endif
+                       #endif
+                                               0);
 
-                }
-                while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7)
-                      && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1)
-                      && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){
-                    idxSubGroup += 1;
+#ifdef SCALFMM_USE_STARPU_EXTRACT
+                      }
+                    else{
+                        {
 
-                    // put the right codelet
-                    if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){
-                        starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                               &l2l_cl_nocommute,
-                                               STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                               STARPU_VALUE, &idxLevel, sizeof(idxLevel),
-                                               STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                          // Extract data from second group for the first one
+                          // That is copy B to B'
+                          extractedCellBuffer.emplace_back();
+                          CellExtractedHandles& interactionBuffer = extractedCellBuffer.back();
+                          interactionBuffer.cellsToExtract = externalInteractionsAllLevelOuterIndexes[idxLevel][idxGroup][idxInteraction];
+                          interactionBuffer.size = tree->getCellGroup(idxLevel,interactionid)->extractGetSizeSymbUp(interactionBuffer.cellsToExtract);
+                          // Allocate the extraction buffer only on the rank that owns the group being extracted from
+                          if(starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb) == mpi_rank){
+                              interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]);
+                              FAssertLF(interactionBuffer.data);
+                            }
+                          else{
+                              interactionBuffer.data.reset(nullptr);
+                            }
+                          int registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb);
+                          int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
+                          starpu_variable_data_register(&interactionBuffer.all, where,
+                                                        (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size);
+                          starpu_mpi_data_register(interactionBuffer.all, tag++, registeringNode);
+
+                          CellExtractedHandles* interactionBufferPtr = &interactionBuffer;
+                          starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                                 &cell_extract_up,
+                                                 STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel),
+                                                 STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
                        #endif
-                                               STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly
-                                               STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write
-                                               STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly
-                                STARPU_RW, cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly
-        #ifdef STARPU_USE_TASK_NAME
-        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                STARPU_NAME, l2lTaskNames[idxLevel].get(),
-        #else
-                                //"L2L-l_nb_i_nbc_ic_s"
-                                STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
-                                                              idxLevel,
-                                                              tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
-                                                              tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
-                                                              tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
-                                                              tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
-                                                              FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
-                                                              FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
-                                                              tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
-                                                              tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
-                                                              tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
-                                                              tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
-                                                              starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)),
-        #endif
-        #endif
-                                0);
-                    }
-                    else{
-                        starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                               &l2l_cl,
-                                               STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                               STARPU_VALUE, &idxLevel, sizeof(idxLevel),
-                                               STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                                                 STARPU_R, cellHandles[idxLevel][interactionid].symb,
+                                                 STARPU_R, cellHandles[idxLevel][interactionid].up,
+                                                 STARPU_RW, interactionBuffer.all, 0);
+
+                          // Move to a new memory block that is on the same node as A
+                          // B' to B'''
+                          duplicatedCellBuffer.emplace_back();
+                          DuplicatedCellHandle& duplicateB = duplicatedCellBuffer.back();
+                          duplicateB.sizeSymb = tree->getCellGroup(idxLevel,interactionid)->getBufferSizeInByte();
+                          duplicateB.sizeOther = tree->getCellGroup(idxLevel,interactionid)->getMultipoleBufferSizeInByte();
+                          if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == mpi_rank){
+                              // Reuse block but just to perform the send
+                              duplicateB.dataSymbPtr.reset(new unsigned char[duplicateB.sizeSymb]);// = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawBuffer());
+                              duplicateB.dataOtherPtr.reset(new unsigned char[duplicateB.sizeOther]);// = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawMultipoleBuffer());
+                            }
+                          duplicateB.dataSymb = nullptr;
+                          duplicateB.dataOther = nullptr;
+
+                          registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb);
+                          where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
+                          starpu_variable_data_register(&duplicateB.symb, where,
+                                                        (uintptr_t)duplicateB.dataSymbPtr.get(), duplicateB.sizeSymb);
+                          starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode);
+                          starpu_variable_data_register(&duplicateB.other, where,
+                                                        (uintptr_t)duplicateB.dataOtherPtr.get(), duplicateB.sizeOther);
+                          starpu_mpi_data_register(duplicateB.other, tag++, registeringNode);
+
+                          const unsigned char* ptr1 = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawBuffer());
+                          size_t size1 = duplicateB.sizeSymb;
+                          const unsigned char* ptr2 = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawMultipoleBuffer());
+                          size_t size2 = duplicateB.sizeOther;
+
+                          starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                                 &cell_insert_up_bis,
+                                                 STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*),
+                                                 STARPU_VALUE, &ptr1, sizeof(ptr1),
+                                                 STARPU_VALUE, &size1, sizeof(size1),
+                                                 STARPU_VALUE, &ptr2, sizeof(ptr2),
+                                                 STARPU_VALUE, &size2, sizeof(size2),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel),
+                                                 STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
                        #endif
-                                               STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly
-                                               STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write
-                                               STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly
-                                (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly
-        #ifdef STARPU_USE_TASK_NAME
-        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                STARPU_NAME, l2lTaskNames[idxLevel].get(),
-        #else
-                                //"L2L-l_nb_i_nbc_ic_s"
-                                STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
-                                                              idxLevel,
-                                                              tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
-                                                              tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
-                                                              tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
-                                                              tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
-                                                              FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
-                                                              FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
-                                                              tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
-                                                              tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
-                                                              tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
-                                                              tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
-                                                              starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)),
-        #endif
-        #endif
-                                0);
-                    }
+                                                 STARPU_R, interactionBuffer.all,
+                                                 STARPU_RW, duplicateB.symb,
+                                                 STARPU_RW, duplicateB.other, 0);
+
+
+                          int mode = 1;
+                          starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                                 &m2l_cl_inout,
+                                                 STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                                 STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                                 STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
+                                                 STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                                                 STARPU_VALUE, &mode, sizeof(int),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                                 STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
+                       #endif
+                                                 STARPU_R, cellHandles[idxLevel][idxGroup].symb,
+                                                 (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down,
+                                                 STARPU_R, duplicateB.symb,
+                                                 STARPU_R, duplicateB.other,
+                       #ifdef STARPU_USE_TASK_NAME
+                       #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                                                 STARPU_NAME, m2lOuterTaskNames[idxLevel].get(),
+                       #else
+                                                 //"M2L_out-l_nb_i_nb_i_s
+                                                 STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
+                                                                               idxLevel,
+                                                                               tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                                               tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                                               tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(),
+                                                                               tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(),
+                                                                               outsideInteractions->size(),
+                                                                               tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                                               tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                                               tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(),
+                                                                               tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(),
+                                                                               starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].down)),
+                       #endif
+                       #endif
+                                                 0);
+                        }
+                        {
+                          // Extract data from the first group (A) for the second one (B)
+                          // That is copy A to A'
+                          extractedCellBuffer.emplace_back();
+                          CellExtractedHandles& interactionBuffer = extractedCellBuffer.back();
+                          interactionBuffer.cellsToExtract = externalInteractionsAllLevelInnerIndexes[idxLevel][idxGroup][idxInteraction];
+                          interactionBuffer.size = tree->getCellGroup(idxLevel,idxGroup)->extractGetSizeSymbUp(interactionBuffer.cellsToExtract);
+                          // I allocate only if I will use it to extract
+                          if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == mpi_rank){
+                              interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]);
+                            }
+                          else{
+                              interactionBuffer.data.reset(nullptr);
+                            }
+                          int registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb);
+                          int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
+                          starpu_variable_data_register(&interactionBuffer.all, where,
+                                                        (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size);
+                          starpu_mpi_data_register(interactionBuffer.all, tag++, registeringNode);
+
+                          CellExtractedHandles* interactionBufferPtr = &interactionBuffer;
+                          starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                                 &cell_extract_up,
+                                                 STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                                 STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
+                       #endif
+                                                 STARPU_R, cellHandles[idxLevel][idxGroup].symb,
+                                                 STARPU_R, cellHandles[idxLevel][idxGroup].up,
+                                                 STARPU_RW, interactionBuffer.all, 0);
+
+                          // Move to a new memory block that is on the same node as B
+                          // A' to A'''
+                          duplicatedCellBuffer.emplace_back();
+                          DuplicatedCellHandle& duplicateB = duplicatedCellBuffer.back();
+                          duplicateB.sizeSymb = tree->getCellGroup(idxLevel,idxGroup)->getBufferSizeInByte();
+                          duplicateB.sizeOther = tree->getCellGroup(idxLevel,idxGroup)->getMultipoleBufferSizeInByte();
+                          if(starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb) == mpi_rank){
+                              // Reuse block but just to perform the send
+                              duplicateB.dataSymbPtr.reset(new unsigned char[duplicateB.sizeSymb]);//const_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawBuffer());
+                              //memcpy(duplicateB.dataSymbPtr.get(), tree->getCellGroup(idxLevel,idxGroup)->getRawBuffer(), duplicateB.sizeSymb);
+                              duplicateB.dataOtherPtr.reset(new unsigned char[duplicateB.sizeOther]);//reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawMultipoleBuffer());
+                              //memcpy(duplicateB.dataOtherPtr.get(), tree->getCellGroup(idxLevel,idxGroup)->getRawMultipoleBuffer(), duplicateB.sizeOther);
+                            }
+                          duplicateB.dataSymb = nullptr;
+                          duplicateB.dataOther = nullptr;
+
+                          registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb);
+                          where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
+                          starpu_variable_data_register(&duplicateB.symb, where,
+                                                        (uintptr_t)duplicateB.dataSymbPtr.get(), duplicateB.sizeSymb);
+                          starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode);
+                          starpu_variable_data_register(&duplicateB.other, where,
+                                                        (uintptr_t)duplicateB.dataOtherPtr.get(), duplicateB.sizeOther);
+                          starpu_mpi_data_register(duplicateB.other, tag++, registeringNode);
+
+                          const unsigned char* ptr1 = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawBuffer());
+                          size_t size1 = duplicateB.sizeSymb;
+                          const unsigned char* ptr2 = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawMultipoleBuffer());
+                          size_t size2 = duplicateB.sizeOther;
+                          starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                                 &cell_insert_up_bis,
+                                                 STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*),
+                                                 STARPU_VALUE, &ptr1, sizeof(ptr1),
+                                                 STARPU_VALUE, &size1, sizeof(size1),
+                                                 STARPU_VALUE, &ptr2, sizeof(ptr2),
+                                                 STARPU_VALUE, &size2, sizeof(size2),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                                 STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
+                       #endif
+                                                 STARPU_R, interactionBuffer.all,
+                                                 STARPU_RW, duplicateB.symb,
+                                                 STARPU_RW, duplicateB.other, 0);
+
+                          int mode = 2;
+                          starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                                 &m2l_cl_inout,
+                                                 STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                                 STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                                 STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
+                                                 STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                                                 STARPU_VALUE, &mode, sizeof(int),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                                 STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel),
+                       #endif
+                                                 STARPU_R, cellHandles[idxLevel][interactionid].symb,
+                                                 (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][interactionid].down,
+                                                 STARPU_R, duplicateB.symb,
+                                                 STARPU_R, duplicateB.other,
+                       #ifdef STARPU_USE_TASK_NAME
+                       #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                                                 STARPU_NAME, m2lOuterTaskNames[idxLevel].get(),
+                       #else
+                                                 //"M2L_out-l_nb_i_nb_i_s"
+                                                 STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
+                                                                               idxLevel,
+                                                                               tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                                               tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                                               tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(),
+                                                                               tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(),
+                                                                               outsideInteractions->size(),
+                                                                               tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(),
+                                                                               tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(),
+                                                                               tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                                               tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                                               starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].down)),
+                       #endif
+                       #endif
+                                                 0);
+                        }
+                      }
+#endif
+                  }
+              }
+            FLOG( timerOutBlock.tac() );
+          }
+      }
+    FLOG( FLog::Controller << "\t\t transferPass in " << timer.tacAndElapsed() << "s\n" );
+    FLOG( FLog::Controller << "\t\t\t inblock in  " << timerInBlock.elapsed() << "s\n" );
+    FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" );
+  }
+
+  /////////////////////////////////////////////////////////////////////////////////////
+  /// Downward Pass
+  /////////////////////////////////////////////////////////////////////////////////////
+
+  void downardPassDuplicate(){
+    FLOG( FTic timer; );
+    for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){
+        int idxSubGroup = 0;
+        std::cout << "            Level "<< idxLevel << " -> " << idxLevel+1<<"  nbGroupCell " << tree->getNbCellGroupAtLevel(idxLevel) << std::endl;
+
+        for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
+            CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup);
+
+            // Skip current group if needed
+            std::cout <<"      IF " << tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() << " <= " <<(currentCells->getStartingIndex()<<3)<< std::endl;
+            if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){
+                ++idxSubGroup;
+                FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) );
+                FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() );
+              }
+            std::cout <<   "               idxSubGroup " << idxSubGroup << std::endl;
+            // Copy at max 8 groups
+            {
+              // put the right codelet
+              if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){
+                  std::cout << "               (noCommuteAtLastLevel)  ID   "<<cellHandles[idxLevel][idxGroup].groupID << " intervalSize " <<cellHandles[idxLevel][idxGroup].intervalSize <<std::endl;
+                  starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                         &l2l_cl_nocommute,
+                                         STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                         STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                         STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                         STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel),
+                       #endif
+                                         STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly
+                                         STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write
+                                         STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly
+                      STARPU_RW, cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly
+    #ifdef STARPU_USE_TASK_NAME
+    #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                      STARPU_NAME, l2lTaskNames[idxLevel].get(),
+    #else
+                      //"L2L-l_nb_i_nbc_ic_s"
+                      STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
+                                                    idxLevel,
+                                                    tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                    tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                    tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
+                                                    tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
+                                                    FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
+                                                    FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
+                                                    tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                    tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                    tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
+                                                    tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
+                                                    starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)),
+    #endif
+    #endif
+                      0);
+                }
+              else{
+                  std::cout << "               (CommuteAtLastLevel)  ID   "<<cellHandles[idxLevel][idxGroup].groupID << " intervalSize " <<cellHandles[idxLevel][idxGroup].intervalSize <<std::endl;
+                  starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                         &l2l_cl,
+                                         STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                         STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                         STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                         STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel),
+                       #endif
+                                         STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly
+                                         STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write
+                                         STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly
+                      (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly
+    #ifdef STARPU_USE_TASK_NAME
+    #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                      STARPU_NAME, l2lTaskNames[idxLevel].get(),
+    #else
+                      //"L2L-l_nb_i_nbc_ic_s"
+                      STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
+                                                    idxLevel,
+                                                    tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                    tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                    tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
+                                                    tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
+                                                    FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
+                                                    FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
+                                                    tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                    tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                    tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
+                                                    tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
+                                                    starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)),
+    #endif
+    #endif
+                      0);
                 }
-            }
-        }
-        FLOG( FLog::Controller << "\t\t downardPass in " << timer.tacAndElapsed() << "s\n" );
-    }
 
-    /////////////////////////////////////////////////////////////////////////////////////
-    /// Direct Pass
-    /////////////////////////////////////////////////////////////////////////////////////
+            }
+            std::cout <<     "     while loop " << std::endl;
+            while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7)
+                  && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1)
+                  && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){
+                idxSubGroup += 1;
+
+                // put the right codelet
+                if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){
+                    std::cout << "               (noCommuteAtLastLevel)  ID   "<<cellHandles[idxLevel][idxGroup].groupID << " intervalSize " <<cellHandles[idxLevel][idxGroup].intervalSize <<std::endl;
+                    starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                           &l2l_cl_nocommute,
+                                           STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                           STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                           STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                           STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel),
+                       #endif
+                                           STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly
+                                           STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write
+                                           STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly
+                        STARPU_RW, cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly
+    #ifdef STARPU_USE_TASK_NAME
+    #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                        STARPU_NAME, l2lTaskNames[idxLevel].get(),
+    #else
+                        //"L2L-l_nb_i_nbc_ic_s"
+                        STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
+                                                      idxLevel,
+                                                      tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                      tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                      tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
+                                                      tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
+                                                      FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
+                                                      FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
+                                                      tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                      tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                      tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
+                                                      tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
+                                                      starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)),
+    #endif
+    #endif
+                        0);
+                  }
+                else{
+                    std::cout << "               (CommuteAtLastLevel)  ID   "<<cellHandles[idxLevel][idxGroup].groupID << " intervalSize " <<cellHandles[idxLevel][idxGroup].intervalSize <<std::endl;
+                    starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                           &l2l_cl,
+                                           STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                           STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                           STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                           STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel),
+                       #endif
+                                           STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly
+                                           STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write
+                                           STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly
+                        (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly
+    #ifdef STARPU_USE_TASK_NAME
+    #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                        STARPU_NAME, l2lTaskNames[idxLevel].get(),
+    #else
+                        //"L2L-l_nb_i_nbc_ic_s"
+                        STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
+                                                      idxLevel,
+                                                      tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                      tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                      tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
+                                                      tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
+                                                      FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
+                                                      FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
+                                                      tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                      tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                      tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
+                                                      tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
+                                                      starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)),
+    #endif
+    #endif
+                        0);
+                  }
+              }
+          }
+      }
+    FLOG( FLog::Controller << "\t\t downardPass in " << timer.tacAndElapsed() << "s\n" );
+  }
+  void downardPassNoDuplicate(){ // Inserts the L2L tasks: for every working level and cell group, one task per level idxLevel+1 group whose index range overlaps the group's children
+    FLOG( FTic timer; );
+    for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){
+
+        for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
+            int idxSubGroup = 0;
+            //if(idxSubGroup == tree->getNbCellGroupAtLevel(idxLevel+1))
+            //    break;
+            CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup);
+
+            // Advance idxSubGroup past the level idxLevel+1 groups that end at or before this group's first child index (getStartingIndex()<<3)
+            while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){
+                ++idxSubGroup;
+                if(idxSubGroup == tree->getNbCellGroupAtLevel(idxLevel+1))
+                  break;
+
+              }
+            if(idxSubGroup == tree->getNbCellGroupAtLevel(idxLevel+1))
+              break; // no level idxLevel+1 group left: leaves the idxGroup loop for this level
+            // Insert the L2L task for the first level idxLevel+1 group selected above
+            {
+              // Select the codelet: l2l_cl_nocommute when commute is disabled (between levels, or at the last level), l2l_cl otherwise
+              if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){
+                  starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                         &l2l_cl_nocommute,
+                                         STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                         STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                         STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                         STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel),
+                       #endif
+                                         STARPU_R, cellHandles[idxLevel][idxGroup].symb, // symbolic data of the level idxLevel group, read-only
+                                         STARPU_R, cellHandles[idxLevel][idxGroup].down, // down data of the level idxLevel group, read-only
+                                         STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, // symbolic data of the level idxLevel+1 group, read-only
+                      STARPU_RW, cellHandles[idxLevel+1][idxSubGroup].down, // down data of the level idxLevel+1 group, read/write
+    #ifdef STARPU_USE_TASK_NAME
+    #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                      STARPU_NAME, l2lTaskNames[idxLevel].get(),
+    #else
+                      //"L2L-l_nb_i_nbc_ic_s"
+                      STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
+                                                    idxLevel,
+                                                    tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                    tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                    tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
+                                                    tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
+                                                    FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
+                                                    FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
+                                                    tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                    tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                    tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
+                                                    tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
+                                                    starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)),
+    #endif
+    #endif
+                      0);
+                }
+              else{
+                  starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                         &l2l_cl,
+                                         STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                         STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                         STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                         STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel),
+                       #endif
+                                         STARPU_R, cellHandles[idxLevel][idxGroup].symb, // symbolic data of the level idxLevel group, read-only
+                                         STARPU_R, cellHandles[idxLevel][idxGroup].down, // down data of the level idxLevel group, read-only
+                                         STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, // symbolic data of the level idxLevel+1 group, read-only
+                      (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel+1][idxSubGroup].down, // down data of the level idxLevel+1 group, read/write, commutable when supported
+    #ifdef STARPU_USE_TASK_NAME
+    #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                      STARPU_NAME, l2lTaskNames[idxLevel].get(),
+    #else
+                      //"L2L-l_nb_i_nbc_ic_s"
+                      STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
+                                                    idxLevel,
+                                                    tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                    tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                    tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
+                                                    tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
+                                                    FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
+                                                    FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
+                                                    tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                    tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                    tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
+                                                    tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
+                                                    starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)),
+    #endif
+    #endif
+                      0);
+                }
 
-    void directPass(){
-        FLOG( FTic timer; );
-        FLOG( FTic timerInBlock; FTic timerOutBlock; );
+            }
+            while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7)
+                  && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1)
+                  && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){
+                idxSubGroup += 1; // the next level idxLevel+1 group starts at or before this group's last child index: handle it too
 
-        FLOG( timerOutBlock.tic() );
-        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
-            for(int idxInteraction = 0; idxInteraction < int(externalInteractionsLeafLevel[idxGroup].size()) ; ++idxInteraction){
-                const int interactionid = externalInteractionsLeafLevel[idxGroup][idxInteraction].otherBlockId;
-                const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions;
-                if(starpu_mpi_data_get_rank(particleHandles[idxGroup].down) == starpu_mpi_data_get_rank(particleHandles[interactionid].down))
-                {
+                // Select the codelet: l2l_cl_nocommute when commute is disabled (between levels, or at the last level), l2l_cl otherwise
+                if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){
                     starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                           &p2p_cl_inout,
+                                           &l2l_cl_nocommute,
                                            STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                           STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
-                                           STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
+                                           STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                           STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                           STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
+                                           STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel),
                        #endif
-                                           STARPU_R, particleHandles[idxGroup].symb,
+                                           STARPU_R, cellHandles[idxLevel][idxGroup].symb, // symbolic data of the level idxLevel group, read-only
+                                           STARPU_R, cellHandles[idxLevel][idxGroup].down, // down data of the level idxLevel group, read-only
+                                           STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, // symbolic data of the level idxLevel+1 group, read-only
+                        STARPU_RW, cellHandles[idxLevel+1][idxSubGroup].down, // down data of the level idxLevel+1 group, read/write
+    #ifdef STARPU_USE_TASK_NAME
+    #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                        STARPU_NAME, l2lTaskNames[idxLevel].get(),
+    #else
+                        //"L2L-l_nb_i_nbc_ic_s"
+                        STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
+                                                      idxLevel,
+                                                      tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                      tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                      tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
+                                                      tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
+                                                      FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
+                                                      FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
+                                                      tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                      tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                      tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
+                                                      tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
+                                                      starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)),
+    #endif
+    #endif
+                        0);
+                  }
+                else{
+                    starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                           &l2l_cl,
+                                           STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                           STARPU_VALUE, &idxLevel, sizeof(idxLevel),
+                                           STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                           STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel),
+                       #endif
+                                           STARPU_R, cellHandles[idxLevel][idxGroup].symb, // symbolic data of the level idxLevel group, read-only
+                                           STARPU_R, cellHandles[idxLevel][idxGroup].down, // down data of the level idxLevel group, read-only
+                                           STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, // symbolic data of the level idxLevel+1 group, read-only
+                        (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel+1][idxSubGroup].down, // down data of the level idxLevel+1 group, read/write, commutable when supported
+    #ifdef STARPU_USE_TASK_NAME
+    #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                        STARPU_NAME, l2lTaskNames[idxLevel].get(),
+    #else
+                        //"L2L-l_nb_i_nbc_ic_s"
+                        STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
+                                                      idxLevel,
+                                                      tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(),
+                                                      tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(),
+                                                      tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(),
+                                                      tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(),
+                                                      FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)-
+                                                      FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3),
+                                                      tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(),
+                                                      tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(),
+                                                      tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(),
+                                                      tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(),
+                                                      starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)),
+    #endif
+    #endif
+                        0);
+                  }
+              }
+          }
+      }
+    FLOG( FLog::Controller << "\t\t downardPass in " << timer.tacAndElapsed() << "s\n" ); // NOTE(review): same log text as the preceding downward-pass method, so the two are indistinguishable in the log
+  }
+
+
+  /////////////////////////////////////////////////////////////////////////////////////
+  /// Direct Pass
+  /////////////////////////////////////////////////////////////////////////////////////
+
+  void directPass(){
+    FLOG( FTic timer; );
+    FLOG( FTic timerInBlock; FTic timerOutBlock; );
+
+    FLOG( timerOutBlock.tic() );
+    for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
+        for(int idxInteraction = 0; idxInteraction < int(externalInteractionsLeafLevel[idxGroup].size()) ; ++idxInteraction){
+            const int interactionid = externalInteractionsLeafLevel[idxGroup][idxInteraction].otherBlockId;
+            const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions;
+            if(starpu_mpi_data_get_rank(particleHandles[idxGroup].down) == starpu_mpi_data_get_rank(particleHandles[interactionid].down))
+              {
+                starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                       &p2p_cl_inout,
+                                       STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                       STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
+                                       STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                       STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
+                       #endif
+                                       STARPU_R, particleHandles[idxGroup].symb,
                        #ifdef STARPU_USE_REDUX
-                                           STARPU_REDUX, particleHandles[idxGroup].down,
+                                       STARPU_REDUX, particleHandles[idxGroup].down,
                        #else
-                                           (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down,
+                                       (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down,
                        #endif
-                                           STARPU_R, particleHandles[interactionid].symb,
+                                       STARPU_R, particleHandles[interactionid].symb,
                        #ifdef STARPU_USE_REDUX
-                                           STARPU_REDUX, particleHandles[interactionid].down,
+                                       STARPU_REDUX, particleHandles[interactionid].down,
                        #else
-                                           (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down,
-                                           STARPU_EXECUTE_ON_DATA, particleHandles[interactionid].down,
+                                       (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down,
+                                       STARPU_EXECUTE_ON_DATA, particleHandles[interactionid].down,
                        #endif
                        #ifdef STARPU_USE_TASK_NAME
                        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                           STARPU_NAME, p2pOuterTaskNames.get(),
+                                       STARPU_NAME, p2pOuterTaskNames.get(),
                        #else
-                                           //"P2P_out-nb_i_p_nb_i_p_s"
-                                           STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
-                                                                         tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(),
-                                                                         tree->getParticleGroup(idxGroup)->getSizeOfInterval(),
-                                                                         tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(),
-                                                                         tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(),
-                                                                         tree->getParticleGroup(interactionid)->getSizeOfInterval(),
-                                                                         tree->getParticleGroup(interactionid)->getNbParticlesInGroup(),
-                                                                         outsideInteractions->size(),
-                                                                         tree->getParticleGroup(idxGroup)->getStartingIndex(),
-                                                                         tree->getParticleGroup(idxGroup)->getEndingIndex(),
-                                                                         tree->getParticleGroup(interactionid)->getStartingIndex(),
-                                                                         tree->getParticleGroup(interactionid)->getEndingIndex(),
-                                                                         starpu_mpi_data_get_rank(particleHandles[interactionid].down)),
+                                       //"P2P_out-nb_i_p_nb_i_p_s"
+                                       STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
+                                                                     tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(),
+                                                                     tree->getParticleGroup(idxGroup)->getSizeOfInterval(),
+                                                                     tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(),
+                                                                     tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(),
+                                                                     tree->getParticleGroup(interactionid)->getSizeOfInterval(),
+                                                                     tree->getParticleGroup(interactionid)->getNbParticlesInGroup(),
+                                                                     outsideInteractions->size(),
+                                                                     tree->getParticleGroup(idxGroup)->getStartingIndex(),
+                                                                     tree->getParticleGroup(idxGroup)->getEndingIndex(),
+                                                                     tree->getParticleGroup(interactionid)->getStartingIndex(),
+                                                                     tree->getParticleGroup(interactionid)->getEndingIndex(),
+                                                                     starpu_mpi_data_get_rank(particleHandles[interactionid].down)),
                        #endif
                        #endif
-                                           0);
-                }
-                else
-                {
+                                       0);
+              }
+            else
+              {
 
 #ifdef SCALFMM_USE_STARPU_EXTRACT
-                    {
-                        // Extract data from second group for the first one
-                        // That is copy B to B'
-                        extractedParticlesBuffer.emplace_back();
-                        ParticleExtractedHandles& interactionBuffer = extractedParticlesBuffer.back();
-                        interactionBuffer.leavesToExtract = externalInteractionsLeafLevelOuter[idxGroup][idxInteraction];
-
-                        interactionBuffer.size = tree->getParticleGroup(interactionid)->getExtractBufferSize(interactionBuffer.leavesToExtract);
-                        // I allocate only if I will use it to extract
-                        if(starpu_mpi_data_get_rank(particleHandles[interactionid].symb) == mpi_rank){
-                            interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]);
-                        }
-                        else{
-                            interactionBuffer.data.reset(nullptr);
-                        }
+                {
+                  // Extract data from second group for the first one
+                  // That is copy B to B'
+                  extractedParticlesBuffer.emplace_back();
+                  ParticleExtractedHandles& interactionBuffer = extractedParticlesBuffer.back();
+                  interactionBuffer.leavesToExtract = externalInteractionsLeafLevelOuter[idxGroup][idxInteraction];
+
+                  interactionBuffer.size = tree->getParticleGroup(interactionid)->getExtractBufferSize(interactionBuffer.leavesToExtract);
+                  // I allocate only if I will use it to extract
+                  if(starpu_mpi_data_get_rank(particleHandles[interactionid].symb) == mpi_rank){
+                      interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]);
+                    }
+                  else{
+                      interactionBuffer.data.reset(nullptr);
+                    }
 
-                        int registeringNode = starpu_mpi_data_get_rank(particleHandles[interactionid].symb);
-                        int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
-                        starpu_variable_data_register(&interactionBuffer.symb, where,
-                                                      (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size);
-                        starpu_mpi_data_register(interactionBuffer.symb, tag++, registeringNode);
+                  int registeringNode = starpu_mpi_data_get_rank(particleHandles[interactionid].symb);
+                  int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
+                  starpu_variable_data_register(&interactionBuffer.symb, where,
+                                                (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size);
+                  starpu_mpi_data_register(interactionBuffer.symb, tag++, registeringNode);
 
-                        ParticleExtractedHandles* interactionBufferPtr = &interactionBuffer;
-                        starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                               &p2p_extract,
-                                               STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*),
+                  ParticleExtractedHandles* interactionBufferPtr = &interactionBuffer;
+                  starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                         &p2p_extract,
+                                         STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
+                                         STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
                        #endif
-                                               STARPU_R, particleHandles[interactionid].symb,
-                                               STARPU_RW, interactionBuffer.symb, 0);
-
-                        // Move to a new memory block that is on the same node as A
-                        // B' to B'''
-                        duplicatedParticlesBuffer.emplace_back();
-                        DuplicatedParticlesHandle& duplicateB = duplicatedParticlesBuffer.back();
-                        duplicateB.size = tree->getParticleGroup(interactionid)->getBufferSizeInByte();
-                        if(starpu_mpi_data_get_rank(particleHandles[idxGroup].symb) == mpi_rank){
-                            // Reuse block but just to perform the send
-                            duplicateB.data = (unsigned char*) FAlignedMemory::AllocateBytes<64>(duplicateB.size);// = const_cast<unsigned char*>(tree->getParticleGroup(interactionid)->getRawBuffer());
-                        }
-                        else{
-                            duplicateB.data = nullptr;
-                        }
+                                         STARPU_R, particleHandles[interactionid].symb,
+                                         STARPU_RW, interactionBuffer.symb, 0);
+
+                  // Move to a new memory block that is on the same node as A
+                  // B' to B'''
+                  duplicatedParticlesBuffer.emplace_back();
+                  DuplicatedParticlesHandle& duplicateB = duplicatedParticlesBuffer.back();
+                  duplicateB.size = tree->getParticleGroup(interactionid)->getBufferSizeInByte();
+                  if(starpu_mpi_data_get_rank(particleHandles[idxGroup].symb) == mpi_rank){
+                      // Reuse block but just to perform the send
+                      duplicateB.data = (unsigned char*) FAlignedMemory::AllocateBytes<64>(duplicateB.size);// = const_cast<unsigned char*>(tree->getParticleGroup(interactionid)->getRawBuffer());
+                    }
+                  else{
+                      duplicateB.data = nullptr;
+                    }
 
-                        registeringNode = starpu_mpi_data_get_rank(particleHandles[idxGroup].symb);
-                        where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
-                        starpu_variable_data_register(&duplicateB.symb, where,
-                                                      (uintptr_t)duplicateB.data, duplicateB.size);
-                        starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode);
+                  registeringNode = starpu_mpi_data_get_rank(particleHandles[idxGroup].symb);
+                  where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
+                  starpu_variable_data_register(&duplicateB.symb, where,
+                                                (uintptr_t)duplicateB.data, duplicateB.size);
+                  starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode);
 
-                        const unsigned char* dataPtr = const_cast<unsigned char*>(tree->getParticleGroup(interactionid)->getRawBuffer());
-                        size_t sizeData = duplicateB.size;
+                  const unsigned char* dataPtr = const_cast<unsigned char*>(tree->getParticleGroup(interactionid)->getRawBuffer());
+                  size_t sizeData = duplicateB.size;
 
-                        starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                               &p2p_insert_bis,
-                                               STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*),
-                                               STARPU_VALUE, &dataPtr, sizeof(dataPtr),
-                                               STARPU_VALUE, &sizeData, sizeof(sizeData),
+                  starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                         &p2p_insert_bis,
+                                         STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*),
+                                         STARPU_VALUE, &dataPtr, sizeof(dataPtr),
+                                         STARPU_VALUE, &sizeData, sizeof(sizeData),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
+                                         STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
                        #endif
-                                               STARPU_R, interactionBuffer.symb,
-                                               STARPU_RW, duplicateB.symb,
-                                               0);
-
-                        starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                               &p2p_cl_inout_mpi,
-                                               STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                               STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
-                                               STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
+                                         STARPU_R, interactionBuffer.symb,
+                                         STARPU_RW, duplicateB.symb,
+                                         0);
+
+                  starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                         &p2p_cl_inout_mpi,
+                                         STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                         STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
+                                         STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
+                                         STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
                        #endif
-                                               STARPU_R, particleHandles[idxGroup].symb,
-                                               (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down,
-                                               STARPU_R, duplicateB.symb,
+                                         STARPU_R, particleHandles[idxGroup].symb,
+                                         (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down,
+                                         STARPU_R, duplicateB.symb,
                        #ifdef STARPU_USE_TASK_NAME
                        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                               STARPU_NAME, p2pOuterTaskNames.get(),
+                                         STARPU_NAME, p2pOuterTaskNames.get(),
                        #else
-                                               //"P2P_out-nb_i_p_nb_i_p_s"
-                                               STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
-                                                                             tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(),
-                                                                             tree->getParticleGroup(idxGroup)->getSizeOfInterval(),
-                                                                             tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(),
-                                                                             tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(),
-                                                                             tree->getParticleGroup(interactionid)->getSizeOfInterval(),
-                                                                             tree->getParticleGroup(interactionid)->getNbParticlesInGroup(),
-                                                                             outsideInteractions->size(),
-                                                                             tree->getParticleGroup(idxGroup)->getStartingIndex(),
-                                                                             tree->getParticleGroup(idxGroup)->getEndingIndex(),
-                                                                             tree->getParticleGroup(interactionid)->getStartingIndex(),
-                                                                             tree->getParticleGroup(interactionid)->getEndingIndex(),
-                                                                             starpu_mpi_data_get_rank(particleHandles[idxGroup].down)),
+                                         //"P2P_out-nb_i_p_nb_i_p_s"
+                                         STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
+                                                                       tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(),
+                                                                       tree->getParticleGroup(idxGroup)->getSizeOfInterval(),
+                                                                       tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(),
+                                                                       tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(),
+                                                                       tree->getParticleGroup(interactionid)->getSizeOfInterval(),
+                                                                       tree->getParticleGroup(interactionid)->getNbParticlesInGroup(),
+                                                                       outsideInteractions->size(),
+                                                                       tree->getParticleGroup(idxGroup)->getStartingIndex(),
+                                                                       tree->getParticleGroup(idxGroup)->getEndingIndex(),
+                                                                       tree->getParticleGroup(interactionid)->getStartingIndex(),
+                                                                       tree->getParticleGroup(interactionid)->getEndingIndex(),
+                                                                       starpu_mpi_data_get_rank(particleHandles[idxGroup].down)),
                        #endif
                        #endif
-                                               0);
-                    }
+                                         0);
+                }
+                {
+                  std::vector<OutOfBlockInteraction>* outsideInteractionsOpposite = new std::vector<OutOfBlockInteraction>(externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions);
+                  for(unsigned int i = 0; i < outsideInteractionsOpposite->size(); ++i)
                     {
-                        std::vector<OutOfBlockInteraction>* outsideInteractionsOpposite = new std::vector<OutOfBlockInteraction>(externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions);
-                        for(unsigned int i = 0; i < outsideInteractionsOpposite->size(); ++i)
-                        {
-                            MortonIndex tmp = outsideInteractionsOpposite->at(i).outIndex;
-                            outsideInteractionsOpposite->at(i).outIndex = outsideInteractionsOpposite->at(i).insideIndex;
-                            outsideInteractionsOpposite->at(i).insideIndex = tmp;
-                            int tmp2 = outsideInteractionsOpposite->at(i).insideIdxInBlock;
-                            outsideInteractionsOpposite->at(i).insideIdxInBlock = outsideInteractionsOpposite->at(i).outsideIdxInBlock;
-                            outsideInteractionsOpposite->at(i).outsideIdxInBlock = tmp2;
-                            outsideInteractionsOpposite->at(i).relativeOutPosition = getOppositeInterIndex(outsideInteractionsOpposite->at(i).relativeOutPosition);
-                        }
-                        externalInteractionsLeafLevelOpposite.push_front(outsideInteractionsOpposite);
+                      MortonIndex tmp = outsideInteractionsOpposite->at(i).outIndex;
+                      outsideInteractionsOpposite->at(i).outIndex = outsideInteractionsOpposite->at(i).insideIndex;
+                      outsideInteractionsOpposite->at(i).insideIndex = tmp;
+                      int tmp2 = outsideInteractionsOpposite->at(i).insideIdxInBlock;
+                      outsideInteractionsOpposite->at(i).insideIdxInBlock = outsideInteractionsOpposite->at(i).outsideIdxInBlock;
+                      outsideInteractionsOpposite->at(i).outsideIdxInBlock = tmp2;
+                      outsideInteractionsOpposite->at(i).relativeOutPosition = getOppositeInterIndex(outsideInteractionsOpposite->at(i).relativeOutPosition);
+                    }
+                  externalInteractionsLeafLevelOpposite.push_front(outsideInteractionsOpposite);
 
 
-                        // Extract data from second group for the first one
-                        // That is copy A to A'
-                        extractedParticlesBuffer.emplace_back();
-                        ParticleExtractedHandles& interactionBuffer = extractedParticlesBuffer.back();
-                        interactionBuffer.leavesToExtract = externalInteractionsLeafLevelInner[idxGroup][idxInteraction];
+                  // Extract data from first group for the second one
+                  // That is copy A to A'
+                  extractedParticlesBuffer.emplace_back();
+                  ParticleExtractedHandles& interactionBuffer = extractedParticlesBuffer.back();
+                  interactionBuffer.leavesToExtract = externalInteractionsLeafLevelInner[idxGroup][idxInteraction];
 
-                        interactionBuffer.size = tree->getParticleGroup(idxGroup)->getExtractBufferSize(interactionBuffer.leavesToExtract);
-                        // I allocate only if I will use it to extract
-                        if(starpu_mpi_data_get_rank(particleHandles[idxGroup].down) == mpi_rank){
-                            interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]);
-                        }
-                        else{
-                            interactionBuffer.data.reset(nullptr);
-                        }
+                  interactionBuffer.size = tree->getParticleGroup(idxGroup)->getExtractBufferSize(interactionBuffer.leavesToExtract);
+                  // I allocate only if I will use it to extract
+                  if(starpu_mpi_data_get_rank(particleHandles[idxGroup].down) == mpi_rank){
+                      interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]);
+                    }
+                  else{
+                      interactionBuffer.data.reset(nullptr);
+                    }
 
-                        int registeringNode = starpu_mpi_data_get_rank(particleHandles[idxGroup].down);
-                        int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
-                        starpu_variable_data_register(&interactionBuffer.symb, where,
-                                                      (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size);
-                        starpu_mpi_data_register(interactionBuffer.symb, tag++, registeringNode);
+                  int registeringNode = starpu_mpi_data_get_rank(particleHandles[idxGroup].down);
+                  int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
+                  starpu_variable_data_register(&interactionBuffer.symb, where,
+                                                (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size);
+                  starpu_mpi_data_register(interactionBuffer.symb, tag++, registeringNode);
 
-                        ParticleExtractedHandles* interactionBufferPtr = &interactionBuffer;
-                        starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                               &p2p_extract,
-                                               STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*),
+                  ParticleExtractedHandles* interactionBufferPtr = &interactionBuffer;
+                  starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                         &p2p_extract,
+                                         STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
+                                         STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
                        #endif
-                                               STARPU_R, particleHandles[idxGroup].symb,
-                                               STARPU_RW, interactionBuffer.symb, 0);
-
-                        // Move to a new memory block that is on the same node as A
-                        // B' to B'''
-                        duplicatedParticlesBuffer.emplace_back();
-                        DuplicatedParticlesHandle& duplicateA = duplicatedParticlesBuffer.back();
-                        duplicateA.size = tree->getParticleGroup(idxGroup)->getBufferSizeInByte();
-                        if(starpu_mpi_data_get_rank(particleHandles[interactionid].down) == mpi_rank){
-                            // Reuse block but just to perform the send
-                            duplicateA.data = (unsigned char*) FAlignedMemory::AllocateBytes<64>(duplicateA.size);// = const_cast<unsigned char*>(tree->getParticleGroup(idxGroup)->getRawBuffer());
-                        }
-                        else{
-                            duplicateA.data = nullptr;
-                        }
+                                         STARPU_R, particleHandles[idxGroup].symb,
+                                         STARPU_RW, interactionBuffer.symb, 0);
+
+                  // Move to a new memory block that is on the same node as B
+                  // A' to A'''
+                  duplicatedParticlesBuffer.emplace_back();
+                  DuplicatedParticlesHandle& duplicateA = duplicatedParticlesBuffer.back();
+                  duplicateA.size = tree->getParticleGroup(idxGroup)->getBufferSizeInByte();
+                  if(starpu_mpi_data_get_rank(particleHandles[interactionid].down) == mpi_rank){
+                      // Reuse block but just to perform the send
+                      duplicateA.data = (unsigned char*) FAlignedMemory::AllocateBytes<64>(duplicateA.size);// = const_cast<unsigned char*>(tree->getParticleGroup(idxGroup)->getRawBuffer());
+                    }
+                  else{
+                      duplicateA.data = nullptr;
+                    }
 
-                        registeringNode = starpu_mpi_data_get_rank(particleHandles[interactionid].down);
-                        where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
-                        starpu_variable_data_register(&duplicateA.symb, where,
-                                                      (uintptr_t)duplicateA.data, duplicateA.size);
-                        starpu_mpi_data_register(duplicateA.symb, tag++, registeringNode);
+                  registeringNode = starpu_mpi_data_get_rank(particleHandles[interactionid].down);
+                  where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1;
+                  starpu_variable_data_register(&duplicateA.symb, where,
+                                                (uintptr_t)duplicateA.data, duplicateA.size);
+                  starpu_mpi_data_register(duplicateA.symb, tag++, registeringNode);
 
-                        const unsigned char* dataPtr = const_cast<unsigned char*>(tree->getParticleGroup(idxGroup)->getRawBuffer());
-                        size_t sizeData = duplicateA.size;
+                  const unsigned char* dataPtr = const_cast<unsigned char*>(tree->getParticleGroup(idxGroup)->getRawBuffer());
+                  size_t sizeData = duplicateA.size;
 
-                        starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                               &p2p_insert_bis,
-                                               STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*),
-                                               STARPU_VALUE, &dataPtr, sizeof(dataPtr),
-                                               STARPU_VALUE, &sizeData, sizeof(sizeData),
+                  starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                         &p2p_insert_bis,
+                                         STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*),
+                                         STARPU_VALUE, &dataPtr, sizeof(dataPtr),
+                                         STARPU_VALUE, &sizeData, sizeof(sizeData),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
+                                         STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
                        #endif
-                                               STARPU_R, interactionBuffer.symb,
-                                               STARPU_RW, duplicateA.symb, 0);
+                                         STARPU_R, interactionBuffer.symb,
+                                         STARPU_RW, duplicateA.symb, 0);
 
 
-                        starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                               &p2p_cl_inout_mpi,
-                                               STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                               STARPU_VALUE, &outsideInteractionsOpposite, sizeof(outsideInteractionsOpposite),
-                                               STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
+                  starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                         &p2p_cl_inout_mpi,
+                                         STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                         STARPU_VALUE, &outsideInteractionsOpposite, sizeof(outsideInteractionsOpposite),
+                                         STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
+                                         STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
                        #endif
-                                               STARPU_R, particleHandles[interactionid].symb,
-                                               (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down,
-                                               STARPU_R, duplicateA.symb,
+                                         STARPU_R, particleHandles[interactionid].symb,
+                                         (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down,
+                                         STARPU_R, duplicateA.symb,
                        #ifdef STARPU_USE_TASK_NAME
                        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                               STARPU_NAME, p2pOuterTaskNames.get(),
+                                         STARPU_NAME, p2pOuterTaskNames.get(),
                        #else
-                                               //"P2P_out-nb_i_p_nb_i_p_s"
-                                               STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
-                                                                             tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(),
-                                                                             tree->getParticleGroup(interactionid)->getSizeOfInterval(),
-                                                                             tree->getParticleGroup(interactionid)->getNbParticlesInGroup(),
-                                                                             tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(),
-                                                                             tree->getParticleGroup(idxGroup)->getSizeOfInterval(),
-                                                                             tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(),
-                                                                             outsideInteractions->size(),
-                                                                             tree->getParticleGroup(interactionid)->getStartingIndex(),
-                                                                             tree->getParticleGroup(interactionid)->getEndingIndex(),
-                                                                             tree->getParticleGroup(idxGroup)->getStartingIndex(),
-                                                                             tree->getParticleGroup(idxGroup)->getEndingIndex(),
-                                                                             starpu_mpi_data_get_rank(particleHandles[interactionid].down)),
+                                         //"P2P_out-nb_i_p_nb_i_p_s"
+                                         STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
+                                                                       tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(),
+                                                                       tree->getParticleGroup(interactionid)->getSizeOfInterval(),
+                                                                       tree->getParticleGroup(interactionid)->getNbParticlesInGroup(),
+                                                                       tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(),
+                                                                       tree->getParticleGroup(idxGroup)->getSizeOfInterval(),
+                                                                       tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(),
+                                                                       outsideInteractions->size(),
+                                                                       tree->getParticleGroup(interactionid)->getStartingIndex(),
+                                                                       tree->getParticleGroup(interactionid)->getEndingIndex(),
+                                                                       tree->getParticleGroup(idxGroup)->getStartingIndex(),
+                                                                       tree->getParticleGroup(idxGroup)->getEndingIndex(),
+                                                                       starpu_mpi_data_get_rank(particleHandles[interactionid].down)),
                        #endif
                        #endif
-                                               0);
-                    }
+                                         0);
+                }
 #else
+                {
+                  starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                         &p2p_cl_inout_mpi,
+                                         STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                         STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
+                                         STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                         STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
+                       #endif
+                                         STARPU_R, particleHandles[idxGroup].symb,
+                                         (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down,
+                                         STARPU_R, particleHandles[interactionid].symb,
+                       #ifdef STARPU_USE_TASK_NAME
+                       #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                                         STARPU_NAME, p2pOuterTaskNames.get(),
+                       #else
+                                         //"P2P_out-nb_i_p_nb_i_p_s"
+                                         STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
+                                                                       tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(),
+                                                                       tree->getParticleGroup(idxGroup)->getSizeOfInterval(),
+                                                                       tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(),
+                                                                       tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(),
+                                                                       tree->getParticleGroup(interactionid)->getSizeOfInterval(),
+                                                                       tree->getParticleGroup(interactionid)->getNbParticlesInGroup(),
+                                                                       outsideInteractions->size(),
+                                                                       tree->getParticleGroup(idxGroup)->getStartingIndex(),
+                                                                       tree->getParticleGroup(idxGroup)->getEndingIndex(),
+                                                                       tree->getParticleGroup(interactionid)->getStartingIndex(),
+                                                                       tree->getParticleGroup(interactionid)->getEndingIndex(),
+                                                                       starpu_mpi_data_get_rank(particleHandles[idxGroup].down)),
+                       #endif
+                       #endif
+                                         0);
+                  std::vector<OutOfBlockInteraction>* outsideInteractionsOpposite = new std::vector<OutOfBlockInteraction>(externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions);
+                  for(unsigned int i = 0; i < outsideInteractionsOpposite->size(); ++i)
                     {
-                        starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                           &p2p_cl_inout_mpi,
-                                           STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                           STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
-                                           STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
-                           #ifdef SCALFMM_STARPU_USE_PRIO
-                                           STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
-                           #endif
-                                           STARPU_R, particleHandles[idxGroup].symb,
-                                           (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down,
-                                           STARPU_R, particleHandles[interactionid].symb,
-                           #ifdef STARPU_USE_TASK_NAME
-                           #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                           STARPU_NAME, p2pOuterTaskNames.get(),
-                           #else
-                                           //"P2P_out-nb_i_p_nb_i_p_s"
-                                           STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
-                                                        tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(),
-                                                        tree->getParticleGroup(idxGroup)->getSizeOfInterval(),
-                                                        tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(),
-                                                        tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(),
-                                                        tree->getParticleGroup(interactionid)->getSizeOfInterval(),
-                                                        tree->getParticleGroup(interactionid)->getNbParticlesInGroup(),
-                                                        outsideInteractions->size(),
-                                                        tree->getParticleGroup(idxGroup)->getStartingIndex(),
-                                                        tree->getParticleGroup(idxGroup)->getEndingIndex(),
-                                                        tree->getParticleGroup(interactionid)->getStartingIndex(),
-                                                        tree->getParticleGroup(interactionid)->getEndingIndex(),
-                                                        starpu_mpi_data_get_rank(particleHandles[idxGroup].down)),
-                           #endif
-                           #endif
-                                           0);
-                        std::vector<OutOfBlockInteraction>* outsideInteractionsOpposite = new std::vector<OutOfBlockInteraction>(externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions);
-                        for(unsigned int i = 0; i < outsideInteractionsOpposite->size(); ++i)
-                        {
-                            MortonIndex tmp = outsideInteractionsOpposite->at(i).outIndex;
-                            outsideInteractionsOpposite->at(i).outIndex = outsideInteractionsOpposite->at(i).insideIndex;
-                            outsideInteractionsOpposite->at(i).insideIndex = tmp;
-                            int tmp2 = outsideInteractionsOpposite->at(i).insideIdxInBlock;
-                            outsideInteractionsOpposite->at(i).insideIdxInBlock = outsideInteractionsOpposite->at(i).outsideIdxInBlock;
-                            outsideInteractionsOpposite->at(i).outsideIdxInBlock = tmp2;
-                            outsideInteractionsOpposite->at(i).relativeOutPosition = getOppositeInterIndex(outsideInteractionsOpposite->at(i).relativeOutPosition);
-                        }
-                        externalInteractionsLeafLevelOpposite.push_front(outsideInteractionsOpposite);
-                        starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                           &p2p_cl_inout_mpi,
-                                           STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                           STARPU_VALUE, &outsideInteractionsOpposite, sizeof(outsideInteractionsOpposite),
-                                           STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
-                           #ifdef SCALFMM_STARPU_USE_PRIO
-                                           STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
-                           #endif
-                                           STARPU_R, particleHandles[interactionid].symb,
-                                           (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down,
-                                           STARPU_R, particleHandles[idxGroup].symb,
-                           #ifdef STARPU_USE_TASK_NAME
-                           #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                           STARPU_NAME, p2pOuterTaskNames.get(),
-                           #else
-                                           //"P2P_out-nb_i_p_nb_i_p_s"
-                                           STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
-                                                        tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(),
-                                                        tree->getParticleGroup(interactionid)->getSizeOfInterval(),
-                                                        tree->getParticleGroup(interactionid)->getNbParticlesInGroup(),
-                                                        tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(),
-                                                        tree->getParticleGroup(idxGroup)->getSizeOfInterval(),
-                                                        tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(),
-                                                        outsideInteractions->size(),
-                                                        tree->getParticleGroup(interactionid)->getStartingIndex(),
-                                                        tree->getParticleGroup(interactionid)->getEndingIndex(),
-                                                        tree->getParticleGroup(idxGroup)->getStartingIndex(),
-                                                        tree->getParticleGroup(idxGroup)->getEndingIndex(),
-                                                        starpu_mpi_data_get_rank(particleHandles[interactionid].down)),
-                           #endif
-                           #endif
-                                           0);
+                      MortonIndex tmp = outsideInteractionsOpposite->at(i).outIndex;
+                      outsideInteractionsOpposite->at(i).outIndex = outsideInteractionsOpposite->at(i).insideIndex;
+                      outsideInteractionsOpposite->at(i).insideIndex = tmp;
+                      int tmp2 = outsideInteractionsOpposite->at(i).insideIdxInBlock;
+                      outsideInteractionsOpposite->at(i).insideIdxInBlock = outsideInteractionsOpposite->at(i).outsideIdxInBlock;
+                      outsideInteractionsOpposite->at(i).outsideIdxInBlock = tmp2;
+                      outsideInteractionsOpposite->at(i).relativeOutPosition = getOppositeInterIndex(outsideInteractionsOpposite->at(i).relativeOutPosition);
                     }
-#endif
+                  externalInteractionsLeafLevelOpposite.push_front(outsideInteractionsOpposite);
+                  starpu_mpi_insert_task(MPI_COMM_WORLD,
+                                         &p2p_cl_inout_mpi,
+                                         STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                                         STARPU_VALUE, &outsideInteractionsOpposite, sizeof(outsideInteractionsOpposite),
+                                         STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
+                       #ifdef SCALFMM_STARPU_USE_PRIO
+                                         STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(),
+                       #endif
+                                         STARPU_R, particleHandles[interactionid].symb,
+                                         (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down,
+                                         STARPU_R, particleHandles[idxGroup].symb,
+                       #ifdef STARPU_USE_TASK_NAME
+                       #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+                                         STARPU_NAME, p2pOuterTaskNames.get(),
+                       #else
+                                         //"P2P_out-nb_i_p_nb_i_p_s"
+                                         STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n",
+                                                                       tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(),
+                                                                       tree->getParticleGroup(interactionid)->getSizeOfInterval(),
+                                                                       tree->getParticleGroup(interactionid)->getNbParticlesInGroup(),
+                                                                       tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(),
+                                                                       tree->getParticleGroup(idxGroup)->getSizeOfInterval(),
+                                                                       tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(),
+                                                                       outsideInteractions->size(),
+                                                                       tree->getParticleGroup(interactionid)->getStartingIndex(),
+                                                                       tree->getParticleGroup(interactionid)->getEndingIndex(),
+                                                                       tree->getParticleGroup(idxGroup)->getStartingIndex(),
+                                                                       tree->getParticleGroup(idxGroup)->getEndingIndex(),
+                                                                       starpu_mpi_data_get_rank(particleHandles[interactionid].down)),
+                       #endif
+                       #endif
+                                         0);
                 }
-            }
-        }
-        FLOG( timerOutBlock.tac() );
-        FLOG( timerInBlock.tic() );
-        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
-            starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                   &p2p_cl_in,
-                                   STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                   STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
+#endif
+              }
+          }
+      }
+    FLOG( timerOutBlock.tac() );
+    FLOG( timerInBlock.tic() );
+    for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
+        starpu_mpi_insert_task(MPI_COMM_WORLD,
+                               &p2p_cl_in,
+                               STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                               STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int),
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                   STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2P(),
+                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2P(),
                        #endif
-                                   STARPU_R, particleHandles[idxGroup].symb,
+                               STARPU_R, particleHandles[idxGroup].symb,
                        #ifdef STARPU_USE_REDUX
-                                   STARPU_REDUX, particleHandles[idxGroup].down,
+                               STARPU_REDUX, particleHandles[idxGroup].down,
                        #else
-                                   (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down,
+                               (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down,
                        #endif
                        #ifdef STARPU_USE_TASK_NAME
                        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                                   STARPU_NAME, p2pTaskNames.get(),
+                               STARPU_NAME, p2pTaskNames.get(),
                        #else
-                                   //"P2P-nb_i_p"
-                                   STARPU_NAME, taskNames->print("P2P", "%d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
-                                                                 tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(),
-                                                                 tree->getParticleGroup(idxGroup)->getSizeOfInterval(),
-                                                                 tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(),
-                                                                 tree->getParticleGroup(idxGroup)->getStartingIndex(),
-                                                                 tree->getParticleGroup(idxGroup)->getEndingIndex(),
-                                                                 tree->getParticleGroup(idxGroup)->getStartingIndex(),
-                                                                 tree->getParticleGroup(idxGroup)->getEndingIndex(),
-                                                                 starpu_mpi_data_get_rank(particleHandles[idxGroup].down)),
+                               //"P2P-nb_i_p"
+                               STARPU_NAME, taskNames->print("P2P", "%d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n",
+                                                             tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(),
+                                                             tree->getParticleGroup(idxGroup)->getSizeOfInterval(),
+                                                             tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(),
+                                                             tree->getParticleGroup(idxGroup)->getStartingIndex(),
+                                                             tree->getParticleGroup(idxGroup)->getEndingIndex(),
+                                                             tree->getParticleGroup(idxGroup)->getStartingIndex(),
+                                                             tree->getParticleGroup(idxGroup)->getEndingIndex(),
+                                                             starpu_mpi_data_get_rank(particleHandles[idxGroup].down)),
                        #endif
                        #endif
-                                   0);
-        }
-        FLOG( timerInBlock.tac() );
-
-        FLOG( FLog::Controller << "\t\t directPass in " << timer.tacAndElapsed() << "s\n" );
-        FLOG( FLog::Controller << "\t\t\t inblock  in " << timerInBlock.elapsed() << "s\n" );
-        FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" );
-    }
-    /////////////////////////////////////////////////////////////////////////////////////
-    /// Merge Pass
-    /////////////////////////////////////////////////////////////////////////////////////
-
-    void mergePass(){
-        FLOG( FTic timer; );
-
-        FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size());
-
-        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
-            starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                   &l2p_cl,
-                                   STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
-                                   STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int),
-        #ifdef SCALFMM_STARPU_USE_PRIO
-                    STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2P(),
-        #endif
-                    STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb,
-                    STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].down,
-                    STARPU_R, particleHandles[idxGroup].symb,
-        #ifdef STARPU_USE_REDUX
-                    STARPU_REDUX, particleHandles[idxGroup].down,
-        #else
-                    (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down,
-        #endif
-        #ifdef STARPU_USE_TASK_NAME
-        #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
-                    STARPU_NAME, l2pTaskNames.get(),
-        #else
-                    //"L2P-nb_i_p"
-                    STARPU_NAME, taskNames->print("L2P", "%d, %lld, %lld, %lld, %lld, %d\n",
-                                                  tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(),
-                                                  tree->getCellGroup(tree->getHeight()-1,idxGroup)->getSizeOfInterval(),
-                                                  tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(),
-                                                  tree->getParticleGroup(idxGroup)->getStartingIndex(),
-                                                  tree->getParticleGroup(idxGroup)->getEndingIndex(),
-                                                  starpu_mpi_data_get_rank(particleHandles[idxGroup].down)),
-        #endif
-        #endif
-                    0);
-        }
-
-        FLOG( FLog::Controller << "\t\t L2P in " << timer.tacAndElapsed() << "s\n" );
-    }
+                               0);
+      }
+    FLOG( timerInBlock.tac() );
+
+    FLOG( FLog::Controller << "\t\t directPass in " << timer.tacAndElapsed() << "s\n" );
+    FLOG( FLog::Controller << "\t\t\t inblock  in " << timerInBlock.elapsed() << "s\n" );
+    FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" );
+  }
+  /////////////////////////////////////////////////////////////////////////////////////
+  /// Merge Pass: inserts one l2p_cl task per particle group of the tree
+  /////////////////////////////////////////////////////////////////////////////////////
+
+  void mergePass(){
+    FLOG( FTic timer; );
+
+    FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size()); // last-level cell handles and particle handles must match in number
+
+    for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
+        starpu_mpi_insert_task(MPI_COMM_WORLD,
+                               &l2p_cl,
+                               STARPU_VALUE, &wrapperptr, sizeof(wrapperptr),
+                               STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int),
+    #ifdef SCALFMM_STARPU_USE_PRIO
+            STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2P(),
+    #endif
+            STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb,   // read-only: cell symbolic data
+            STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].down,   // read-only: cell "down" data
+            STARPU_R, particleHandles[idxGroup].symb,                    // read-only: particle symbolic data
+    #ifdef STARPU_USE_REDUX
+            STARPU_REDUX, particleHandles[idxGroup].down,                // particle "down" data accessed in reduction mode
+    #else
+            (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, // otherwise read-write, commutative when supported
+    #endif
+    #ifdef STARPU_USE_TASK_NAME
+    #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS
+            STARPU_NAME, l2pTaskNames.get(),
+    #else
+            //"L2P-nb_i_p"
+            STARPU_NAME, taskNames->print("L2P", "%d, %lld, %lld, %lld, %lld, %d\n",
+                                          tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(),
+                                          tree->getCellGroup(tree->getHeight()-1,idxGroup)->getSizeOfInterval(),
+                                          tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(),
+                                          tree->getParticleGroup(idxGroup)->getStartingIndex(),
+                                          tree->getParticleGroup(idxGroup)->getEndingIndex(),
+                                          starpu_mpi_data_get_rank(particleHandles[idxGroup].down)),
+    #endif
+    #endif
+            0);
+      }
+
+    FLOG( FLog::Controller << "\t\t L2P in " << timer.tacAndElapsed() << "s\n" );
+  }
 
 
 #ifdef STARPU_USE_REDUX
-    void readParticle(){
-        FLOG( FTic timer; );
+  void readParticle(){
+    FLOG( FTic timer; );
 
-        FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size());
+    FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size());
 
-        for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
-            starpu_mpi_insert_task(MPI_COMM_WORLD,
-                                   &p2p_redux_read,
+    for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
+        starpu_mpi_insert_task(MPI_COMM_WORLD,
+                               &p2p_redux_read,
                        #ifdef SCALFMM_STARPU_USE_PRIO
-                                   STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2P(),
+                               STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2P(),
                        #endif
-                                   STARPU_R, particleHandles[idxGroup].down,
+                               STARPU_R, particleHandles[idxGroup].down,
                        #ifdef STARPU_USE_TASK_NAME
-                                   STARPU_NAME, "read-particle",
+                               STARPU_NAME, "read-particle",
                        #endif
-                                   0);
-        }
-    }
+                               0);
+      }
+  }
 #endif
 };
 
diff --git a/Src/GroupTree/Core/FGroupTools.hpp b/Src/GroupTree/Core/FGroupTools.hpp
new file mode 100644
index 000000000..e11db9eb8
--- /dev/null
+++ b/Src/GroupTree/Core/FGroupTools.hpp
@@ -0,0 +1,266 @@
+#ifndef FGROUPTOOLS_HPP
+#define FGROUPTOOLS_HPP
+
+#include "ScalFmmConfig.h"
+#include "Utils/FGlobal.hpp"
+#include "Utils/FPoint.hpp"
+#ifdef SCALFMM_USE_MPI
+#include "Utils/FMpi.hpp"
+#endif
+
+
+namespace groupTree {
+  // Plain record for one particle: position, physical value and Morton index.
+  template<typename FReal>
+  struct particle_t {
+    using position_t = FPoint<FReal>;
+    position_t pos;             // particle position
+    FReal      phi;             // physical value carried by the particle
+    MortonIndex  morton_index;  // Morton index stored with the particle
+    const auto& position() const {  // read-only access to pos
+      return pos;
+    }
+    const FPoint<FReal>& getPosition(){  // non-const overload, still returns a const reference
+      return pos;
+    }
+    const FPoint<FReal>& getPosition() const{
+      return pos;
+    }
+    const auto& physicalValue() const{  // read-only access to phi
+      return phi;
+    }
+    const auto& getPositions() const {  // returns the same member as position()
+      return pos;
+    }
+    // Sets all three fields; the Morton index is only read, so it is taken by const reference.
+    void fill(const position_t &inPos, const FReal &inPhyVal, const MortonIndex & inMortonIndex){
+      pos = inPos ; phi = inPhyVal ; morton_index = inMortonIndex ;
+    }
+    int weight() const { return 1;}  // constant weight of 1 for every particle
+    MortonIndex getMorton() const{
+      return morton_index;
+
+    }
+    friend constexpr MortonIndex morton_index(const particle_t& p) {  // free-function accessor to the stored index
+      return p.morton_index;
+    }
+  };
+
+
+  // Gathers the elapsed time of every process on rank 0 and reduces it there.
+  // Declared inline because this non-template function is defined in a header.
+  // param[in]  FMpiComm     MPI wrapper giving rank, process count, communicator and barrier
+  // param[in]  elapsedTime  time measured on the calling process
+  // param[out] minTime      smallest gathered time (written on rank 0 only)
+  // param[out] maxTime      largest gathered time (written on rank 0 only)
+  // param[out] meanTime     mean of the gathered times (written on rank 0 only)
+  inline void timeAverage(const FMpi &FMpiComm, double &elapsedTime , double &minTime,
+                          double &maxTime, double &meanTime)
+  {
+    double * allTimes = nullptr ;  // receive buffer, allocated on rank 0 only
+    int myrank = FMpiComm.global().processId() , nprocs=FMpiComm.global().processCount() ;
+    if(myrank == 0)
+      {
+        allTimes  = new double[nprocs] ;
+      }
+#ifdef SCALFMM_USE_MPI
+    MPI_Gather(&elapsedTime,1, MPI_DOUBLE, allTimes, 1, MPI_DOUBLE,0 /* root*/,FMpiComm.global().getComm()) ;
+#endif
+    if(myrank == 0)
+      {
+        minTime = allTimes[0],  maxTime = allTimes[0], meanTime = allTimes[0] ;
+
+        for (int i = 1 ; i < nprocs ; ++i)  {
+            minTime   = std::min(minTime, allTimes[i]) ;
+            maxTime   = std::max(maxTime, allTimes[i]) ;
+            meanTime += allTimes[i] ;
+          }
+        meanTime /= nprocs;
+        delete[] allTimes ;  // the gather buffer was previously leaked on every call
+      }
+    FMpiComm.global().barrier() ;  // all processes leave the function together
+  }
+  // Dumps every target particle of `tree` into `fileName` through FFmaGenericWriter.
+  // Each particle takes 8 reals: x, y, z, physical value, potential, fx, fy, fz.
+  // NbPoints sizes the buffer and is the particle count written in the file header; the
+  // buffer is filled without a bounds check, so the tree must not hold more particles.
+  template <class OCTREECLASS>
+  void saveSolutionInFile(const std::string &fileName, const std::size_t&  NbPoints,
+                           OCTREECLASS &tree) {
+    using REALTYPE =  typename OCTREECLASS::FRealType ;
+    FFmaGenericWriter<REALTYPE> writer(fileName) ;
+    // Value-initialised (zeroed) buffer, so no separate memset is needed.
+    REALTYPE * particles = new REALTYPE[8*NbPoints]() ;
+    FSize j = 0 ;  // write cursor in `particles`, advanced by 8 per particle
+  #ifdef _VERBOSE_LEAF
+    int countLeaf = 0, coutPart=0;
+  #endif
+    tree.forEachLeaf([&](typename OCTREECLASS::LeafClass* leaf){
+      //
+      // Input
+      const REALTYPE*const posX = leaf->getTargets()->getPositions()[0];
+      const REALTYPE*const posY = leaf->getTargets()->getPositions()[1];
+      const REALTYPE*const posZ = leaf->getTargets()->getPositions()[2];
+      const REALTYPE*const physicalValues = leaf->getTargets()->getPhysicalValues();
+      //
+      // Computed data
+      const REALTYPE*const potentials = leaf->getTargets()->getPotentials();
+      const REALTYPE*const forcesX = leaf->getTargets()->getForcesX();
+      const REALTYPE*const forcesY = leaf->getTargets()->getForcesY();
+      const REALTYPE*const forcesZ = leaf->getTargets()->getForcesZ();
+      //
+      const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
+  #ifdef _VERBOSE_LEAF
+      std::cout << "Leaf " << countLeaf << " Particles : [ " << coutPart << ", " <<coutPart+nbParticlesInLeaf -1 <<  " ] "  << nbParticlesInLeaf << std::endl;
+      coutPart += nbParticlesInLeaf ; ++countLeaf;
+  #endif
+      // Records are appended in the order forEachLeaf visits the leaves.
+      for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart,j+=8){
+          particles[j]    = posX[idxPart] ;
+          particles[j+1]  = posY[idxPart] ;
+          particles[j+2]  = posZ[idxPart] ;
+          particles[j+3]  = physicalValues[idxPart] ;
+          particles[j+4]  = potentials[idxPart] ;
+          particles[j+5]  =  forcesX[idxPart] ;
+          particles[j+6]  =  forcesY[idxPart] ;
+          particles[j+7]  =  forcesZ[idxPart] ;
+        }
+    });
+
+    writer.writeHeader( tree.getBoxCenter(), tree.getBoxWidth() ,  NbPoints, sizeof(REALTYPE), 8) ;
+    writer.writeArrayOfReal(particles,  8 , NbPoints);
+
+    delete[] particles;
+  }
+
+  // Compares multipole and local data of the group-tree cells accepted by
+  // groupalgo.isDataOwnedBerenger() with the cells of treeCheck; prints one status per level.
+  template< typename FReal, class GROUPTREE_T,class GROUPALGO_T, class OCTTREE_T>
+  void checkCellTree(GROUPTREE_T &groupedTree, GROUPALGO_T & groupalgo, OCTTREE_T &treeCheck, const FReal &epsilon){
+    std::vector<bool>  OK(groupedTree.getHeight(),true) ; // one flag per level, cleared on the first mismatch
+    groupedTree.forEachCellWithLevel(
+          [&](typename GROUPTREE_T::GroupSymbolCellClass_T* gsymb ,
+                       typename GROUPTREE_T::GroupCellUpClass_T*    gmul ,
+                       typename GROUPTREE_T::GroupCellDownClass_T*  gloc ,
+                       const int level)
+    {
+        if(groupalgo.isDataOwnedBerenger(gsymb->getMortonIndex(), level))
+          {
+            const auto * cell = treeCheck.getCell(gsymb->getMortonIndex(), level); // reference cell with the same Morton index and level
+            if(cell == nullptr){
+                std::cout << "[Empty] Error cell should exist " << gsymb->getMortonIndex() << "\n";
+                OK[level] = false ;
+              }
+            else {
+                FMath::FAccurater<FReal> diffUp; // error accumulator for the multipole data
+                diffUp.add(cell->getMultipoleData().get(0), gmul->get(0), gmul->getVectorSize());
+                if(diffUp.getRelativeInfNorm() > epsilon || diffUp.getRelativeL2Norm() > epsilon){
+                    std::cout << "[Up] Up is different at index " << gsymb->getMortonIndex() << " level " << level << " is " << diffUp << "\n";
+                    OK[level] = false ;
+
+                  }
+                FMath::FAccurater<FReal> diffDown; // error accumulator for the local expansion data
+                diffDown.add(cell->getLocalExpansionData().get(0), gloc->get(0), gloc->getVectorSize());
+                if(diffDown.getRelativeInfNorm() > epsilon || diffDown.getRelativeL2Norm() > epsilon){
+                    std::cout << "[Down] Down is different at index " << gsymb->getMortonIndex() << " level " << level << " is " << diffDown << "\n";
+                    OK[level] = false ;
+
+                  }
+              }
+          }
+      });
+    for (std::size_t l = 0 ; l < OK.size(); ++l){ // per-level summary
+        std:: cout << "      Level ( " << l << " ) --> " << (OK[l] ? " Ok" : "Error " ) <<std::endl;
+      }
+    std:: cout << " checkCellTree --> done" <<std::endl;
+
+  }
+  // Compares the particles (count, positions, potentials, forces) of the group-tree leaves accepted
+  // by groupalgo.isDataOwnedBerenger() with the leaves of treeCheck and prints a global verdict.
+  template< typename FReal, class GROUPTREE_T,class GROUPALGO_T, class OCTTREE_T>
+  void checkLeaves(GROUPTREE_T &groupedTree, GROUPALGO_T & groupalgo, OCTTREE_T &treeCheck, const FReal &epsilon){
+    FMath::FAccurater<FReal> potentialGlobalDiff; // potential error accumulated over all compared leaves
+    const int NbLevels  =  groupedTree.getHeight();
+    bool  OK = true ; // cleared on the first mismatch
+    groupedTree.template forEachCellMyLeaf<typename GROUPTREE_T::LeafClass_T >(
+          [&](typename GROUPTREE_T::GroupSymbolCellClass_T* gsymb ,
+                       typename GROUPTREE_T::GroupCellUpClass_T*   /* gmul */,
+                       typename GROUPTREE_T::GroupCellDownClass_T* /* gloc */,
+                       typename GROUPTREE_T::LeafClass_T * leafTarget
+                       )
+    {
+        if(groupalgo.isDataOwnedBerenger(gsymb->getMortonIndex(), NbLevels-1))
+          {
+            const auto * targets = treeCheck.getLeafSrc(gsymb->getMortonIndex()); // reference leaf with the same Morton index
+            if(targets == nullptr){
+                std::cout << "[Empty] Error leaf should exist " << gsymb->getMortonIndex() << "\n";
+                OK = false ;
+
+              }
+            else{
+                const FReal*const gposX = leafTarget->getPositions()[0]; // g* arrays: data of the group-tree leaf
+                const FReal*const gposY = leafTarget->getPositions()[1];
+                const FReal*const gposZ = leafTarget->getPositions()[2];
+                const FSize gnbPartsInLeafTarget = leafTarget->getNbParticles();
+                const FReal*const gforceX = leafTarget->getForcesX();
+                const FReal*const gforceY = leafTarget->getForcesY();
+                const FReal*const gforceZ = leafTarget->getForcesZ();
+                const FReal*const gpotential = leafTarget->getPotentials();
+
+                const FReal*const posX = targets->getPositions()[0]; // unprefixed arrays: data of the reference leaf
+                const FReal*const posY = targets->getPositions()[1];
+                const FReal*const posZ = targets->getPositions()[2];
+                const FSize nbPartsInLeafTarget = targets->getNbParticles();
+                const FReal*const forceX = targets->getForcesX();
+                const FReal*const forceY = targets->getForcesY();
+                const FReal*const forceZ = targets->getForcesZ();
+                const FReal*const potential = targets->getPotentials();
+
+                if(gnbPartsInLeafTarget != nbPartsInLeafTarget){
+                    std::cout << "[Empty] Not the same number of particles at " << gsymb->getMortonIndex()
+                              << " gnbPartsInLeafTarget " << gnbPartsInLeafTarget << " nbPartsInLeafTarget " << nbPartsInLeafTarget << "\n";
+                    OK = false ;
+                  }else{
+                    FMath::FAccurater<FReal> potentialDiff; // per-leaf accumulators
+                    FMath::FAccurater<FReal> fx, fy, fz;
+                    for(FSize idxPart = 0 ; idxPart < nbPartsInLeafTarget ; ++idxPart){
+                        if(gposX[idxPart] != posX[idxPart] || gposY[idxPart] != posY[idxPart] || gposZ[idxPart] != posZ[idxPart]){ // positions are compared with exact equality
+                            std::cout << "[Empty] Not the same particlea at " << gsymb->getMortonIndex() << " idx " << idxPart << " "
+                                      << gposX[idxPart] << " " << posX[idxPart] << " " << gposY[idxPart] << " " << posY[idxPart]
+                                         << " " << gposZ[idxPart] << " " << posZ[idxPart] << "\n";
+                            OK = false ;
+                          }
+                        else{
+                            potentialGlobalDiff.add(potential[idxPart], gpotential[idxPart]);
+                            potentialDiff.add(potential[idxPart], gpotential[idxPart]);
+                            fx.add(forceX[idxPart], gforceX[idxPart]);
+                            fy.add(forceY[idxPart], gforceY[idxPart]);
+                            fz.add(forceZ[idxPart], gforceZ[idxPart]);
+                          }
+                      }
+                    if(potentialDiff.getRelativeInfNorm() > epsilon || potentialDiff.getRelativeL2Norm() > epsilon){ // same epsilon test for the potential and each force component
+                        std::cout << " potentialDiff is different at index " << gsymb->getMortonIndex() << " is " << potentialDiff << "\n";
+                        OK = false ;
+                      }
+                    if(fx.getRelativeInfNorm() > epsilon || fx.getRelativeL2Norm() > epsilon){
+                        std::cout << " fx is different at index " << gsymb->getMortonIndex() << " is " << fx << "\n";
+                        OK = false ;
+                      }
+                    if(fy.getRelativeInfNorm() > epsilon || fy.getRelativeL2Norm() > epsilon){
+                        std::cout << " fy is different at index " << gsymb->getMortonIndex() << " is " << fy << "\n";
+                        OK = false ;
+                      }
+                    if(fz.getRelativeInfNorm() > epsilon || fz.getRelativeL2Norm() > epsilon){
+                        OK = false ;
+                        std::cout << " fz is different at index " << gsymb->getMortonIndex() << " is " << fz << "\n";
+                      }
+                  }
+              }
+          }
+      });
+    std::cout << " potentialDiff  is " << potentialGlobalDiff << "\n";
+    std:: cout << " checkLeaves --> " << (OK ? " Ok" : "Error " ) <<std::endl;
+
+  }
+}
+#endif // FGROUPTOOLS_HPP
diff --git a/Src/GroupTree/Core/FGroupTree.hpp b/Src/GroupTree/Core/FGroupTree.hpp
index 14240fcf6..8ae94bee8 100644
--- a/Src/GroupTree/Core/FGroupTree.hpp
+++ b/Src/GroupTree/Core/FGroupTree.hpp
@@ -1,5 +1,8 @@
+// ==== CMAKE =====
+//
+// ================
 
-// Keep in private GIT
+//
 #ifndef FGROUPTREE_HPP
 #define FGROUPTREE_HPP
 #include <vector>
@@ -14,161 +17,170 @@
 #include "FGroupOfParticles.hpp"
 #include "FGroupAttachedLeaf.hpp"
 #include "../../Kernels/P2P/FP2PParticleContainer.hpp"
-
-
+#ifdef SCALFMM_USE_MPI
+#include "FDistributedGroupTreeBuilder.hpp"
+#endif
 
 template <class FReal, class SymbolCellClass, class PoleCellClass, class LocalCellClass,
           class GroupAttachedLeafClass, unsigned NbSymbAttributes, unsigned NbAttributesPerParticle, class AttributeClass = FReal>
 class FGroupTree {
 public:
-    typedef GroupAttachedLeafClass BasicAttachedClass;
-    typedef FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle,AttributeClass> ParticleGroupClass;
-    typedef FGroupOfCells<SymbolCellClass, PoleCellClass, LocalCellClass> CellGroupClass;
+  typedef GroupAttachedLeafClass     BasicAttachedClass;   // Leaf view type requested from ParticleGroupClass::getLeaf<>()
+  typedef FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle,AttributeClass>     ParticleGroupClass; // one block of leaves with their particles
+  typedef FGroupOfCells<SymbolCellClass, PoleCellClass, LocalCellClass> CellGroupClass; // one block of cells of a given level
+  typedef SymbolCellClass  GroupSymbolCellClass_T ; // symbolic part of a cell (Morton index, coordinate, level)
+  typedef LocalCellClass   GroupCellDownClass_T ; // alias of LocalCellClass
+  typedef PoleCellClass    GroupCellUpClass_T ; // alias of PoleCellClass
+  typedef GroupAttachedLeafClass     LeafClass_T;   // Leaf (same type as BasicAttachedClass)
 
 protected:
-    //< height of the tree (1 => only the root)
-    const int _treeHeight;
-    //< max number of cells in a block
-    const int _nbElementsPerBlock;
-    //< all the blocks of the tree
-    std::vector<CellGroupClass*>* _cellBlocksPerLevel;
-    //< all the blocks of leaves
-    std::vector<ParticleGroupClass*> _particleBlocks;
-
-    //< the space system center
-    const FPoint<FReal> boxCenter;
-    //< the space system corner (used to compute morton index)
-    const FPoint<FReal> boxCorner;
-    //< the space system width
-    const FReal boxWidth;
-    //< the width of a box at width level
-    const FReal boxWidthAtLeafLevel;
+  //< height of the tree (1 => only the root)
+  const int _treeHeight;
+  //< max number of cells in a block
+  const int _nbElementsPerBlock;
+  //< cell blocks of the tree: array of _treeHeight vectors, one per level
+  std::vector<CellGroupClass*>* _cellBlocksPerLevel;
+  //< all the blocks of leaves (particle groups)
+  std::vector<ParticleGroupClass*> _particleBlocks;
+
+  //< the space system center
+  const FPoint<FReal> boxCenter;
+  //< the space system corner (used to compute morton index)
+  const FPoint<FReal> boxCorner;
+  //< the space system width
+  const FReal boxWidth;
+  //< the width of a box at leaf level: boxWidth / 2^(_treeHeight-1)
+  const FReal boxWidthAtLeafLevel;
 
 public:
-    typedef typename std::vector<CellGroupClass*>::iterator CellGroupIterator;
-    typedef typename std::vector<CellGroupClass*>::const_iterator CellGroupConstIterator;
-    typedef typename std::vector<ParticleGroupClass*>::iterator ParticleGroupIterator;
-    typedef typename std::vector<ParticleGroupClass*>::const_iterator ParticleGroupConstIterator;
+  typedef typename std::vector<CellGroupClass*>::iterator           CellGroupIterator;
+  typedef typename std::vector<CellGroupClass*>::const_iterator     CellGroupConstIterator;
+  typedef typename std::vector<ParticleGroupClass*>::iterator       ParticleGroupIterator;
+  typedef typename std::vector<ParticleGroupClass*>::const_iterator ParticleGroupConstIterator;
 
-    /** This constructor create a blocked octree from a usual octree
+  /** This constructor creates a blocked octree from a usual octree
      * The cell are allocated as in the usual octree (no copy constructor are called!)
      * Once allocated each cell receive its morton index and tree coordinate.
      * No blocks are allocated at level 0.
      */
-    template<class OctreeClass>
-    FGroupTree(const int in_treeHeight, const int in_nbElementsPerBlock, OctreeClass*const inOctreeSrc)
-        : _treeHeight(in_treeHeight), _nbElementsPerBlock(in_nbElementsPerBlock), _cellBlocksPerLevel(nullptr),
-          boxCenter(inOctreeSrc->getBoxCenter()), boxCorner(inOctreeSrc->getBoxCenter(),-(inOctreeSrc->getBoxWidth()/2)),
-          boxWidth(inOctreeSrc->getBoxWidth()), boxWidthAtLeafLevel(inOctreeSrc->getBoxWidth()/FReal(1<<(in_treeHeight-1))){
-        _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight];
-
-        // Iterate on the tree and build
-        typename OctreeClass::Iterator octreeIterator(inOctreeSrc);
-        octreeIterator.gotoBottomLeft();
-
-        { // First leaf level, we create leaves and cells groups
-            const int idxLevel = _treeHeight-1;
-            typename OctreeClass::Iterator avoidGotoLeft = octreeIterator;
-            // For each cell at this level
-            do {
-                typename OctreeClass::Iterator blockIteratorInOctree = octreeIterator;
-                // Move the iterator per _nbElementsPerBlock (or until it cannot move right)
-                int sizeOfBlock = 1;
-                FSize nbParticlesInGroup = octreeIterator.getCurrentLeaf()->getSrc()->getNbParticles();
-                while(sizeOfBlock < _nbElementsPerBlock && octreeIterator.moveRight()){
-                    sizeOfBlock += 1;
-                    nbParticlesInGroup += octreeIterator.getCurrentLeaf()->getSrc()->getNbParticles();
-                }
-
-                // Create a block with the apropriate parameters
-                CellGroupClass*const newBlock = new CellGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(),
-                                                                 octreeIterator.getCurrentGlobalIndex()+1,
-                                                                 sizeOfBlock);
-                FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(blockIteratorInOctree.getCurrentGlobalIndex(),
-                                octreeIterator.getCurrentGlobalIndex()+1,
-                                 sizeOfBlock, nbParticlesInGroup);
-
-                // Initialize each cell of the block
-                int cellIdInBlock = 0;
-                size_t nbParticlesOffsetBeforeLeaf = 0;
-                while(cellIdInBlock != sizeOfBlock){
-                    const MortonIndex newNodeIndex = blockIteratorInOctree.getCurrentCell()->getMortonIndex();
-                    const FTreeCoordinate newNodeCoordinate = blockIteratorInOctree.getCurrentCell()->getCoordinate();
-                    // Add cell
-                    newBlock->newCell(newNodeIndex, cellIdInBlock);
-
-                    SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock);
-                    symbolic.setMortonIndex(newNodeIndex);
-                    symbolic.setCoordinate(newNodeCoordinate);
-                    symbolic.setLevel(idxLevel);
-
-                    // Add leaf
-                    nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(newNodeIndex, cellIdInBlock,
-                                              blockIteratorInOctree.getCurrentLeaf()->getSrc()->getNbParticles(),
-                                              nbParticlesOffsetBeforeLeaf);
-
-                    BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock);
-                    attachedLeaf.copyFromContainer(blockIteratorInOctree.getCurrentLeaf()->getSrc(), 0);
-
-                    cellIdInBlock += 1;
-                    blockIteratorInOctree.moveRight();
-                }
-
-                // Keep the block
-                _cellBlocksPerLevel[idxLevel].push_back(newBlock);
-                _particleBlocks.push_back(newParticleBlock);
-
-                // If we can move right then add another block
-            } while(octreeIterator.moveRight());
-
-            avoidGotoLeft.moveUp();
-            octreeIterator = avoidGotoLeft;
-        }
-
-        // For each level from heigth - 2 to 1
-        for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){
-            typename OctreeClass::Iterator avoidGotoLeft = octreeIterator;
-            // For each cell at this level
-            do {
-                typename OctreeClass::Iterator blockIteratorInOctree = octreeIterator;
-                // Move the iterator per _nbElementsPerBlock (or until it cannot move right)
-                int sizeOfBlock = 1;
-                while(sizeOfBlock < _nbElementsPerBlock && octreeIterator.moveRight()){
-                    sizeOfBlock += 1;
-                }
-
-                // Create a block with the apropriate parameters
-                CellGroupClass*const newBlock = new CellGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(),
-                                                                 octreeIterator.getCurrentGlobalIndex()+1,
-                                                                 sizeOfBlock);
-                // Initialize each cell of the block
-                int cellIdInBlock = 0;
-                while(cellIdInBlock != sizeOfBlock){
-                    const MortonIndex newNodeIndex = blockIteratorInOctree.getCurrentCell()->getMortonIndex();
-                    const FTreeCoordinate newNodeCoordinate = blockIteratorInOctree.getCurrentCell()->getCoordinate();
-                    newBlock->newCell(newNodeIndex, cellIdInBlock);
-
-                    SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock);
-                    symbolic.setMortonIndex(newNodeIndex);
-                    symbolic.setCoordinate(newNodeCoordinate);
-                    symbolic.setLevel(idxLevel);
+  // NOTE: no default constructor is declared. A `template<class OctreeClass> FGroupTree()` could never be
+  // selected (OctreeClass is not deducible and constructor templates cannot take explicit template
+  // arguments) and its empty body would leave the const members (_treeHeight, box*, ...) uninitialized.
+  template<class OctreeClass>  // OctreeClass must provide Iterator, getBoxCenter() and getBoxWidth(), as used below
+  FGroupTree(const int in_treeHeight, const int in_nbElementsPerBlock, OctreeClass*const inOctreeSrc)
+    : _treeHeight(in_treeHeight), _nbElementsPerBlock(in_nbElementsPerBlock), _cellBlocksPerLevel(nullptr),
+      boxCenter(inOctreeSrc->getBoxCenter()), boxCorner(inOctreeSrc->getBoxCenter(),-(inOctreeSrc->getBoxWidth()/2)),
+      boxWidth(inOctreeSrc->getBoxWidth()), boxWidthAtLeafLevel(inOctreeSrc->getBoxWidth()/FReal(1<<(in_treeHeight-1))){
+
+    _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight]; // one vector of cell groups per level
+
+    // Iterate over the source octree and build the groups, starting from gotoBottomLeft()
+    typename OctreeClass::Iterator octreeIterator(inOctreeSrc);
+    octreeIterator.gotoBottomLeft();
+
+    { // Leaf level first: create both the cell groups and the particle (leaf) groups
+      const int idxLevel = _treeHeight-1;
+      typename OctreeClass::Iterator avoidGotoLeft = octreeIterator; // copy of the level's starting position, moved up once the level is done
+      // For each cell at this level
+      do {
+          typename OctreeClass::Iterator blockIteratorInOctree = octreeIterator; // first cell of the new block
+          // Advance octreeIterator until the block holds _nbElementsPerBlock cells (or it cannot move right), summing the particle counts
+          int sizeOfBlock = 1;
+          FSize nbParticlesInGroup = octreeIterator.getCurrentLeaf()->getSrc()->getNbParticles();
+          while(sizeOfBlock < _nbElementsPerBlock && octreeIterator.moveRight()){
+              sizeOfBlock += 1;
+              nbParticlesInGroup += octreeIterator.getCurrentLeaf()->getSrc()->getNbParticles();
+            }
 
-                    cellIdInBlock += 1;
-                    blockIteratorInOctree.moveRight();
-                }
+          // Create the cell block and the particle block: first cell's global index, last cell's global index + 1, number of cells
+          CellGroupClass*const newBlock = new CellGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(),
+                                                             octreeIterator.getCurrentGlobalIndex()+1,
+                                                             sizeOfBlock);
+          FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(blockIteratorInOctree.getCurrentGlobalIndex(),
+                                                                                                                                                                                                               octreeIterator.getCurrentGlobalIndex()+1,
+                                                                                                                                                                                                               sizeOfBlock, nbParticlesInGroup);
+
+          // Initialize each cell of the block, walking blockIteratorInOctree from the first to the last cell
+          int cellIdInBlock = 0;
+          size_t nbParticlesOffsetBeforeLeaf = 0; // value returned by newLeaf() and passed to the next newLeaf() call
+          while(cellIdInBlock != sizeOfBlock){
+              const MortonIndex newNodeIndex = blockIteratorInOctree.getCurrentCell()->getMortonIndex();
+              const FTreeCoordinate newNodeCoordinate = blockIteratorInOctree.getCurrentCell()->getCoordinate();
+              // Add cell
+              newBlock->newCell(newNodeIndex, cellIdInBlock);
+
+              SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock);
+              symbolic.setMortonIndex(newNodeIndex);
+              symbolic.setCoordinate(newNodeCoordinate);
+              symbolic.setLevel(idxLevel);
+
+              // Add leaf
+              nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(newNodeIndex, cellIdInBlock,
+                                                                      blockIteratorInOctree.getCurrentLeaf()->getSrc()->getNbParticles(),
+                                                                      nbParticlesOffsetBeforeLeaf);
+
+              BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock);
+              attachedLeaf.copyFromContainer(blockIteratorInOctree.getCurrentLeaf()->getSrc(), 0);
+
+              cellIdInBlock += 1;
+              blockIteratorInOctree.moveRight();
+            }
 
-                // Keep the block
-                _cellBlocksPerLevel[idxLevel].push_back(newBlock);
+          // Keep both blocks (cells of the leaf level and their particles)
+          _cellBlocksPerLevel[idxLevel].push_back(newBlock);
+          _particleBlocks.push_back(newParticleBlock);
 
-                // If we can move right then add another block
-            } while(octreeIterator.moveRight());
+          // If we can move right then add another block
+        } while(octreeIterator.moveRight());
 
-            avoidGotoLeft.moveUp();
-            octreeIterator = avoidGotoLeft;
-        }
+      avoidGotoLeft.moveUp(); // go up one level from the saved starting position
+      octreeIterator = avoidGotoLeft;
     }
 
-    /**
+    // For each level from height - 2 down to 1 (cells only, no particles)
+    for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){
+        typename OctreeClass::Iterator avoidGotoLeft = octreeIterator; // copy of the level's starting position
+        // For each cell at this level
+        do {
+            typename OctreeClass::Iterator blockIteratorInOctree = octreeIterator; // first cell of the new block
+            // Advance octreeIterator until the block holds _nbElementsPerBlock cells (or it cannot move right)
+            int sizeOfBlock = 1;
+            while(sizeOfBlock < _nbElementsPerBlock && octreeIterator.moveRight()){
+                sizeOfBlock += 1;
+              }
+
+            // Create a block with the appropriate parameters
+            CellGroupClass*const newBlock = new CellGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(),
+                                                               octreeIterator.getCurrentGlobalIndex()+1,
+                                                               sizeOfBlock);
+            // Initialize each cell of the block
+            int cellIdInBlock = 0;
+            while(cellIdInBlock != sizeOfBlock){
+                const MortonIndex newNodeIndex = blockIteratorInOctree.getCurrentCell()->getMortonIndex();
+                const FTreeCoordinate newNodeCoordinate = blockIteratorInOctree.getCurrentCell()->getCoordinate();
+                newBlock->newCell(newNodeIndex, cellIdInBlock);
+
+                SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock);
+                symbolic.setMortonIndex(newNodeIndex);
+                symbolic.setCoordinate(newNodeCoordinate);
+                symbolic.setLevel(idxLevel);
+
+                cellIdInBlock += 1;
+                blockIteratorInOctree.moveRight();
+              }
+
+            // Keep the block
+            _cellBlocksPerLevel[idxLevel].push_back(newBlock);
+
+            // If we can move right then add another block
+          } while(octreeIterator.moveRight());
+
+        avoidGotoLeft.moveUp(); // go up one level from the saved starting position
+        octreeIterator = avoidGotoLeft;
+      }
+  }
+
+  /**
      * This constructor create a group tree from a particle container index.
      * The morton index are computed and the particles are sorted in a first stage.
      * Then the leaf level is done.
@@ -176,87 +188,170 @@ public:
      * It should be easy to make it parallel using for and tasks.
      * If no limite give inLeftLimite = -1
      */
-    template<class ParticleContainer>
-    FGroupTree(const int in_treeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter,
-               const int in_nbElementsPerBlock, ParticleContainer* inParticlesContainer,
-               const bool particlesAreSorted = false, MortonIndex inLeftLimite = -1):
-            _treeHeight(in_treeHeight),_nbElementsPerBlock(in_nbElementsPerBlock),_cellBlocksPerLevel(nullptr),
-            boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth),
-            boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(in_treeHeight-1)))
+  template<class ParticleContainer>
+  FGroupTree(const int in_treeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter,
+             const int in_nbElementsPerBlock, ParticleContainer* inParticlesContainer,
+             const bool particlesAreSorted = false, MortonIndex inLeftLimite = -1):
+    _treeHeight(in_treeHeight),_nbElementsPerBlock(in_nbElementsPerBlock),_cellBlocksPerLevel(nullptr),
+    boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth),
+    boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(in_treeHeight-1)))
+  {
+    _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight];
+
+    MortonIndex* currentBlockIndexes = new MortonIndex[_nbElementsPerBlock];
+    // First we work at leaf level
     {
+      // Build morton index for particles
+      struct ParticleSortingStruct{
+        FSize originalIndex;
+        MortonIndex mindex;
+      };
+      // Convert position to morton index
+      const FSize nbParticles = inParticlesContainer->getNbParticles();
+      ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles];
+      {
+        const FReal* xpos = inParticlesContainer->getPositions()[0];
+        const FReal* ypos = inParticlesContainer->getPositions()[1];
+        const FReal* zpos = inParticlesContainer->getPositions()[2];
+
+        for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
+            const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth,
+                                                                                                        _treeHeight,
+                                                                                                        FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) );
+            const MortonIndex particleIndex = host.getMortonIndex();
+            particlesToSort[idxPart].mindex = particleIndex;
+            particlesToSort[idxPart].originalIndex = idxPart;
+          }
+      }
+
+      // Sort if needed
+      if(particlesAreSorted == false){
+          FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
+              return v1.mindex <= v2.mindex;
+            });
+        }
 
-        _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight];
-
-        MortonIndex* currentBlockIndexes = new MortonIndex[_nbElementsPerBlock];
-        // First we work at leaf level
-        {
-            // Build morton index for particles
-            struct ParticleSortingStruct{
-                FSize originalIndex;
-                MortonIndex mindex;
-            };
-            // Convert position to morton index
-            const FSize nbParticles = inParticlesContainer->getNbParticles();
-            ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles];
-            {
-                const FReal* xpos = inParticlesContainer->getPositions()[0];
-                const FReal* ypos = inParticlesContainer->getPositions()[1];
-                const FReal* zpos = inParticlesContainer->getPositions()[2];
-
-                for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
-                    const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth,
-                                                                                                       _treeHeight,
-                                                                                                       FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) );
-                    const MortonIndex particleIndex = host.getMortonIndex();
-                    particlesToSort[idxPart].mindex = particleIndex;
-                    particlesToSort[idxPart].originalIndex = idxPart;
+      FAssertLF(nbParticles == 0 || inLeftLimite < particlesToSort[0].mindex);
+      // Convert to block
+      const int idxLevel = (_treeHeight - 1);
+      FSize* nbParticlesPerLeaf = new FSize[_nbElementsPerBlock];
+      FSize firstParticle = 0;
+      // We need to proceed each group in sub level
+      while(firstParticle != nbParticles){
+          int sizeOfBlock = 0;
+          FSize lastParticle = firstParticle;
+          // Count until end of sub group is reached or we have enough cells
+          while(sizeOfBlock < _nbElementsPerBlock && lastParticle < nbParticles){
+              if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){
+                  currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex;
+                  nbParticlesPerLeaf[sizeOfBlock]  = 1;
+                  sizeOfBlock += 1;
+                }
+              else{
+                  nbParticlesPerLeaf[sizeOfBlock-1] += 1;
                 }
+              lastParticle += 1;
             }
-
-            // Sort if needed
-            if(particlesAreSorted == false){
-                FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
-                    return v1.mindex <= v2.mindex;
-                });
+          while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){
+              nbParticlesPerLeaf[sizeOfBlock-1] += 1;
+              lastParticle += 1;
             }
 
-            FAssertLF(nbParticles == 0 || inLeftLimite < particlesToSort[0].mindex);
-            // Convert to block
-            const int idxLevel = (_treeHeight - 1);
-            FSize* nbParticlesPerLeaf = new FSize[_nbElementsPerBlock];
-            FSize firstParticle = 0;
-            // We need to proceed each group in sub level
-            while(firstParticle != nbParticles){
-                int sizeOfBlock = 0;
-                FSize lastParticle = firstParticle;
-                // Count until end of sub group is reached or we have enough cells
-                while(sizeOfBlock < _nbElementsPerBlock && lastParticle < nbParticles){
-                    if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){
-                        currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex;
-                        nbParticlesPerLeaf[sizeOfBlock]  = 1;
-                        sizeOfBlock += 1;
-                    }
-                    else{
-                        nbParticlesPerLeaf[sizeOfBlock-1] += 1;
-                    }
-                    lastParticle += 1;
-                }
-                while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){
-                    nbParticlesPerLeaf[sizeOfBlock-1] += 1;
-                    lastParticle += 1;
+          // Create a group
+          CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0],
+              currentBlockIndexes[sizeOfBlock-1]+1,
+              sizeOfBlock);
+          FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0],
+              currentBlockIndexes[sizeOfBlock-1]+1,
+              sizeOfBlock, lastParticle-firstParticle);
+
+          // Init cells
+          size_t nbParticlesOffsetBeforeLeaf = 0;
+          FSize offsetParticles = firstParticle;
+          for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
+              newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
+
+              SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock);
+              symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]);
+              FTreeCoordinate coord;
+              coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]);
+              symbolic.setCoordinate(coord);
+              symbolic.setLevel(idxLevel);
+
+              // Add leaf
+              nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock,
+                                                                      nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf);
+
+              BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock);
+              // Copy each particle from the original position
+              for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){
+                  attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer);
                 }
+              offsetParticles += nbParticlesPerLeaf[cellIdInBlock];
+            }
+
+          // Keep the block
+          _cellBlocksPerLevel[idxLevel].push_back(newBlock);
+          _particleBlocks.push_back(newParticleBlock);
+
+          sizeOfBlock = 0;
+          firstParticle = lastParticle;
+        }
+      delete[] nbParticlesPerLeaf;
+      delete[] particlesToSort;
+    }
+
+    // For each level from heigth - 2 to 1
+    for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){
+        inLeftLimite = (inLeftLimite == -1 ? inLeftLimite : (inLeftLimite>>3));
+
+        CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin();
+        const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end();
+
+        // Skip blocks that do not respect limit
+        while(iterChildCells != iterChildEndCells
+              && ((*iterChildCells)->getEndingIndex()>>3) <= inLeftLimite){
+            ++iterChildCells;
+          }
+        // If lower level is empty or all blocks skiped stop here
+        if(iterChildCells == iterChildEndCells){
+            break;
+          }
+
+        MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex();
+        if((currentCellIndex>>3) <= inLeftLimite) currentCellIndex = ((inLeftLimite+1)<<3);
+        int sizeOfBlock = 0;
+
+        // We need to proceed each group in sub level
+        while(iterChildCells != iterChildEndCells){
+            // Count until end of sub group is reached or we have enough cells
+            while(sizeOfBlock < _nbElementsPerBlock && iterChildCells != iterChildEndCells ){
+                if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3))
+                   && (*iterChildCells)->exists(currentCellIndex)){
+                    currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3);
+                    sizeOfBlock += 1;
+                    currentCellIndex = (((currentCellIndex>>3)+1)<<3);
+                  }
+                else{
+                    currentCellIndex += 1;
+                  }
+                // If we are at the end of the sub group, move to next
+                while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){
+                    ++iterChildCells;
+                    // Update morton index
+                    if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){
+                        currentCellIndex = (*iterChildCells)->getStartingIndex();
+                      }
+                  }
+              }
 
+            // If group is full
+            if(sizeOfBlock == _nbElementsPerBlock || (sizeOfBlock && iterChildCells == iterChildEndCells)){
                 // Create a group
                 CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0],
-                               currentBlockIndexes[sizeOfBlock-1]+1,
-                               sizeOfBlock);
-                FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0],
-                        currentBlockIndexes[sizeOfBlock-1]+1,
-                        sizeOfBlock, lastParticle-firstParticle);
-
+                    currentBlockIndexes[sizeOfBlock-1]+1,
+                    sizeOfBlock);
                 // Init cells
-                size_t nbParticlesOffsetBeforeLeaf = 0;
-                FSize offsetParticles = firstParticle;
                 for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
                     newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
 
@@ -266,80 +361,206 @@ public:
                     coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]);
                     symbolic.setCoordinate(coord);
                     symbolic.setLevel(idxLevel);
-
-                    // Add leaf
-                    nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock,
-                                              nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf);
-
-                    BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock);
-                    // Copy each particle from the original position
-                    for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){
-                        attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer);
-                    }
-                    offsetParticles += nbParticlesPerLeaf[cellIdInBlock];
-                }
+                  }
 
                 // Keep the block
                 _cellBlocksPerLevel[idxLevel].push_back(newBlock);
-                _particleBlocks.push_back(newParticleBlock);
 
                 sizeOfBlock = 0;
-                firstParticle = lastParticle;
+              }
+          }
+      }
+    delete[] currentBlockIndexes;
+  }
+
+  /**
+     * This constructor creates a group tree from a particle container.
+     * The Morton indexes are computed and, unless particlesAreSorted is true, the particles are sorted in a first stage.
+     * Then the leaf level is built.
+     * Finally the other levels are processed one after the other.
+     * It should be easy to make it parallel using for and tasks.
+     * If there is no limit, give inLeftLimite = -1.
+     * The cover ratio is the minimum percentage of cells that should
+     * exist in a group (0 means no limit, 1 means the block must be dense).
+     * oneParent should be turned on if it is better to have one parent block
+     * per sub-block (in case the cost of the FMM increases with the level
+     * this could be an asset).
+     */
+  template<class ParticleContainer>
+  FGroupTree(const int in_treeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter,
+             const int in_nbElementsPerBlock, ParticleContainer* inParticlesContainer,
+             const bool particlesAreSorted, const bool oneParent,
+             const FReal inCoverRatio = 0.0, MortonIndex inLeftLimite = -1):
+    _treeHeight(in_treeHeight),_nbElementsPerBlock(in_nbElementsPerBlock),_cellBlocksPerLevel(nullptr),
+    boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth),
+    boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(in_treeHeight-1)))
+  {
+
+    FAssertLF(inCoverRatio == 0.0 || oneParent == true, "If a ratio is choosen oneParent should be turned on");
+    const bool userCoverRatio = (inCoverRatio != 0.0);
+
+    _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight];
+
+    MortonIndex* currentBlockIndexes = new MortonIndex[_nbElementsPerBlock];
+    // First we work at leaf level
+    {
+      // Build morton index for particles
+      struct ParticleSortingStruct{
+        FSize originalIndex;
+        MortonIndex mindex;
+      };
+      // Convert position to morton index
+      const FSize nbParticles = inParticlesContainer->getNbParticles();
+      ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles];
+      {
+        const FReal* xpos = inParticlesContainer->getPositions()[0];
+        const FReal* ypos = inParticlesContainer->getPositions()[1];
+        const FReal* zpos = inParticlesContainer->getPositions()[2];
+
+        for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
+            const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth,
+                                                                                                        _treeHeight,
+                                                                                                        FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) );
+            const MortonIndex particleIndex = host.getMortonIndex();
+            particlesToSort[idxPart].mindex = particleIndex;
+            particlesToSort[idxPart].originalIndex = idxPart;
+          }
+      }
+
+      // Sort if needed
+      if(particlesAreSorted == false){
+          FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
+              return v1.mindex <= v2.mindex;
+            });
+        }
+
+      FAssertLF(nbParticles == 0 || inLeftLimite < particlesToSort[0].mindex);
+
+      // Convert to block
+      const int idxLevel = (_treeHeight - 1);
+      int* nbParticlesPerLeaf = new int[_nbElementsPerBlock];
+      int firstParticle = 0;
+      // We need to proceed each group in sub level
+      while(firstParticle != nbParticles){
+          int sizeOfBlock = 0;
+          int lastParticle = firstParticle;
+          // Count until end of sub group is reached or we have enough cells
+          while(sizeOfBlock < _nbElementsPerBlock && lastParticle < nbParticles
+                && (userCoverRatio == false
+                    || sizeOfBlock == 0
+                    || currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex
+                    || (FReal(sizeOfBlock+1)/FReal(particlesToSort[lastParticle].mindex-particlesToSort[firstParticle].mindex)) >= inCoverRatio)){
+              if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){
+                  currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex;
+                  nbParticlesPerLeaf[sizeOfBlock]  = 1;
+                  sizeOfBlock += 1;
+                }
+              else{
+                  nbParticlesPerLeaf[sizeOfBlock-1] += 1;
+                }
+              lastParticle += 1;
+            }
+          while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){
+              nbParticlesPerLeaf[sizeOfBlock-1] += 1;
+              lastParticle += 1;
+            }
+
+          // Create a group
+          CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0],
+              currentBlockIndexes[sizeOfBlock-1]+1,
+              sizeOfBlock);
+          FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0],
+              currentBlockIndexes[sizeOfBlock-1]+1,
+              sizeOfBlock, lastParticle-firstParticle);
+
+          // Init cells
+          size_t nbParticlesOffsetBeforeLeaf = 0;
+          int offsetParticles = firstParticle;
+          for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
+              newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
+
+              SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock);
+              symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]);
+              FTreeCoordinate coord;
+              coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]);
+              symbolic.setCoordinate(coord);
+              symbolic.setLevel(idxLevel);
+
+              // Add leaf
+              nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock,
+                                                                      nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf);
+
+              BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock);
+              // Copy each particle from the original position
+              for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){
+                  attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer);
+                }
+              offsetParticles += nbParticlesPerLeaf[cellIdInBlock];
             }
-            delete[] nbParticlesPerLeaf;
-            delete[] particlesToSort;
+
+          // Keep the block
+          _cellBlocksPerLevel[idxLevel].push_back(newBlock);
+          _particleBlocks.push_back(newParticleBlock);
+
+          sizeOfBlock = 0;
+          firstParticle = lastParticle;
         }
+      delete[] nbParticlesPerLeaf;
+      delete[] particlesToSort;
+    }
 
-        // For each level from heigth - 2 to 1
-        for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){
-            inLeftLimite = (inLeftLimite == -1 ? inLeftLimite : (inLeftLimite>>3));
 
-            CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin();
-            const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end();
+    // For each level from height - 2 down to 1
+    for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){
+        inLeftLimite = (inLeftLimite == -1 ? inLeftLimite : (inLeftLimite>>3));
 
-            // Skip blocks that do not respect limit
-            while(iterChildCells != iterChildEndCells
-                  && ((*iterChildCells)->getEndingIndex()>>3) <= inLeftLimite){
-                ++iterChildCells;
-            }
-            // If lower level is empty or all blocks skiped stop here
-            if(iterChildCells == iterChildEndCells){
-                break;
-            }
+        CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin();
+        const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end();
 
-            MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex();
-            if((currentCellIndex>>3) <= inLeftLimite) currentCellIndex = ((inLeftLimite+1)<<3);
-            int sizeOfBlock = 0;
+        // Skip blocks that do not respect limit
+        while(iterChildCells != iterChildEndCells
+              && ((*iterChildCells)->getEndingIndex()>>3) <= inLeftLimite){
+            ++iterChildCells;
+          }
+        // If the lower level is empty or all blocks were skipped, stop here
+        if(iterChildCells == iterChildEndCells){
+            break;
+          }
+
+        MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex();
+        if((currentCellIndex>>3) <= inLeftLimite) currentCellIndex = ((inLeftLimite+1)<<3);
+        int sizeOfBlock = 0;
 
+        if(oneParent == false){
             // We need to proceed each group in sub level
             while(iterChildCells != iterChildEndCells){
                 // Count until end of sub group is reached or we have enough cells
                 while(sizeOfBlock < _nbElementsPerBlock && iterChildCells != iterChildEndCells ){
                     if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3))
-                            && (*iterChildCells)->exists(currentCellIndex)){
+                       && (*iterChildCells)->exists(currentCellIndex)){
                         currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3);
                         sizeOfBlock += 1;
                         currentCellIndex = (((currentCellIndex>>3)+1)<<3);
-                    }
+                      }
                     else{
                         currentCellIndex += 1;
-                    }
+                      }
                     // If we are at the end of the sub group, move to next
                     while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){
                         ++iterChildCells;
                         // Update morton index
                         if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){
                             currentCellIndex = (*iterChildCells)->getStartingIndex();
-                        }
-                    }
-                }
+                          }
+                      }
+                  }
 
                 // If group is full
                 if(sizeOfBlock == _nbElementsPerBlock || (sizeOfBlock && iterChildCells == iterChildEndCells)){
                     // Create a group
                     CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0],
-                                    currentBlockIndexes[sizeOfBlock-1]+1,
-                                    sizeOfBlock);
+                        currentBlockIndexes[sizeOfBlock-1]+1,
+                        sizeOfBlock);
                     // Init cells
                     for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
                         newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
@@ -350,371 +571,254 @@ public:
                         coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]);
                         symbolic.setCoordinate(coord);
                         symbolic.setLevel(idxLevel);
-                    }
+                      }
 
                     // Keep the block
                     _cellBlocksPerLevel[idxLevel].push_back(newBlock);
 
                     sizeOfBlock = 0;
-                }
-            }
-        }
-        delete[] currentBlockIndexes;
-    }
-
-    /**
-     * This constructor create a group tree from a particle container index.
-     * The morton index are computed and the particles are sorted in a first stage.
-     * Then the leaf level is done.
-     * Finally the other leve are proceed one after the other.
-     * It should be easy to make it parallel using for and tasks.
-     * If no limite give inLeftLimite = -1
-     * The cover ration is the minimum pourcentage of cell that should
-     * exist in a group (0 means no limite, 1 means the block must be dense)
-     * oneParent should be turned on if it is better to have one block parent
-     * per sublock (in case of have the cost of FMM that increase with the level
-     * this could be an asset).
-     */
-    template<class ParticleContainer>
-    FGroupTree(const int in_treeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter,
-               const int in_nbElementsPerBlock, ParticleContainer* inParticlesContainer,
-               const bool particlesAreSorted, const bool oneParent,
-               const FReal inCoverRatio = 0.0, MortonIndex inLeftLimite = -1):
-            _treeHeight(in_treeHeight),_nbElementsPerBlock(in_nbElementsPerBlock),_cellBlocksPerLevel(nullptr),
-            boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth),
-            boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(in_treeHeight-1)))
-    {
-
-        FAssertLF(inCoverRatio == 0.0 || oneParent == true, "If a ratio is choosen oneParent should be turned on");
-        const bool userCoverRatio = (inCoverRatio != 0.0);
-
-        _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight];
-
-        MortonIndex* currentBlockIndexes = new MortonIndex[_nbElementsPerBlock];
-        // First we work at leaf level
-        {
-            // Build morton index for particles
-            struct ParticleSortingStruct{
-                FSize originalIndex;
-                MortonIndex mindex;
-            };
-            // Convert position to morton index
-            const FSize nbParticles = inParticlesContainer->getNbParticles();
-            ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles];
-            {
-                const FReal* xpos = inParticlesContainer->getPositions()[0];
-                const FReal* ypos = inParticlesContainer->getPositions()[1];
-                const FReal* zpos = inParticlesContainer->getPositions()[2];
-
-                for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
-                    const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth,
-                                                                                                       _treeHeight,
-                                                                                                       FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) );
-                    const MortonIndex particleIndex = host.getMortonIndex();
-                    particlesToSort[idxPart].mindex = particleIndex;
-                    particlesToSort[idxPart].originalIndex = idxPart;
-                }
-            }
-
-            // Sort if needed
-            if(particlesAreSorted == false){
-                FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
-                    return v1.mindex <= v2.mindex;
-                });
-            }
-
-            FAssertLF(nbParticles == 0 || inLeftLimite < particlesToSort[0].mindex);
-
-            // Convert to block
-            const int idxLevel = (_treeHeight - 1);
-            int* nbParticlesPerLeaf = new int[_nbElementsPerBlock];
-            int firstParticle = 0;
+                  }
+              }
+          }
+        else{
             // We need to proceed each group in sub level
-            while(firstParticle != nbParticles){
-                int sizeOfBlock = 0;
-                int lastParticle = firstParticle;
-                // Count until end of sub group is reached or we have enough cells
-                while(sizeOfBlock < _nbElementsPerBlock && lastParticle < nbParticles
-                      && (userCoverRatio == false
-                          || sizeOfBlock == 0
-                          || currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex
-                          || (FReal(sizeOfBlock+1)/FReal(particlesToSort[lastParticle].mindex-particlesToSort[firstParticle].mindex)) >= inCoverRatio)){
-                    if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){
-                        currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex;
-                        nbParticlesPerLeaf[sizeOfBlock]  = 1;
-                        sizeOfBlock += 1;
-                    }
-                    else{
-                        nbParticlesPerLeaf[sizeOfBlock-1] += 1;
-                    }
-                    lastParticle += 1;
-                }
-                while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){
-                    nbParticlesPerLeaf[sizeOfBlock-1] += 1;
-                    lastParticle += 1;
-                }
+            while(iterChildCells != iterChildEndCells){
+                // We want one parent group per child group so we will stop the parent group
+                // when we arrive to the same parent as lastChildIndex (which is lastChildIndex>>3)
+                const MortonIndex lastChildIndex = ((*iterChildCells)->getEndingIndex()-1);
+                // Count until the end of the sub group is reached or we pass the requested parent
+                while( iterChildCells != iterChildEndCells
+                       && (currentCellIndex>>3) <= (lastChildIndex>>3) ){
+                    // Proceed until the requested parent
+                    while(currentCellIndex != (*iterChildCells)->getEndingIndex()
+                          && (currentCellIndex>>3) <= (lastChildIndex>>3) ){
+                        if((*iterChildCells)->exists(currentCellIndex)){
+                            currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3);
+                            sizeOfBlock += 1;
+                            currentCellIndex = (((currentCellIndex>>3)+1)<<3);
+                          }
+                        else{
+                            currentCellIndex += 1;
+                          }
+                      }
+                    // If we are at the end of the sub group, move to the next one (otherwise we have consumed only a part of it)
+                    while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){
+                        ++iterChildCells;
+                        // Update morton index
+                        if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){
+                            currentCellIndex = (*iterChildCells)->getStartingIndex();
+                          }
+                      }
+                  }
 
-                // Create a group
-                CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0],
-                                                                 currentBlockIndexes[sizeOfBlock-1]+1,
-                                                                 sizeOfBlock);
-                FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0],
+                // If the group is not empty
+                if(sizeOfBlock){
+                    // Create a group
+                    CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0],
                         currentBlockIndexes[sizeOfBlock-1]+1,
-                        sizeOfBlock, lastParticle-firstParticle);
-
-                // Init cells
-                size_t nbParticlesOffsetBeforeLeaf = 0;
-                int offsetParticles = firstParticle;
-                for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
-                    newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
-
-                    SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock);
-                    symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]);
-                    FTreeCoordinate coord;
-                    coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]);
-                    symbolic.setCoordinate(coord);
-                    symbolic.setLevel(idxLevel);
-
-                    // Add leaf
-                    nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock,
-                                              nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf);
+                        sizeOfBlock);
+                    // Init cells
+                    for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
+                        newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
 
-                    BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock);
-                    // Copy each particle from the original position
-                    for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){
-                        attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer);
-                    }
-                    offsetParticles += nbParticlesPerLeaf[cellIdInBlock];
-                }
+                        SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock);
+                        symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]);
+                        FTreeCoordinate coord;
+                        coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]);
+                        symbolic.setCoordinate(coord);
+                        symbolic.setLevel(idxLevel);
+                      }
 
-                // Keep the block
-                _cellBlocksPerLevel[idxLevel].push_back(newBlock);
-                _particleBlocks.push_back(newParticleBlock);
+                    // Keep the block
+                    _cellBlocksPerLevel[idxLevel].push_back(newBlock);
 
-                sizeOfBlock = 0;
-                firstParticle = lastParticle;
-            }
-            delete[] nbParticlesPerLeaf;
-            delete[] particlesToSort;
+                    sizeOfBlock = 0;
+                  }
+              }
+          }
+      }
+    delete[] currentBlockIndexes;
+  }
+    /**
+     * Sequential constructor of the group tree,
+     * used to construct a duplicated group tree on all processes
+     * @param[in] in_treeHeight height of the tree (number of levels)
+     * @param[in] inBoxWidth   box width
+     * @param[in] inBoxCenter  box center
+     * @param[in] in_nbElementsPerBlock block size
+     * @param[in] inParticlesContainer  the particle container
+     * @param[in] blockSizeAtEachLevel  for each level, the number of cells in each block (read to size the leaf blocks)
+     * @param[in] particlesAreSorted  true if the particles are already sorted
+     */
+  template<class ParticleContainer>
+  FGroupTree(const int in_treeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter,
+             const int in_nbElementsPerBlock, ParticleContainer* inParticlesContainer,
+             std::vector<std::vector<int>> & blockSizeAtEachLevel,
+             const bool particlesAreSorted = false):
+    _treeHeight(in_treeHeight),_nbElementsPerBlock(in_nbElementsPerBlock),_cellBlocksPerLevel(nullptr),
+    boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth),
+    boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(in_treeHeight-1)))
+  {
+    _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight];
+
+    MortonIndex* currentBlockIndexes = new MortonIndex[_nbElementsPerBlock];
+    // First we work at leaf level
+    {
+      // Build morton index for particles
+      struct ParticleSortingStruct{
+        FSize originalIndex;
+        MortonIndex mindex;
+      };
+      // Convert position to morton index
+      const FSize nbParticles = inParticlesContainer->getNbParticles();
+      ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles];
+      {
+        const FReal* xpos = inParticlesContainer->getPositions()[0];
+        const FReal* ypos = inParticlesContainer->getPositions()[1];
+        const FReal* zpos = inParticlesContainer->getPositions()[2];
+
+        for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
+            const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth,
+                                                                                                        _treeHeight,
+                                                                                                        FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) );
+            const MortonIndex particleIndex = host.getMortonIndex();
+            particlesToSort[idxPart].mindex = particleIndex;
+            particlesToSort[idxPart].originalIndex = idxPart;
+          }
+      }
+
+      // Sort if needed
+      if(particlesAreSorted == false){
+          FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
+              return v1.mindex <= v2.mindex;
+            });
         }
 
-
-        // For each level from heigth - 2 to 1
-        for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){
-            inLeftLimite = (inLeftLimite == -1 ? inLeftLimite : (inLeftLimite>>3));
-
-            CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin();
-            const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end();
-
-            // Skip blocks that do not respect limit
-            while(iterChildCells != iterChildEndCells
-                  && ((*iterChildCells)->getEndingIndex()>>3) <= inLeftLimite){
-                ++iterChildCells;
-            }
-            // If lower level is empty or all blocks skiped stop here
-            if(iterChildCells == iterChildEndCells){
-                break;
-            }
-
-            MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex();
-            if((currentCellIndex>>3) <= inLeftLimite) currentCellIndex = ((inLeftLimite+1)<<3);
-            int sizeOfBlock = 0;
-
-            if(oneParent == false){
-                // We need to proceed each group in sub level
-                while(iterChildCells != iterChildEndCells){
-                    // Count until end of sub group is reached or we have enough cells
-                    while(sizeOfBlock < _nbElementsPerBlock && iterChildCells != iterChildEndCells ){
-                        if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3))
-                                && (*iterChildCells)->exists(currentCellIndex)){
-                            currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3);
-                            sizeOfBlock += 1;
-                            currentCellIndex = (((currentCellIndex>>3)+1)<<3);
-                        }
-                        else{
-                            currentCellIndex += 1;
-                        }
-                        // If we are at the end of the sub group, move to next
-                        while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){
-                            ++iterChildCells;
-                            // Update morton index
-                            if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){
-                                currentCellIndex = (*iterChildCells)->getStartingIndex();
-                            }
-                        }
-                    }
-
-                    // If group is full
-                    if(sizeOfBlock == _nbElementsPerBlock || (sizeOfBlock && iterChildCells == iterChildEndCells)){
-                        // Create a group
-                        CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0],
-                                                                         currentBlockIndexes[sizeOfBlock-1]+1,
-                                                                         sizeOfBlock);
-                        // Init cells
-                        for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
-                            newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
-
-                            SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock);
-                            symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]);
-                            FTreeCoordinate coord;
-                            coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]);
-                            symbolic.setCoordinate(coord);
-                            symbolic.setLevel(idxLevel);
-                        }
-
-                        // Keep the block
-                        _cellBlocksPerLevel[idxLevel].push_back(newBlock);
-
-                        sizeOfBlock = 0;
-                    }
+      // Convert to block
+      const int idxLevel = (_treeHeight - 1);
+      int idxBlock = 0;
+      FSize* nbParticlesPerLeaf = new FSize[_nbElementsPerBlock];
+      FSize firstParticle = 0;
+      // We need to proceed each group in sub level
+      while(firstParticle != nbParticles){
+          int sizeOfBlock = 0;
+          FSize lastParticle = firstParticle;
+          // Count until end of sub group is reached or we have enough cells
+          while(sizeOfBlock < blockSizeAtEachLevel[_treeHeight-1][idxBlock] && lastParticle < nbParticles){
+              if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){
+                  currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex;
+                  nbParticlesPerLeaf[sizeOfBlock]  = 1;
+                  sizeOfBlock += 1;
                 }
-            }
-            else{
-                // We need to proceed each group in sub level
-                while(iterChildCells != iterChildEndCells){
-                    // We want one parent group per child group so we will stop the parent group
-                    // when we arrive to the same parent as lastChildIndex (which is lastChildIndex>>3)
-                    const MortonIndex lastChildIndex = ((*iterChildCells)->getEndingIndex()-1);
-                    // Count until end of sub group is reached or we passe the requested parent
-                    while( iterChildCells != iterChildEndCells
-                           && (currentCellIndex>>3) <= (lastChildIndex>>3) ){
-                        // Proceed until the requested parent
-                        while(currentCellIndex != (*iterChildCells)->getEndingIndex()
-                              && (currentCellIndex>>3) <= (lastChildIndex>>3) ){
-                            if((*iterChildCells)->exists(currentCellIndex)){
-                                currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3);
-                                sizeOfBlock += 1;
-                                currentCellIndex = (((currentCellIndex>>3)+1)<<3);
-                            }
-                            else{
-                                currentCellIndex += 1;
-                            }
-                        }
-                        // If we are at the end of the sub group, move to next (otherwise we have consume a part of it)
-                        while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){
-                            ++iterChildCells;
-                            // Update morton index
-                            if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){
-                                currentCellIndex = (*iterChildCells)->getStartingIndex();
-                            }
-                        }
-                    }
-
-                    // If group is full
-                    if(sizeOfBlock){
-                        // Create a group
-                        CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0],
-                                                                         currentBlockIndexes[sizeOfBlock-1]+1,
-                                                                         sizeOfBlock);
-                        // Init cells
-                        for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
-                            newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
-
-                            SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock);
-                            symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]);
-                            FTreeCoordinate coord;
-                            coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]);
-                            symbolic.setCoordinate(coord);
-                            symbolic.setLevel(idxLevel);
-                        }
-
-                        // Keep the block
-                        _cellBlocksPerLevel[idxLevel].push_back(newBlock);
-
-                        sizeOfBlock = 0;
-                    }
+              else{
+                  nbParticlesPerLeaf[sizeOfBlock-1] += 1;
                 }
+              lastParticle += 1;
             }
-        }
-        delete[] currentBlockIndexes;
+          while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){
+              nbParticlesPerLeaf[sizeOfBlock-1] += 1;
+              lastParticle += 1;
+            }
+
+          // Create a group
+          CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0],
+              currentBlockIndexes[sizeOfBlock-1]+1,
+              sizeOfBlock);
+          FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0],
+              currentBlockIndexes[sizeOfBlock-1]+1,
+              sizeOfBlock, lastParticle-firstParticle);
+
+          /////////////////////////  TO REMOVE ?? //////////////
+          #include <iostream>
+	  using namespace std;
+	  if(currentBlockIndexes[sizeOfBlock-1]+1 == 511)
+	    cout << "Suricate" << endl;
+	  /////////////////////////////////////////////////////
+
+	  // Init cells
+	  size_t nbParticlesOffsetBeforeLeaf = 0;
+	  FSize offsetParticles = firstParticle;
+	  for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
+	      newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
+
+	      SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock);
+	      symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]);
+	      FTreeCoordinate coord;
+	      coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]);
+	      symbolic.setCoordinate(coord);
+	      symbolic.setLevel(idxLevel);
+
+	      // Add leaf
+	      nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock,
+								      nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf);
+
+	      BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock);
+	      // Copy each particle from the original position
+	      for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){
+		  attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer);
+		}
+	      offsetParticles += nbParticlesPerLeaf[cellIdInBlock];
+	    }
+
+	  // Keep the block
+	  _cellBlocksPerLevel[idxLevel].push_back(newBlock);
+	  _particleBlocks.push_back(newParticleBlock);
+
+	  sizeOfBlock = 0;
+	  firstParticle = lastParticle;
+	  ++idxBlock;
+	}
+      delete[] nbParticlesPerLeaf;
+      delete[] particlesToSort;
     }
+//    MPI_Barrier(MPI_COMM_WORLD);
 
-    template<class ParticleContainer>
-    FGroupTree(const int in_treeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter,
-               const int in_nbElementsPerBlock, ParticleContainer* inParticlesContainer,
-			   std::vector<std::vector<int>> & blockSizeAtEachLevel,
-               const bool particlesAreSorted = false):
-            _treeHeight(in_treeHeight),_nbElementsPerBlock(in_nbElementsPerBlock),_cellBlocksPerLevel(nullptr),
-            boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth),
-            boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(in_treeHeight-1)))
-    {
+    // For each level from height - 2 down to 1
+    for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){
+        CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin();
+        const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end();
 
-        _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight];
+        // If the lower level is empty or all blocks were skipped, stop here
+        if(iterChildCells == iterChildEndCells){
+            break;
+          }
 
-        MortonIndex* currentBlockIndexes = new MortonIndex[_nbElementsPerBlock];
-        // First we work at leaf level
-        {
-            // Build morton index for particles
-            struct ParticleSortingStruct{
-                FSize originalIndex;
-                MortonIndex mindex;
-            };
-            // Convert position to morton index
-            const FSize nbParticles = inParticlesContainer->getNbParticles();
-            ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles];
-            {
-                const FReal* xpos = inParticlesContainer->getPositions()[0];
-                const FReal* ypos = inParticlesContainer->getPositions()[1];
-                const FReal* zpos = inParticlesContainer->getPositions()[2];
-
-                for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
-                    const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth,
-                                                                                                       _treeHeight,
-                                                                                                       FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) );
-                    const MortonIndex particleIndex = host.getMortonIndex();
-                    particlesToSort[idxPart].mindex = particleIndex;
-                    particlesToSort[idxPart].originalIndex = idxPart;
-                }
-            }
+        MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex();
+        int sizeOfBlock = 0;
+        int idxBlock    = 0;
 
-            // Sort if needed
-            if(particlesAreSorted == false){
-                FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
-                    return v1.mindex <= v2.mindex;
-                });
-            }
+        // We need to process each group in the sub level
+        while(iterChildCells != iterChildEndCells){
 
+            // Count until end of sub group is reached or we have enough cells
+            while(sizeOfBlock < blockSizeAtEachLevel[idxLevel][idxBlock] && iterChildCells != iterChildEndCells ){
 
-            // Convert to block
-            const int idxLevel = (_treeHeight - 1);
-			int idxBlock = 0;
-            FSize* nbParticlesPerLeaf = new FSize[_nbElementsPerBlock];
-            FSize firstParticle = 0;
-            // We need to proceed each group in sub level
-            while(firstParticle != nbParticles){
-                int sizeOfBlock = 0;
-                FSize lastParticle = firstParticle;
-                // Count until end of sub group is reached or we have enough cells
-                while(sizeOfBlock < blockSizeAtEachLevel[_treeHeight-1][idxBlock] && lastParticle < nbParticles){
-                    if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){
-                        currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex;
-                        nbParticlesPerLeaf[sizeOfBlock]  = 1;
-                        sizeOfBlock += 1;
-                    }
-                    else{
-                        nbParticlesPerLeaf[sizeOfBlock-1] += 1;
-                    }
-                    lastParticle += 1;
-                }
-                while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){
-                    nbParticlesPerLeaf[sizeOfBlock-1] += 1;
-                    lastParticle += 1;
-                }
+                if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3))
+                   && (*iterChildCells)->exists(currentCellIndex)){
+                    currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3);
+                    sizeOfBlock += 1;
+                    currentCellIndex = (((currentCellIndex>>3)+1)<<3);
+                  }
+                else{
+                    currentCellIndex += 1;
+                  }
+                // If we are at the end of the sub group, move to next
+                while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){
+                    ++iterChildCells;
+                    // Update morton index
+                    if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){
+                        currentCellIndex = (*iterChildCells)->getStartingIndex();
+                      }
+                  }
+              }
 
+            // If group is full
+            if(sizeOfBlock == blockSizeAtEachLevel[idxLevel][idxBlock] || (sizeOfBlock && iterChildCells == iterChildEndCells)){ //NOTE la seconde partie va sûrement sauter, car la taille est pré-calculée
                 // Create a group
                 CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0],
-                                                                 currentBlockIndexes[sizeOfBlock-1]+1,
-                                                                 sizeOfBlock);
-                FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0],
-                        currentBlockIndexes[sizeOfBlock-1]+1,
-                        sizeOfBlock, lastParticle-firstParticle);
-                        #include <iostream>
-				using namespace std;
-				if(currentBlockIndexes[sizeOfBlock-1]+1 == 511)
-					cout << "Suricate" << endl;
+                    currentBlockIndexes[sizeOfBlock-1]+1,
+                    sizeOfBlock);
                 // Init cells
-                size_t nbParticlesOffsetBeforeLeaf = 0;
-                FSize offsetParticles = firstParticle;
                 for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
                     newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
 
@@ -724,100 +828,21 @@ public:
                     coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]);
                     symbolic.setCoordinate(coord);
                     symbolic.setLevel(idxLevel);
-
-                    // Add leaf
-                    nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock,
-                                              nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf);
-
-                    BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock);
-                    // Copy each particle from the original position
-                    for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){
-                        attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer);
-                    }
-                    offsetParticles += nbParticlesPerLeaf[cellIdInBlock];
-                }
+                  }
 
                 // Keep the block
                 _cellBlocksPerLevel[idxLevel].push_back(newBlock);
-                _particleBlocks.push_back(newParticleBlock);
 
                 sizeOfBlock = 0;
-                firstParticle = lastParticle;
-				++idxBlock;
-            }
-            delete[] nbParticlesPerLeaf;
-            delete[] particlesToSort;
-        }
-
-
-        // For each level from heigth - 2 to 1
-        for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){
-
-            CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin();
-            const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end();
-
-            // If lower level is empty or all blocks skiped stop here
-            if(iterChildCells == iterChildEndCells){
-                break;
-            }
-
-            MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex();
-            int sizeOfBlock = 0;
-			int idxBlock = 0;
-            // We need to proceed each group in sub level
-            while(iterChildCells != iterChildEndCells){
-                // Count until end of sub group is reached or we have enough cells
-                while(sizeOfBlock < blockSizeAtEachLevel[idxLevel][idxBlock] && iterChildCells != iterChildEndCells ){
-                    if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3))
-                            && (*iterChildCells)->exists(currentCellIndex)){
-                        currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3);
-                        sizeOfBlock += 1;
-                        currentCellIndex = (((currentCellIndex>>3)+1)<<3);
-                    }
-                    else{
-                        currentCellIndex += 1;
-                    }
-                    // If we are at the end of the sub group, move to next
-                    while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){
-                        ++iterChildCells;
-                        // Update morton index
-                        if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){
-                            currentCellIndex = (*iterChildCells)->getStartingIndex();
-                        }
-                    }
-                }
-
-                // If group is full
-                if(sizeOfBlock == blockSizeAtEachLevel[idxLevel][idxBlock] || (sizeOfBlock && iterChildCells == iterChildEndCells)){ //NOTE la seconde partie va sûrement sauter, car la taille est pré-calculée
-                    // Create a group
-                    CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0],
-                                                                     currentBlockIndexes[sizeOfBlock-1]+1,
-                                                                     sizeOfBlock);
-                    // Init cells
-                    for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
-                        newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
-
-                        SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock);
-                        symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]);
-                        FTreeCoordinate coord;
-                        coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]);
-                        symbolic.setCoordinate(coord);
-                        symbolic.setLevel(idxLevel);
-                    }
+                ++idxBlock;
+              }
+          }
+      }
+    delete[] currentBlockIndexes;
+  }
 
-                    // Keep the block
-                    _cellBlocksPerLevel[idxLevel].push_back(newBlock);
 
-                    sizeOfBlock = 0;
-					++idxBlock;
-                }
-            }
-        }
-        delete[] currentBlockIndexes;
-    }
-
-
-    /**
+  /**
      * Minimal Constructor of GroupTree
      * @author benjamin.dufoyer@inria.fr
      * @param in__treeHeight size of the tree
@@ -827,64 +852,65 @@ public:
      * @param in_boxWidth   bow witdh
      * @param in_boxWidthAtLeafLevel  box width at leaf level
      */
-    FGroupTree(
-         int in__treeHeight,
-         int in__nbElementsPerBlock,
-         FPoint<FReal> in_boxCenter,
-         FPoint<FReal> in_boxCorner,
-         FReal in_boxWidth,
-         FReal in_boxWidthAtLeafLevel
-    ):
-        _treeHeight(in__treeHeight),
-        _nbElementsPerBlock(in__nbElementsPerBlock),
-        boxCenter(in_boxCenter),
-        boxCorner(in_boxCorner),
-        boxWidth(in_boxWidth),
-        boxWidthAtLeafLevel(in_boxWidthAtLeafLevel)
-    {
-        this->_cellBlocksPerLevel = new std::vector<CellGroupClass*>[this->_treeHeight];
-    }
-
-
-    /**
+  FGroupTree(
+      int in__treeHeight,
+      int in__nbElementsPerBlock,
+      FPoint<FReal> in_boxCenter,
+      FPoint<FReal> in_boxCorner,
+      FReal in_boxWidth,
+      FReal in_boxWidthAtLeafLevel
+      ):
+    _treeHeight(in__treeHeight),
+    _nbElementsPerBlock(in__nbElementsPerBlock),
+    boxCenter(in_boxCenter),
+    boxCorner(in_boxCorner),
+    boxWidth(in_boxWidth),
+    boxWidthAtLeafLevel(in_boxWidthAtLeafLevel)
+  {
+    this->_cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight];
+
+  }
+
+
+  /**
      * get_block_tree_instance return a new instance of FGroupTree from
      * a blocked linear tree
      * @author benjamin.dufoyer@inria.fr
      * @param  blocked_linear_tree blocked linear tree
      * @return new FGroupTree
      */
-    template<
-    class GroupCellSymbClass,
-    class GroupCellUpClass,
-    class GroupCellDownClass,
-    class GroupContainerClass
-    >
-    static FGroupTree get_block_tree_instance(
-        int in_tree_height,
-        int in_block_size,
-        FPoint<FReal> in_box_center,
-        FReal in_box_width
-    ){
-        // Compute every information to initialise the group tree
-        FPoint<FReal> box_corner = FPoint<FReal>(in_box_center, -in_box_width/2);
-        FReal box_width_at_leaf_level = in_box_width/FReal( 1<< (in_tree_height-1));
-
-        // Return a new instance of a empty group tree
-        return FGroupTree<FReal,GroupCellSymbClass,GroupCellUpClass, GroupCellDownClass, GroupContainerClass, NbSymbAttributes, NbAttributesPerParticle, FReal>(
-                 in_tree_height
-                ,in_block_size
-                ,in_box_center
-                ,box_corner
-                ,in_box_width
-                ,box_width_at_leaf_level);
-    }
-
-
-    /////////////////////////////////////////////////////////
-    // Function to init group tree
-    /////////////////////////////////////////////////////////
-
-    /**
+  template<
+      class GroupCellSymbClass,
+      class GroupCellUpClass,
+      class GroupCellDownClass,
+      class GroupContainerClass
+      >
+  static FGroupTree get_block_tree_instance(
+      int in_tree_height,
+      int in_block_size,
+      FPoint<FReal> in_box_center,
+      FReal in_box_width
+      ){
+    // Compute all the information needed to initialise the group tree
+    FPoint<FReal> box_corner = FPoint<FReal>(in_box_center, -in_box_width/2);
+    FReal box_width_at_leaf_level = in_box_width/FReal( 1<< (in_tree_height-1));
+
+    // Return a new instance of an empty group tree
+    return FGroupTree<FReal,GroupCellSymbClass,GroupCellUpClass, GroupCellDownClass, GroupContainerClass, NbSymbAttributes, NbAttributesPerParticle, FReal>(
+          in_tree_height
+          ,in_block_size
+          ,in_box_center
+          ,box_corner
+          ,in_box_width
+          ,box_width_at_leaf_level);
+  }
+
+
+  /////////////////////////////////////////////////////////
+  // Function to init group tree
+  /////////////////////////////////////////////////////////
+
+  /**
      * create_tree this function fill the tree from blocked_linear_tree
      * She build the group tree from the bottom
      * @author benjamin.dufoyer@inria.fr
@@ -892,237 +918,306 @@ public:
      * @param  particles    vector where particle are stock,
      *                      they will be sort BEFORE calling this function
      */
-    template<class Group_Linear_tree,
-              class Particle_Container
-            >
-    void create_tree(Group_Linear_tree* in_lin_tree,
-                     Particle_Container* particles,
-                     MortonIndex inLeftLimite = -1){
-        MortonIndex in_left_limit = in_lin_tree->get_left_limit();
-        // Creation of the leaf level and groups of particle
-        auto current_block_indexes = create_leaf_level(in_lin_tree,particles);
-        // Creation of every level of the tree
-        create_block_nodes_level(
-            current_block_indexes,
-            in_left_limit);
-    }
-
-    /** This function dealloc the tree by deleting each block */
-    ~FGroupTree(){
-        for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){
-            std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel];
-            for (CellGroupClass* block: levelBlocks){
-                delete block;
-            }
-        }
-        delete[] _cellBlocksPerLevel;
-        for (ParticleGroupClass* block: _particleBlocks){
+  template<class Group_Linear_tree,
+           class Particle_Container
+           >
+  void create_tree(Group_Linear_tree& in_lin_tree,
+                   const Particle_Container& particles){
+    MortonIndex in_left_limit = in_lin_tree.get_left_limit();
+    // Creation of the leaf level and groups of particle
+    std::vector<MortonIndex> current_block_indexes = create_leaf_level(in_lin_tree,particles);
+    // Creation of every level of the tree
+    create_block_nodes_level(
+          current_block_indexes,
+          in_left_limit);
+  }
+
+  /** This function deallocates the tree by deleting each block */
+  ~FGroupTree(){
+    for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){
+        std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel];
+        for (CellGroupClass* block: levelBlocks){
             delete block;
-        }
-    }
-
-    ////////////////////////////////////////////////////////
-    // Lambda function to apply to all member
-    /////////////////////////////////////////////////////////
-
-    /**
+          }
+      }
+    delete[] _cellBlocksPerLevel;
+    for (ParticleGroupClass* block: _particleBlocks){
+        delete block;
+      }
+  }
+
+  ////////////////////////////////////////////////////////
+  // Lambda functions to apply to all members
+  /////////////////////////////////////////////////////////
+
+  /**
    * @brief forEachLeaf iterate on the leaf and apply the function
    * @param function
    */
-    template<class ParticlesAttachedClass>
-    void forEachLeaf(std::function<void(ParticlesAttachedClass*)> function){
-        for (ParticleGroupClass* block: _particleBlocks){
-            block->forEachLeaf(function);
-        }
-    }
-
-    /**
-   * @brief forEachLeaf iterate on the cell and apply the function
+  template<class ParticlesAttachedClass>
+  void forEachLeaf(std::function<void(ParticlesAttachedClass*)> function){
+    for (ParticleGroupClass* block: _particleBlocks){
+        block->forEachLeaf(function);
+      }
+  }
+  /**
+   * @brief forEachMyLeaf iterate on the leaf and apply the function
    * @param function
    */
-    void forEachCell(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*)> function){
-        for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){
-            std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel];
-            for (CellGroupClass* block: levelBlocks){
-                block->forEachCell(function);
-            }
-        }
-    }
-
-    /**
+  template<class ParticlesAttachedClass>
+  void forEachMyLeaf(std::function<void(ParticlesAttachedClass*)> function){
+    for (ParticleGroupClass* block: _particleBlocks){
+        if(block->isMine())
+          block->forEachLeaf(function);
+      }
+  }
+
+  /**
+   * @brief forEachCell iterate on the cell and apply the function
+   * @param function
+   */
+  void forEachCell(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*)> function){
+    for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){
+        std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel];
+        for (CellGroupClass* block: levelBlocks){
+            block->forEachCell(function);
+          }
+      }
+  }
+
+  void forEachMyCell(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*)> function){
+    for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){
+        std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel];
+        for (CellGroupClass* block: levelBlocks){
+            if(block->isMine())
+              block->forEachCell(function);
+          }
+      }
+  }
+
+  /**
    * @brief forEachLeaf iterate on the cell and apply the function
    * @param function
    */
-    void forEachCellWithLevel(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*,const int)> function){
-        for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){
-            std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel];
-            for (CellGroupClass* block: levelBlocks){
-                block->forEachCell(function, idxLevel);
-            }
-        }
-    }
-
-    /**
+  void forEachCellWithLevel(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*,const int)> function){
+    for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){
+        std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel];
+        for (CellGroupClass* block: levelBlocks){
+            block->forEachCell(function, idxLevel);
+          }
+      }
+  }
+
+  void forEachMyCellWithLevel(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*,const int)> function){
+    for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){
+        std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel];
+        for (CellGroupClass* block: levelBlocks){
+            if(block->isMine())
+              block->forEachCell(function, idxLevel);
+          }
+      }
+  }
+
+  /**
    * @brief forEachLeaf iterate on the cell and apply the function
    * @param function
    */
-    template<class ParticlesAttachedClass>
-    void forEachCellLeaf(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*,ParticlesAttachedClass*)> function){
-        CellGroupIterator iterCells = _cellBlocksPerLevel[_treeHeight-1].begin();
-        const CellGroupIterator iterEndCells = _cellBlocksPerLevel[_treeHeight-1].end();
+  template<class ParticlesAttachedClass>
+  void forEachCellLeaf(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*,ParticlesAttachedClass*)> function){
+    CellGroupIterator iterCells = _cellBlocksPerLevel[_treeHeight-1].begin();
+    const CellGroupIterator iterEndCells = _cellBlocksPerLevel[_treeHeight-1].end();
 
-        ParticleGroupIterator iterLeaves = _particleBlocks.begin();
-        const ParticleGroupIterator iterEndLeaves = _particleBlocks.end();
+    ParticleGroupIterator iterLeaves = _particleBlocks.begin();
+    const ParticleGroupIterator iterEndLeaves = _particleBlocks.end();
 
-        while(iterCells != iterEndCells && iterLeaves != iterEndLeaves){
+    while(iterCells != iterEndCells && iterLeaves != iterEndLeaves){
 
-            (*iterCells)->forEachCell(
-                [&](SymbolCellClass* symb,
-                    PoleCellClass* mult,
-                    LocalCellClass* loc)
-                {
-                const int leafIdx = (*iterLeaves)->getLeafIndex(symb->getMortonIndex());
-                FAssertLF(leafIdx != -1);
-                ParticlesAttachedClass aLeaf = (*iterLeaves)->template getLeaf <ParticlesAttachedClass>(leafIdx);
-                FAssertLF(aLeaf.isAttachedToSomething());
-                function(symb, mult, loc, &aLeaf);
-            });
+        (*iterCells)->forEachCell(
+              [&](SymbolCellClass* symb,
+              PoleCellClass* mult,
+              LocalCellClass* loc)
+        {
+	  const int leafIdx = (*iterLeaves)->getLeafIndex(symb->getMortonIndex());
+          FAssertLF(leafIdx != -1);
+          ParticlesAttachedClass aLeaf = (*iterLeaves)->template getLeaf <ParticlesAttachedClass>(leafIdx);
+          FAssertLF(aLeaf.isAttachedToSomething());
+          function(symb, mult, loc, &aLeaf);
+        });
 
-            ++iterCells;
-            ++iterLeaves;
-        }
+        ++iterCells;
+        ++iterLeaves;
+      }
 
-        FAssertLF(iterCells == iterEndCells && iterLeaves == iterEndLeaves);
-    }
+    FAssertLF(iterCells == iterEndCells && iterLeaves == iterEndLeaves);
+  }
+  /** Same traversal as forEachCellLeaf, restricted to the cell groups whose isMine() is true:
+    * `function` receives each cell's symbolic, pole and local parts plus its attached leaf. */
+  template<class ParticlesAttachedClass>
+  void forEachCellMyLeaf(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*,ParticlesAttachedClass*)> function){
+    CellGroupIterator iterCells = _cellBlocksPerLevel[_treeHeight-1].begin();
+
+    const CellGroupIterator iterEndCells = _cellBlocksPerLevel[_treeHeight-1].end();
+    // Cell groups of level _treeHeight-1 and particle groups are walked in lockstep
+    ParticleGroupIterator iterLeaves = _particleBlocks.begin();
+    const ParticleGroupIterator iterEndLeaves = _particleBlocks.end();
+
+    while(iterCells != iterEndCells && iterLeaves != iterEndLeaves){
+        if((*iterCells)->isMine()){ // groups for which isMine() is false are skipped
+            (*iterCells)->forEachCell(
+                  [&](SymbolCellClass* symb,
+                  PoleCellClass*       mult,
+                  LocalCellClass*      loc)
+            {
+              const int leafIdx = (*iterLeaves)->getLeafIndex(symb->getMortonIndex()); // leaf looked up by Morton index
+              FAssertLF(leafIdx != -1);
+              ParticlesAttachedClass aLeaf = (*iterLeaves)->template getLeaf <ParticlesAttachedClass>(leafIdx); // local object: &aLeaf is only valid during the call below
+              FAssertLF(aLeaf.isAttachedToSomething());
+              function(symb, mult, loc, &aLeaf);
+            });
+          }
+        ++iterCells;
+        ++iterLeaves;
+      }
 
+    FAssertLF(iterCells == iterEndCells && iterLeaves == iterEndLeaves); // both sequences must end together
+  }
 
 
-    /** @brief, for statistic purpose, display each block with number of
+  /** @brief For statistics purposes, display each block with its number of
    * cell, size of header, starting index, and ending index
    */
-    void printInfoBlocks(){
-        std::cout << "Group Tree information:\n";
-        std::cout << "\t Group Size = " << _nbElementsPerBlock << "\n";
-        std::cout << "\t Tree height = " << _treeHeight << "\n";
-        for(int idxLevel = 1 ; idxLevel < _treeHeight ; ++idxLevel){
-            std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel];
-            std::cout << "Level " << idxLevel << ", there are " << levelBlocks.size() << " groups.\n";
-            int idxGroup = 0;
-            for (const CellGroupClass* block: levelBlocks){
-                std::cout << "\t Group " << (idxGroup++);
-                std::cout << "\t Size = " << block->getNumberOfCellsInBlock();
-                std::cout << "\t Starting Index = " << block->getStartingIndex();
-                std::cout << "\t Ending Index = " << block->getEndingIndex();
-                std::cout << "\t Ratio of usage = " << float(block->getNumberOfCellsInBlock())/float(block->getEndingIndex()-block->getStartingIndex()) << "\n";
-            }
-        }
-
-        std::cout << "There are " << _particleBlocks.size() << " leaf-groups.\n";
+  void printInfoBlocks(){
+    std::cout << "Group Tree information:\n";
+    std::cout << "\t Group Size = " << _nbElementsPerBlock << "\n";
+    std::cout << "\t Tree height = " << _treeHeight << "\n";
+    for(int idxLevel = 1 ; idxLevel < _treeHeight ; ++idxLevel){
+        std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel];
+        std::cout << "Level " << idxLevel << ", there are " << levelBlocks.size() << " groups.\n";
         int idxGroup = 0;
-        FSize totalNbParticles = 0;
-        for (const ParticleGroupClass* block: _particleBlocks){
+        for (const CellGroupClass* block: levelBlocks){
             std::cout << "\t Group " << (idxGroup++);
-            std::cout << "\t Size = " << block->getNumberOfLeavesInBlock();
+          //  std::cout << "\t local " << std::boolalpha << block->isMine();
+            std::cout << "\t Size = " << block->getNumberOfCellsInBlock();
             std::cout << "\t Starting Index = " << block->getStartingIndex();
             std::cout << "\t Ending Index = " << block->getEndingIndex();
-            std::cout << "\t Nb Particles = " << block->getNbParticlesInGroup();
-            std::cout << "\t Ratio of usage = " << float(block->getNumberOfLeavesInBlock())/float(block->getEndingIndex()-block->getStartingIndex()) << "\n";
-            totalNbParticles += block->getNbParticlesInGroup();
-        }
-        std::cout << "There are " << totalNbParticles << " particles.\n";
-    }
-
-    /////////////////////////////////////////////////////////
-    // Algorithm function
-    /////////////////////////////////////////////////////////
-
-    int getHeight() const {
-        return _treeHeight;
-    }
-
-    CellGroupIterator cellsBegin(const int inLevel){
-        FAssertLF(inLevel < _treeHeight);
-        return _cellBlocksPerLevel[inLevel].begin();
-    }
-
-    CellGroupConstIterator cellsBegin(const int inLevel) const {
-        FAssertLF(inLevel < _treeHeight);
-        return _cellBlocksPerLevel[inLevel].begin();
-    }
-
-    CellGroupIterator cellsEnd(const int inLevel){
-        FAssertLF(inLevel < _treeHeight);
-        return _cellBlocksPerLevel[inLevel].end();
-    }
-
-    CellGroupConstIterator cellsEnd(const int inLevel) const {
-        FAssertLF(inLevel < _treeHeight);
-        return _cellBlocksPerLevel[inLevel].end();
-    }
-
-    int getNbCellGroupAtLevel(const int inLevel) const {
-        FAssertLF(inLevel < _treeHeight);
-        return int(_cellBlocksPerLevel[inLevel].size());
-    }
-
-    CellGroupClass* getCellGroup(const int inLevel, const int inIdx){
-        FAssertLF(inLevel < _treeHeight);
-        if(inIdx >= int(_cellBlocksPerLevel[inLevel].size())){
-            std::cout << "level : "<< inLevel << std::endl;
-            std::cout << " idx :"<< inIdx << std::endl;
-        }
-        FAssertLF(inIdx < int(_cellBlocksPerLevel[inLevel].size()));
-        return _cellBlocksPerLevel[inLevel][inIdx];
-    }
-
-    const int getNbElementsPerBlock() const{
-        return this->_nbElementsPerBlock;
-    }
-
-    const CellGroupClass* getCellGroup(const int inLevel, const int inIdx) const {
-        FAssertLF(inLevel < _treeHeight);
-        FAssertLF(inIdx < int(_cellBlocksPerLevel[inLevel].size()));
-        return _cellBlocksPerLevel[inLevel][inIdx];
-    }
-
-    ParticleGroupIterator leavesBegin(){
-        return _particleBlocks.begin();
-    }
-
-    ParticleGroupConstIterator leavesBegin() const {
-        return _particleBlocks.begin();
-    }
-
-    ParticleGroupIterator leavesEnd(){
-        return _particleBlocks.end();
-    }
-
-    ParticleGroupConstIterator leavesEnd() const {
-        return _particleBlocks.end();
-    }
-
-    int getNbParticleGroup() const {
-        return int(_particleBlocks.size());
-    }
-
-    ParticleGroupClass* getParticleGroup(const int inIdx){
-        FAssertLF(inIdx < int(_particleBlocks.size()));
-        return _particleBlocks[inIdx];
-    }
-
-    const ParticleGroupClass* getParticleGroup(const int inIdx) const {
-        FAssertLF(inIdx < int(_particleBlocks.size()));
-        return _particleBlocks[inIdx];
-    }
-
-    size_t getTotalNbLeaf() {
-        size_t nbLeaf = 0;
-        for(int i = 0 ; i < this->getNbParticleGroup();++i){
-            nbLeaf += this->_particleBlocks[i]->getNumberOfLeavesInBlock();
-        }
-        return nbLeaf;
-    }
-    /**
+         //   std::cout << "\t Global index  = " << block->getIdxGlobal();
+            std::cout << "\t Ratio of usage = " <<
+                         float(block->getNumberOfCellsInBlock())/float(block->getEndingIndex()-block->getStartingIndex()) << "\n";
+          }
+      }
+
+    std::cout << "There are " << _particleBlocks.size() << " leaf-groups.\n";
+    int idxGroup = 0;
+    FSize totalNbParticles = 0;
+    for (const ParticleGroupClass* block: _particleBlocks){
+        std::cout << "\t Group " << (idxGroup++);
+
+        std::cout << "\t Size = " << block->getNumberOfLeavesInBlock();
+        std::cout << "\t Starting Index = " << block->getStartingIndex();
+        std::cout << "\t Ending Index = " << block->getEndingIndex();
+        std::cout << "\t Nb Particles = " << block->getNbParticlesInGroup();
+        std::cout << "\t Global index  = " << block->getIdxGlobal();
+        std::cout << "\t Ratio of usage = " << float(block->getNumberOfLeavesInBlock())/float(block->getEndingIndex()-block->getStartingIndex()) << "\n";
+        totalNbParticles += block->getNbParticlesInGroup();
+      }
+    std::cout << "There are " << totalNbParticles << " particles.\n";
+  }
+
+  /////////////////////////////////////////////////////////
+  // Algorithm function
+  /////////////////////////////////////////////////////////
+
+  int getHeight() const {
+    return _treeHeight;
+  }
+
+  CellGroupIterator cellsBegin(const int inLevel){
+    FAssertLF(inLevel < _treeHeight);
+    return _cellBlocksPerLevel[inLevel].begin();
+  }
+
+  CellGroupConstIterator cellsBegin(const int inLevel) const {
+    FAssertLF(inLevel < _treeHeight);
+    return _cellBlocksPerLevel[inLevel].begin();
+  }
+
+  CellGroupIterator cellsEnd(const int inLevel){
+    FAssertLF(inLevel < _treeHeight);
+    return _cellBlocksPerLevel[inLevel].end();
+  }
+
+  CellGroupConstIterator cellsEnd(const int inLevel) const {
+    FAssertLF(inLevel < _treeHeight);
+    return _cellBlocksPerLevel[inLevel].end();
+  }
+
+  int getNbCellGroupAtLevel(const int inLevel) const {
+    FAssertLF(inLevel < _treeHeight);
+    return int(_cellBlocksPerLevel[inLevel].size());
+  }
+
+  CellGroupClass* getCellGroup(const int inLevel, const int inIdx){
+    FAssertLF(inLevel < _treeHeight);
+    FAssertLF(inIdx < int(_cellBlocksPerLevel[inLevel].size()));
+    return _cellBlocksPerLevel[inLevel][inIdx];
+  }
+
+  const int getNbElementsPerBlock() const{
+    return this->_nbElementsPerBlock;
+  }
+
+  const CellGroupClass* getCellGroup(const int inLevel, const int inIdx) const {
+    FAssertLF(inLevel < _treeHeight);
+    FAssertLF(inIdx < int(_cellBlocksPerLevel[inLevel].size()));
+    return _cellBlocksPerLevel[inLevel][inIdx];
+  }
+
+  ParticleGroupIterator leavesBegin(){
+    return _particleBlocks.begin();
+  }
+
+  ParticleGroupConstIterator leavesBegin() const {
+    return _particleBlocks.begin();
+  }
+
+  ParticleGroupIterator leavesEnd(){
+    return _particleBlocks.end();
+  }
+
+  ParticleGroupConstIterator leavesEnd() const {
+    return _particleBlocks.end();
+  }
+
+  int getNbParticleGroup() const {
+    return int(_particleBlocks.size());
+  }
+
+  ParticleGroupClass* getParticleGroup(const int inIdx){
+    FAssertLF(inIdx < int(_particleBlocks.size()));
+    return _particleBlocks[inIdx];
+  }
+
+  const ParticleGroupClass* getParticleGroup(const int inIdx) const {
+    FAssertLF(inIdx < int(_particleBlocks.size()));
+    return _particleBlocks[inIdx];
+  }
+
+  const FPoint<FReal> getBoxCenter() const{
+    return this->boxCenter;
+  }
+
+  const FReal getBoxWidth() const{
+    return this->boxWidth;
+  }
+  /** @return the sum of getNumberOfLeavesInBlock() over every particle group (read-only, hence const) */
+  std::size_t getTotalNbLeaf() const {
+    std::size_t nbLeaf = 0;
+    for(const ParticleGroupClass* block : _particleBlocks){
+        nbLeaf += block->getNumberOfLeavesInBlock();
+      }
+    return nbLeaf;
+  }
+  /**
      * RESTRICTION : The array will be initialise BEFORE
      * RESTRICTION : The morton index of particle will be at _treeHeight
      * get_number_of_particle compute the total number of
@@ -1130,27 +1225,26 @@ public:
      * @author benjamin.dufoyer@inria.fr
      * @param  container    container of particle
      */
-    template<class particle_t>
-    void get_number_of_particle(std::vector<particle_t>* container,
-                                std::vector<size_t>* nb_particles_per_leaf){
-        FAssert(container->size() != 0);
-        int current_idx = 0;
-        size_t old_m_index   = container->front().morton_index;
-        size_t current_m_idx = old_m_index;
-
-        for(size_t i = 0 ; i < container->size(); ++i){
-            current_m_idx = container->data()[i].morton_index;
-            if(current_m_idx == old_m_index){
-                nb_particles_per_leaf->data()[current_idx] += 1;
-            } else {
-                current_idx += 1;
-                nb_particles_per_leaf->data()[current_idx] += 1;
-                old_m_index = current_m_idx;
-            }
-        }
-    }
-
-    /**
+  template<class particle_t>
+  void get_number_of_particle(const std::vector<particle_t>& container,
+                              std::vector<std::size_t>& nb_particles_per_leaf){
+    FAssert(container.size() != 0);
+    // Slot of the leaf being counted; it moves forward each time the
+    // Morton index changes between two consecutive particles.
+    int leaf_slot = 0;
+    std::size_t previous_index = container.front().morton_index;
+    for(const particle_t& particle : container){
+        const std::size_t particle_index = particle.morton_index;
+        if(particle_index != previous_index){
+            // New run of identical Morton indices: switch to the next leaf.
+            ++leaf_slot;
+            previous_index = particle_index;
+          }
+        nb_particles_per_leaf[leaf_slot] += 1;
+      }
+  }
+
+  /**
      * create_leaf_level create the leaf level of the
      * Group tree from a blocked linear tree
      * @author benjamin.dufoyer@inria.fr
@@ -1158,153 +1252,153 @@ public:
      * @param  particles    container of particle, will be a std::vector
      */
 
-template<
-class Blocked_Linear_tree,
-class Particle_Container
->
-MortonIndex* create_leaf_level(Blocked_Linear_tree* in_lin_tree,
-                   Particle_Container* particles)
-{
+  template<class Blocked_Linear_tree,
+           class Particle_Container>
+  std::vector<MortonIndex> create_leaf_level(Blocked_Linear_tree& in_lin_tree,
+                                             Particle_Container& particles)
+  {
+    // Build the leaf level: one cell group and one particle group per block of in_lin_tree
     const int idxLevel = this->_treeHeight-1;
-    const int nb_block = in_lin_tree->get_nb_block();
+    const int nb_block = in_lin_tree.get_nb_block();
     const int block_size = this->_nbElementsPerBlock;
-    size_t in_nb_leaf = in_lin_tree->get_nb_leaf();
-    auto tree = in_lin_tree->get_tree();
-
-    MortonIndex* current_block_indexes = new MortonIndex[this->_nbElementsPerBlock];
-
-    std::vector<size_t> nb_particle_per_leaf(in_nb_leaf,0);
-    this->get_number_of_particle(particles,&nb_particle_per_leaf);
-
+    std::size_t in_nb_leaf = in_lin_tree.get_nb_leaf();
+    auto tree = in_lin_tree.get_tree();
+    // Scratch vector holding the Morton indexes of the block being built (returned at the end),
+    // and the number of particles per leaf, filled by get_number_of_particle
+    std::vector<MortonIndex> current_block_indexes(this->_nbElementsPerBlock,0);
+    std::vector<std::size_t> nb_particle_per_leaf(in_nb_leaf,0);
+    this->get_number_of_particle(particles,nb_particle_per_leaf);
+    // put the particles in the FP2PParticleContainer (std::size_t index: no truncation of size())
     FP2PParticleContainer<FReal> particle_container;
-    for(unsigned i = 0 ; i < particles->size() ; ++i){
-        particle_container.push(
-            particles->data()[i].position(),
-            particles->data()[i].physicalValue());
-    }
+    for(std::size_t i = 0 ; i < particles.size() ; ++i){
+        particle_container.push(particles[i].position(), particles[i].physicalValue());
+      }
 
-    size_t leaf_number = 0;
-    size_t leaf_number_min = 0;
+    std::size_t leaf_number = 0;
+    std::size_t leaf_number_min = 0;
 
     // Create every block
+    std::size_t idx_particules = 0; // running index in particle_container, shared by all blocks
     for(int n_block = 0 ; n_block < nb_block ; ++n_block){
         // Compute the morton index for the first and the
         // last cell of the block
         unsigned size_of_block = 0;
-        while(size_of_block < (unsigned)block_size && leaf_number < in_nb_leaf){
+        while(size_of_block < (unsigned)block_size
+              && leaf_number < in_nb_leaf)
+          {
             current_block_indexes[size_of_block] = tree->data()[leaf_number].morton_index;
             leaf_number += 1;
             size_of_block += 1;
-        }
+          }
 
         CellGroupClass*const new_block = new CellGroupClass(current_block_indexes[0],
-                       current_block_indexes[size_of_block-1]+1, //+1 is need by the class
-                       size_of_block);
-
-       size_t current_nb_particle = 0;
-       for(size_t i = 0 ; i < size_of_block ; ++i){
-           current_nb_particle += nb_particle_per_leaf[leaf_number_min+i];
-       }
-       FGroupOfParticles<
-       FReal,
-       NbSymbAttributes,
-       NbAttributesPerParticle,
-       AttributeClass>*const new_particle_block
-           = new FGroupOfParticles<
-                   FReal,
-                   NbSymbAttributes,
-                   NbAttributesPerParticle,
-                   AttributeClass>
-                   (current_block_indexes[0],
-                   current_block_indexes[size_of_block-1]+1,
-                   size_of_block,
-                   current_nb_particle);
-
-       // Initialise each cell of the block
-       size_t nb_particles_offset_before_leaf = 0;
-       size_t idx_particules = 0;
-       for(unsigned cell_id_in_block = 0;  cell_id_in_block < size_of_block; ++cell_id_in_block){
-           // Adding cell into leaf block
-           new_block->newCell(
-                   current_block_indexes[cell_id_in_block],
-                   cell_id_in_block);
-
-           // Fill symbolic information of the block
-           SymbolCellClass& symbolic =
-               new_block->getSymbolic(cell_id_in_block);
-           symbolic.setMortonIndex(current_block_indexes[cell_id_in_block]);
-           FTreeCoordinate coord;
-           coord.setPositionFromMorton(current_block_indexes[cell_id_in_block]);
-           symbolic.setCoordinate(coord);
-           symbolic.setLevel(idxLevel);
-
-           // Adding cell into particle blockCells
-
-           nb_particles_offset_before_leaf =
-               new_particle_block->newLeaf(
-                   current_block_indexes[cell_id_in_block],
-                   cell_id_in_block,
-                   FSize(nb_particle_per_leaf[leaf_number_min+cell_id_in_block]),
-                       nb_particles_offset_before_leaf
-                   );
-
-
-                BasicAttachedClass attached_leaf =
-                    new_particle_block->template getLeaf<BasicAttachedClass>(cell_id_in_block);
-
-                // Adding particle
-                for(size_t idxPart = 0 ; idxPart <   nb_particle_per_leaf[leaf_number_min+cell_id_in_block] ; ++idxPart ){
-                    attached_leaf.setParticle(
-                    idxPart,
-                    idx_particules,
-                    //nb_particles_offset_before_leaf+idxPart,
-                    &particle_container);
-                    ++idx_particules;
-                }
-                // Setting the offset to don't use particle twice
-               //offset_particles += nb_particle_per_leaf[idx_nb_particle_in_block];
-               //idx_nb_particle_in_block += 1;
-              // cell_id_in_block += 1;
-           }
-           leaf_number_min = leaf_number;
-           //Stock the block cell and the block particles
-           _cellBlocksPerLevel[idxLevel].push_back(new_block);
-           _particleBlocks.push_back(new_particle_block);
-           size_of_block = 0;
-    }
-    return current_block_indexes;
-
-}
-
-    /**
-     * create_level create every level
-     * It's juste a factorisation from the Beregenger constructor
-     * @author benjamin.dufoyer@inria.fr
-     * @param currentBlockIndexes block repartition at leaf level
-     * to construct
-     * @param inLeftLimite left limit of block of the current proc
-     * this parameter is not used with the blocked_linear_tree, he is here
-     * to have compatibility with old constructor
-     */
-void create_block_nodes_level(MortonIndex* currentBlockIndexes,
-                              MortonIndex inLeftLimite = -1
- ){
-     // Cronstruct every level
-     for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){
+            current_block_indexes[size_of_block-1]+1, //+1 is needed by the class
+            size_of_block);
+        std::size_t current_nb_particle = 0;
+        for(std::size_t i = 0 ; i < size_of_block ; ++i){
+            current_nb_particle += nb_particle_per_leaf[leaf_number_min+i];
+          }
+        FGroupOfParticles<
+            FReal,
+            NbSymbAttributes,
+            NbAttributesPerParticle,
+            AttributeClass>*const new_particle_block
+            = new FGroupOfParticles<
+            FReal,
+            NbSymbAttributes,
+            NbAttributesPerParticle,
+            AttributeClass>
+            (current_block_indexes[0],
+            current_block_indexes[size_of_block-1]+1,
+            size_of_block,
+            current_nb_particle);
+
+        // Initialise each cell of the block
+        std::size_t nb_particles_offset_before_leaf = 0;
+        for(unsigned cell_id_in_block = 0;  cell_id_in_block < size_of_block; ++cell_id_in_block)
+          {
+            // Adding cell into leaf block
+            new_block->newCell(
+                  current_block_indexes[cell_id_in_block],
+                  cell_id_in_block);
+
+            // Fill symbolic information of the block
+            SymbolCellClass& symbolic =
+                new_block->getSymbolic(cell_id_in_block);
+            symbolic.setMortonIndex(current_block_indexes[cell_id_in_block]);
+            FTreeCoordinate coord;
+            coord.setPositionFromMorton(current_block_indexes[cell_id_in_block]);
+            symbolic.setCoordinate(coord);
+            symbolic.setLevel(idxLevel);
+
+            // Adding cell into particle blockCells
+
+            nb_particles_offset_before_leaf =
+                new_particle_block->newLeaf(
+                  current_block_indexes[cell_id_in_block],
+                  cell_id_in_block,
+                  FSize(nb_particle_per_leaf[leaf_number_min+cell_id_in_block]),
+                nb_particles_offset_before_leaf
+                );
+
+
+            BasicAttachedClass attached_leaf =
+                new_particle_block->template getLeaf<BasicAttachedClass>(cell_id_in_block);
+
+            // Adding particle
+            for(std::size_t idxPart = 0 ; idxPart <   nb_particle_per_leaf[leaf_number_min+cell_id_in_block] ; ++idxPart ){
+                attached_leaf.setParticle(
+                      idxPart,
+                      idx_particules,
+                      //nb_particles_offset_before_leaf+idxPart,
+                      &particle_container);
+                ++idx_particules;
+              }
+            // Setting the offset so that a particle is not used twice
+            //offset_particles += nb_particle_per_leaf[idx_nb_particle_in_block];
+            //idx_nb_particle_in_block += 1;
+            // cell_id_in_block += 1;
+          }
+        leaf_number_min = leaf_number;
+        new_block->declare_mine();
+        // Store the cell block and the particle block
+        _cellBlocksPerLevel[idxLevel].push_back(new_block);
+        _particleBlocks.push_back(new_particle_block);
+        size_of_block = 0;
+      }
+    return current_block_indexes; // returned directly: no element-wise copy of the local vector
+
+  }
+
+  /**
+    * create_level create every level
+    * It is just a factorisation of the Beregenger constructor
+    * @author benjamin.dufoyer@inria.fr
+    * @param currentBlockIndexes block repartition at leaf level
+    * to construct
+    * @param inLeftLimite left limit of block of the current proc
+    * this parameter is not used with the blocked_linear_tree; it is only kept
+    * for compatibility with the old constructor
+    */
+  void create_block_nodes_level(std::vector<MortonIndex>& currentBlockIndexes,
+                                MortonIndex inLeftLimite = -1
+      ){
+    // Construct every level
+    for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){
         inLeftLimite = (inLeftLimite == -1 ? inLeftLimite : (inLeftLimite>>3));
 
-        CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin();
+        CellGroupConstIterator iterChildCells          = _cellBlocksPerLevel[idxLevel+1].begin();
         const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end();
 
         // Skip blocks that do not respect limit
         while(iterChildCells != iterChildEndCells
               && ((*iterChildCells)->getEndingIndex()>>3) <= inLeftLimite){
             ++iterChildCells;
-        }
+          }
         // If lower level is empty or all blocks skiped stop here
         if(iterChildCells == iterChildEndCells){
             break;
-        }
+          }
 
         MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex();
         if((currentCellIndex>>3) <= inLeftLimite) currentCellIndex = ((inLeftLimite+1)<<3);
@@ -1315,30 +1409,30 @@ void create_block_nodes_level(MortonIndex* currentBlockIndexes,
             // Count until end of sub group is reached or we have enough cells
             while(sizeOfBlock < _nbElementsPerBlock && iterChildCells != iterChildEndCells ){
                 if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3))
-                        && (*iterChildCells)->exists(currentCellIndex)){
+                   && (*iterChildCells)->exists(currentCellIndex)){
                     currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3);
                     sizeOfBlock += 1;
                     currentCellIndex = (((currentCellIndex>>3)+1)<<3);
-                }
+                  }
                 else{
                     currentCellIndex += 1;
-                }
+                  }
                 // If we are at the end of the sub group, move to next
                 while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){
                     ++iterChildCells;
                     // Update morton index
                     if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){
                         currentCellIndex = (*iterChildCells)->getStartingIndex();
-                    }
-                }
-            }
+                      }
+                  }
+              }
 
             // If group is full
             if(sizeOfBlock == _nbElementsPerBlock || (sizeOfBlock && iterChildCells == iterChildEndCells)){
                 // Create a group
                 CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0],
-                                currentBlockIndexes[sizeOfBlock-1]+1,
-                                sizeOfBlock);
+                    currentBlockIndexes[sizeOfBlock-1]+1,
+                    sizeOfBlock);
                 // Init cells
                 for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){
                     newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock);
@@ -1349,60 +1443,437 @@ void create_block_nodes_level(MortonIndex* currentBlockIndexes,
                     coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]);
                     symbolic.setCoordinate(coord);
                     symbolic.setLevel(idxLevel);
-                }
-
+                  }
+                newBlock->declare_mine();
                 // Keep the block
                 _cellBlocksPerLevel[idxLevel].push_back(newBlock);
                 sizeOfBlock = 0;
-            }
-        }
-    }
-}
-
-
-
-    /**
-     * Add LET block at leaf level of the local GroupTree
-     * @author benjamin.dufoyer@inria.fr
-     * @param  block_to_insert list symbolic information of block
-     *                         to add
-     * @param  start           indicate if we put the block at the
-     *                         start of at the end of current block
-     * @param  level           level to add
-     */
-    template<class block_t>
-    void add_LET_block(std::vector<block_t>& block_to_insert,
-                       int level,
-                       const MortonIndex& local_min_m_idx
-    ){
-        if(block_to_insert.size() == 0)
-            return;
-        // Allocate vector of new block
-        std::vector<CellGroupClass*> vect_block(block_to_insert.size());
-        // Fill the vector of new block
-        unsigned block_at_begin = 0;
-        for(unsigned i = 0; i < vect_block.size(); ++i){
-            vect_block[i] = new CellGroupClass(
-                                block_to_insert[i].start_index ,
-                                block_to_insert[i].end_index ,
-                                (int)block_to_insert[i].nb_leaf_in_block );
-            if(block_to_insert[i].end_index < local_min_m_idx){
+              }
+          }
+      }
+  }
+
+
+
+  /**
+    * Insert the LET groups described by block_to_insert at the given level.
+    * Groups whose idx_global_block is below the one of the first local group are
+    * put at the front of the level, the others at the back; at leaf level the
+    * matching particle groups are created and inserted the same way.
+    * @author benjamin.dufoyer@inria.fr
+    * @param  block_to_insert pair of symbolic information of cellGroup and Part
+    *                         group
+    * @param  level           The level where the LET groups are added
+    */
+  template<class particle_symbolic_block_t,
+           class cell_symbolic_block_t>
+  void add_LET_block(
+      std::pair<std::vector<cell_symbolic_block_t>,
+      std::vector<particle_symbolic_block_t>>& block_to_insert,
+      int                                                level
+      ){
+    // Check if we are at leaf level
+    bool leaf_level = ( level == ( _treeHeight - 1 ) );
+    // Bind the two vectors of the pair by reference: they are only read, so no copy is needed
+    const std::vector<cell_symbolic_block_t>& cell_to_insert = block_to_insert.first;
+    const std::vector<particle_symbolic_block_t>& particle_to_insert = block_to_insert.second;
+    // If we are at leaf level
+    if(leaf_level){
+        // Check if we have the same number of symbolic information of cellBlock
+        // and of particleBlock
+        FAssert(cell_to_insert.size() == particle_to_insert.size());
+      } else {
+        // Else check if the particle block is empty
+        FAssert(particle_to_insert.size() == 0);
+      }
+    // if we have no block to insert, we don't need to continue this function
+    if(cell_to_insert.size() == 0)
+      return;
+    // Get my local minimum index global
+    int min_idx_global = this->getCellGroup(level,0)->getIdxGlobal();
+    // Allocate vector of new block
+    std::vector<CellGroupClass*> vect_block(cell_to_insert.size());
+
+    // Fill the vector of new block
+    unsigned block_at_begin = 0;
+    // iterate on every cell
+    for(unsigned i = 0; i < cell_to_insert.size(); ++i){
+        // create new cell
+        vect_block[i] = new CellGroupClass(
+              cell_to_insert[i].start_index ,
+              cell_to_insert[i].end_index,
+              (int)cell_to_insert[i].nb_leaf_in_block );
+        // set the global index of the cell
+        vect_block[i]->setIdxGlobal(cell_to_insert[i].idx_global_block);
+        // if the global index is less than the local index, we need to
+        // insert
+        // the block at the beginning of the tree
+        if(cell_to_insert[i].idx_global_block < min_idx_global){
+            ++block_at_begin;
+          }
+        // init each cell of the new block
+        for(unsigned j = 0; j < cell_to_insert[i].m_idx_in_block.size(); ++j){
+            vect_block[i]->newCell(cell_to_insert[i].m_idx_in_block[j],j);
+          }
+      }
+    // Add block at beginning of the level
+    _cellBlocksPerLevel[level].insert(
+          _cellBlocksPerLevel[level].begin(),
+          vect_block.begin(),
+          vect_block.begin()+block_at_begin);
+
+    // Add block at the end of the level
+    _cellBlocksPerLevel[level].insert(
+          _cellBlocksPerLevel[level].end(),
+          vect_block.begin()+block_at_begin,
+          vect_block.end());
+    // if we are at the leaf level
+    if(leaf_level ){
+        // init of the vector of particle
+        std::vector<ParticleGroupClass*> vect_particle(particle_to_insert.size());
+
+        block_at_begin = 0;
+        // iterate on every symbolic particle group
+        for(unsigned i = 0 ; i < particle_to_insert.size(); ++i ){
+            // create a new particle group
+            vect_particle[i] = new ParticleGroupClass(
+                  cell_to_insert[i].start_index ,
+                  cell_to_insert[i].end_index,
+                  (int)cell_to_insert[i].nb_leaf_in_block,
+                  particle_to_insert[i].nb_particles);
+            // set the global index of the new particle group
+            vect_particle[i]-> setIdxGlobal(particle_to_insert[i].idx_global_block);
+            // if the current idx global block have a idx global smaller than
+            // the global index in local
+            if(cell_to_insert[i].idx_global_block < min_idx_global){
                 ++block_at_begin;
-            }
-        }
+              }
+            size_t offset = 0;
+            // init all leaf of the current particle group
+            for(int j = 0; j < cell_to_insert[i].nb_leaf_in_block; ++j){
+                offset = vect_particle[i]->newLeaf(
+                      cell_to_insert[i].m_idx_in_block[j],
+                      j,
+                      particle_to_insert[i].nb_particle_per_leaf[j],
+                      offset);
+              }
+          }
         // Add block at beginning of the level
-        _cellBlocksPerLevel[level].insert(
-            _cellBlocksPerLevel[level].begin(),
-            vect_block.begin(),
-            vect_block.begin()+block_at_begin);
+        _particleBlocks.insert(
+              _particleBlocks.begin(),
+              vect_particle.begin(),
+              vect_particle.begin()+block_at_begin);
         // Add block a the end of the level
+        _particleBlocks.insert(
+              _particleBlocks.end(),
+              vect_particle.begin()+block_at_begin,
+              vect_particle.end());
+      }
+  }
+
+
+#ifdef SCALFMM_USE_MPI
+  /**
+     * Computes and adds the local essential tree (LET) blocks at one level:
+     * finds the cells needed by P2P (leaf level only) and M2L, then fetches them.
+     * @author benjamin.dufoyer@inria.fr
+     * @param  group_linear_tree  Linear tree (particle distribution and MPI configuration)
+     * @param  level              Level at which the LET is built (only read here)
+     * @param  gmin,gmax  [in,out] Global Morton bounds; shifted right by 3 bits above the leaf level
+     * @param  lmin,lmax  [out]    Local Morton bounds at this level (-1 when it has no cell group)
+     * @param  dim                Dimension of the coordinates (forwarded to the M2L search)
+     */
+  template<class GroupLinearTree>
+  void create_LET_at_level(
+      GroupLinearTree&    group_linear_tree,
+      int&                level,
+      MortonIndex&        gmin,
+      MortonIndex&        gmax,
+      MortonIndex&        lmin,
+      MortonIndex&        lmax,
+      int                 dim = 3
+      ){
+    // remember whether we are at the leaf level
+    bool leaf_level = (this->getHeight()-1 == level);
+    // above the leaf level, shift the global bounds by 3 bits (NOTE(review): hard-coded, ignores dim)
+    if(!leaf_level){
+        gmin = gmin >> 3;
+        gmax = gmax >> 3;
+      }
+    const MortonIndex global_min_m_idx = gmin;
+    const MortonIndex global_max_m_idx = gmax;
+    // Compute the min and max local Morton index at this level (written back to lmin/lmax)
+    if(this->getNbCellGroupAtLevel(level) > 0){
+        lmin = this->getCellGroup(level,0)->getStartingIndex();
+        lmax = this->getCellGroup(level,this->getNbCellGroupAtLevel(level)-1)->getEndingIndex()-1;
+      } else {
+        lmin = -1;
+        lmax = -1;
+      }
+    const MortonIndex local_min_m_idx = lmin;
+    const MortonIndex local_max_m_idx = lmax;
+
+    // declared here because it is only filled inside the if below
+    std::vector<MortonIndex> leaf_P2P;
+    if(leaf_level){
+        // IDEA: this could be a task
+        // Compute the leaves needed by the P2P operation;
+        // the call returns a vector with every leaf needed.
+        // P2P interactions only exist at the leaf level.
+        leaf_P2P = dstr_grp_tree_builder::get_leaf_P2P_interaction(
+              *this,
+              global_min_m_idx,
+              global_max_m_idx,
+              local_min_m_idx,
+              local_max_m_idx);
+      }
+
+    // IDEA: this could be a task
+    // Compute the cells needed by the M2L operation at this level;
+    // the call returns a vector with every Morton index needed.
+    // get leaf M2L
+    std::vector<MortonIndex> leaf_M2L =
+        dstr_grp_tree_builder::get_leaf_M2L_interaction_at_level(
+          global_min_m_idx,
+          global_max_m_idx,
+          local_min_m_idx,
+          local_max_m_idx,
+          level,
+          *this,
+          dim);
+    std::vector<MortonIndex> needed_leaf;
+    if(leaf_level){
+        // concatenation of the leaves needed by the P2P and of the
+        // leaves needed by the M2L
+        needed_leaf = dstr_grp_tree_builder::concat_M2L_P2P(leaf_P2P,leaf_M2L);
+      } else {
+        // above the leaf level, only the M2L list is needed
+        needed_leaf = leaf_M2L;
+        group_linear_tree.update_index_particle_distribution(
+              std::pair<MortonIndex,MortonIndex>(local_min_m_idx
+                                                 ,local_max_m_idx)
+              );
+      }
+    // free memory:
+    // swapping with an empty temporary releases the vector's storage
+    std::vector<MortonIndex>().swap(leaf_P2P);
+    std::vector<MortonIndex>().swap(leaf_M2L);
+
+    std::vector<std::pair<MortonIndex,MortonIndex>> index_particle_distribution =
+        group_linear_tree.get_index_particle_distribution();
+
+    // Get the interaction matrix
+    // matrix[2][nproc]
+    // first line: Morton indexes to send
+    // second line: Morton indexes to receive
+    std::vector<std::vector<size_t>> global_matrix_interaction = dstr_grp_tree_builder::get_matrix_interaction(
+          needed_leaf,
+          index_particle_distribution,
+          group_linear_tree.get_mpi_conf());
+
+    // Send and receive the blocks
+    // auto is used to keep the returned type readable:
+    // it's a std::pair<std::vector<cell_symbolic_block>,std::vector<particle_symbolic_block>>
+    // block_t is a struct defined in FDistributedGroupTreeBuilder.hpp
+    auto let_block =
+        dstr_grp_tree_builder::send_get_symbolic_block_at_level(
+          needed_leaf,
+          global_matrix_interaction,
+          *this,
+          level,
+          group_linear_tree.get_mpi_conf());
+
+    // free the list of needed leaves
+    std::vector<MortonIndex>().swap(needed_leaf);
+    // free the interaction matrix
+    std::vector<std::vector<size_t>>().swap(global_matrix_interaction);
+
+
+    // add the received LET blocks to the tree
+    this->add_LET_block(
+          let_block,
+          level);
+  }
+
+  /**
+     * Creates the local essential tree at every level from the leaf level
+     * down to level_min, then calls send_get_block_M2M.
+     * @author benjamin.dufoyer@inria.fr
+     * @param  group_linear_tree        The group linear tree
+     * @param  level_min                The minimum level to build the LET
+     * @param  dim                      The dimension of the coordinates
+     */
+  template<class GroupLinearTree>
+  void create_LET(
+      GroupLinearTree&    group_linear_tree,
+      int                 level_min = 2,
+      int                 dim = 3
+      ){
+    // get the particle distribution
+    std::vector<std::pair<MortonIndex,MortonIndex>> index_particle_distribution =
+        group_linear_tree.get_index_particle_distribution();
+
+    // Global Morton bounds: start of the first interval of the distribution
+    // and end of the last one. Local bounds come from my particle groups.
+    // All four are passed by reference and updated by create_LET_at_level.
+    MortonIndex gmin = index_particle_distribution.front().first;
+    MortonIndex gmax = index_particle_distribution.back().second;
+    MortonIndex lmin = this->getParticleGroup(0)->getStartingIndex();
+    MortonIndex lmax = this->getParticleGroup((this->getNbParticleGroup()-1) )->getEndingIndex();
+    // the LET is only built when there is more than one process
+    if( group_linear_tree.get_mpi_conf().comm.size() != 1 ){
+        // compute the LET from the leaf level up to level_min
+        for(int i = this->_treeHeight-1 ; i >= level_min ; --i){
+            //        std::cout << "Start creating LET at " << i << std::endl;
+            this->create_LET_at_level(group_linear_tree,i,gmin,gmax,lmin,lmax,dim);
+          }
+      }
+    dstr_grp_tree_builder::send_get_block_M2M(
+          *this,
+          group_linear_tree.get_mpi_conf()
+          );
+  }
+#endif
+  /**
+     * IDEA: this could be factorised with the function that adds the LET.
+     * Inserts one block at the given level, unless it is already present.
+     * @author benjamin.dufoyer@inria.fr
+     * @param  block_to_insert          Symbolic information of the block
+     * @param  list_m_idx               Morton index of each cell of the block
+     * @param  level                    Level at which the block is inserted
+     * @param  nb_particle_per_leaf     [OPTIONAL] particle count per leaf; read at leaf level
+     */
+  template<class info_symb_cell_t>
+  void insert_block(
+      info_symb_cell_t&           block_to_insert,
+      std::vector<MortonIndex>&   list_m_idx,
+      int                         level,
+      std::vector<FSize>*         nb_particle_per_leaf = nullptr
+      ){
+    // Check whether we already have this block
+    for(int i = 0 ; i < this->getNbCellGroupAtLevel(level); ++i){
+        auto* container = this->getCellGroup(level,i);
+        // stop as soon as the global index goes past the one to insert
+        if(container->getIdxGlobal() > block_to_insert.idx_global_block ){
+            break;
+          }
+        if(container->getIdxGlobal() == block_to_insert.idx_global_block ){
+            return;
+          }
+      }
+    // Check whether we are at the leaf level
+    bool leaf_level = ( level == ( _treeHeight - 1 ) );
+    // create the new block
+    CellGroupClass* new_block = new CellGroupClass(
+          block_to_insert.start_index,
+          block_to_insert.end_index,
+          block_to_insert.nb_leaf_in_block);
+    // set the global index of the new block
+    new_block->setIdxGlobal(block_to_insert.idx_global_block);
+    // initialise every cell of the new block
+    for(int i = 0 ; i < block_to_insert.nb_leaf_in_block ; ++i){
+        new_block->newCell(list_m_idx[i],i);
+      }
+    // at the leaf level a matching particle group is created as well
+    if(leaf_level){
+        MortonIndex min_global_idx = 0;
+        MortonIndex max_global_idx = 0;
+        int idx_min = 0;
+        int idx_max = 0;
+        // find my first owned group; keep its position and its global index minus one
+        for(int i = 0 ; i < this->getNbParticleGroup() ; ++i){
+            if(this->getCellGroup(level,i)->isMine()){
+                min_global_idx = this->getParticleGroup(i)->getIdxGlobal()-1;
+                idx_min = i;
+                break;
+              }
+          }
+        // find my last owned group; keep its position and its global index plus one
+        for(int i = this->getNbParticleGroup()-1 ; i >= 0 ; --i){
+            if(this->getCellGroup(level,i)->isMine()){
+                max_global_idx = this->getParticleGroup(i)->getIdxGlobal()+1;
+                idx_max = i;
+                break;
+              }
+          }
+        // total number of particles of this block (nb_particle_per_leaf is dereferenced: it must not be null here)
+        FSize nb_particle = 0;
+        for(unsigned i = 0; i < nb_particle_per_leaf->size(); ++i){
+            nb_particle += nb_particle_per_leaf->data()[i];
+          }
+        // create the particle group
+        ParticleGroupClass* new_block_p = new ParticleGroupClass(
+              block_to_insert.start_index ,
+              block_to_insert.end_index,
+              (int)block_to_insert.nb_leaf_in_block,
+              nb_particle);
+        // provisional global index; it is set again below once the insertion side is known
+        new_block_p->setIdxGlobal((int)min_global_idx);
+        std::size_t offset = 0;
+        // create every leaf of the particle group
+        for(int  i = 0 ; i < block_to_insert.nb_leaf_in_block ; ++i){
+            offset = new_block_p->newLeaf(
+                  list_m_idx[i],
+                  i,
+                  nb_particle_per_leaf->data()[i],
+                  offset
+                  );
+          }
+        // insert the particle group just before my first owned group or just after my last one
+        if(this->getParticleGroup(idx_min)->getStartingIndex() > block_to_insert.start_index){
+            new_block_p->setIdxGlobal((int)min_global_idx);
+            _particleBlocks.insert(
+                  _particleBlocks.begin()+idx_min,
+                  new_block_p
+                  );
+          } else {
+            new_block_p->setIdxGlobal((int)max_global_idx);
+            _particleBlocks.insert(
+                  _particleBlocks.begin()+idx_max+1,
+                  new_block_p
+                  );
+          }
+
+      }
+    // Fast path: the new cell block goes first at this level.
+    // Case 1: the level already holds at least one block
+    if(this->getNbCellGroupAtLevel(level) > 0) {
+        if(this->getCellGroup(level,0)->getIdxGlobal() > block_to_insert.idx_global_block){
+            _cellBlocksPerLevel[level].insert(
+                  _cellBlocksPerLevel[level].begin(),
+                  new_block);
+            return;
+          }
+        // Case 2: the level holds no block yet
+      } else {
         _cellBlocksPerLevel[level].insert(
-            _cellBlocksPerLevel[level].end(),
-            vect_block.begin()+block_at_begin,
-            vect_block.end());
-    }
-
+              _cellBlocksPerLevel[level].begin(),
+              new_block);
+        return;
+      }
+    // otherwise find the position of the block:
+    // insert before the first block with a larger global index
+    for(int idx_block = 0 ; idx_block < this->getNbCellGroupAtLevel(level) ; ++idx_block){
+        auto* container = this->getCellGroup(level,idx_block);
+        // block already present (NOTE(review): the first loop already returns in this case; new_block is not freed here)
+        if(container->getIdxGlobal() == block_to_insert.idx_global_block ){
+            return;
+          }
+        if(container->getIdxGlobal() >  block_to_insert.idx_global_block ){
+            _cellBlocksPerLevel[level].insert(
+                  _cellBlocksPerLevel[level].begin()+idx_block,
+                  new_block);
+            return;
+          }
+      }
+    _cellBlocksPerLevel[level].insert(
+          _cellBlocksPerLevel[level].end(),
+          new_block);
+  }
 
 };
 
+
+
+
 #endif // FGROUPTREE_HPP
diff --git a/Src/GroupTree/Core/FOutOfBlockInteraction.hpp b/Src/GroupTree/Core/FOutOfBlockInteraction.hpp
index c4a9ca2e6..d0bd203c3 100644
--- a/Src/GroupTree/Core/FOutOfBlockInteraction.hpp
+++ b/Src/GroupTree/Core/FOutOfBlockInteraction.hpp
@@ -1,7 +1,7 @@
 #ifndef FOUTOFBLOCKINTERACTION_HPP
 #define FOUTOFBLOCKINTERACTION_HPP
 
-#include "../../Utils/FGlobal.hpp"
+#include "Utils/FGlobal.hpp"
 
 #include "../StarPUUtils/FStarPUDefaultAlign.hpp"
 
diff --git a/Src/GroupTree/Core/FP2PGroupParticleContainer.hpp b/Src/GroupTree/Core/FP2PGroupParticleContainer.hpp
index 9cd819106..1d2e1691c 100644
--- a/Src/GroupTree/Core/FP2PGroupParticleContainer.hpp
+++ b/Src/GroupTree/Core/FP2PGroupParticleContainer.hpp
@@ -66,7 +66,11 @@ public:
     int getNVALS() const {
         return NVALS;
     }
-
+/*
+    const long long int* getDataDown() const {
+        return Parent::template getAttribute<0>();
+    }
+*/
 };
 
 #endif // FP2PGROUPPARTICLECONTAINER_HPP
diff --git a/Src/GroupTree/StarPUUtils/FStarPUCptInteractionsWrapper.hpp b/Src/GroupTree/StarPUUtils/FStarPUCptInteractionsWrapper.hpp
index 1bfec0963..d7fd4151a 100644
--- a/Src/GroupTree/StarPUUtils/FStarPUCptInteractionsWrapper.hpp
+++ b/Src/GroupTree/StarPUUtils/FStarPUCptInteractionsWrapper.hpp
@@ -448,7 +448,9 @@ public:
         FSize nbInteractions = int(outsideInteractions->size());
 
         if(mode == 1){
-            FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(GetWorkerId(), &taskTimeRecorder, (((currentCells->getStartingIndex()+1) * (cellsOther->getStartingIndex()+2)) * 20 + idxLevel) * 8 + 3, "M2L-ext"));
+            FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(GetWorkerId(), &taskTimeRecorder,
+                                                        (((currentCells->getStartingIndex()+1) * (cellsOther->getStartingIndex()+2)) * 20 + idxLevel) * 8 + 3,
+                                                        "M2L-ext"));
             for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
                 const auto& inter_data = (*outsideInteractions)[outInterIdx];
                 const multipole_t* source_multipole
@@ -479,7 +481,9 @@ public:
             }
         }
         else{
-            FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(GetWorkerId(), &taskTimeRecorder, (((currentCells->getStartingIndex()+1) * (cellsOther->getStartingIndex()+1)) * 20 + idxLevel) * 8 + 3, "M2L-ext"));
+            FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(GetWorkerId(), &taskTimeRecorder,
+                                                        (((currentCells->getStartingIndex()+1) * (cellsOther->getStartingIndex()+1)) * 20 + idxLevel) * 8 + 3,
+                                                        "M2L-ext"));
             for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){
                 const auto& inter_data = (*outsideInteractions)[outInterIdx];
                 const multipole_t* source_multipole
diff --git a/Src/Utils/FMpi.hpp b/Src/Utils/FMpi.hpp
index 49bb7085c..7a5338ab9 100644
--- a/Src/Utils/FMpi.hpp
+++ b/Src/Utils/FMpi.hpp
@@ -1,8 +1,8 @@
 // See LICENCE file at project root
+// @FUSE_MPI
 #ifndef FMPI_HPP
 #define FMPI_HPP
 
-
 #include <cstdio>
 #include <stdexcept>
 
@@ -10,7 +10,7 @@
 #ifndef SCALFMM_USE_MPI
 #error The MPI header is included while SCALFMM_USE_MPI is turned OFF
 #endif
-
+#include <mpi.h>
 
 #include "FNoCopyable.hpp"
 #include "FMath.hpp"
@@ -27,7 +27,7 @@
 /////////////////////////////////////////////////////////////////////////////////////////
 
 
-#include <mpi.h>
+
 
 
 /////////////////////////////////////////////////////////////////////////////////////////
@@ -86,7 +86,7 @@ public:
     ////////////////////////////////////////////////////////
     // FComm to factorize MPI_Comm work
     ////////////////////////////////////////////////////////
-    
+
     /**
      * \brief MPI comunicator abstraction
      *
@@ -102,7 +102,7 @@ public:
 
 
         /// Updates current process rank and process count from mpi
-        void reset(){
+        void updateMembers(){
             FMpi::Assert( MPI_Comm_rank(communicator,&rank),  __LINE__ );
             FMpi::Assert( MPI_Comm_size(communicator,&nbProc),  __LINE__ );
         }
@@ -113,7 +113,7 @@ public:
             FMpi::Assert( MPI_Comm_dup(inCommunicator, &communicator),  __LINE__ , "comm dup");
             FMpi::Assert( MPI_Comm_group(communicator, &group),  __LINE__ , "comm group");
 
-            reset();
+            this->updateMembers();
         }
 
         /// Constructor : duplicates the given communicator
@@ -121,7 +121,7 @@ public:
             FMpi::Assert( MPI_Comm_dup(inCommunicator.communicator, &communicator),  __LINE__ , "comm dup");
             FMpi::Assert( MPI_Comm_group(communicator, &group),  __LINE__ , "comm group");
 
-            reset();
+            this->updateMembers();
         }
 
         FComm& operator=(const FComm& inCommunicator ) {
@@ -131,7 +131,7 @@ public:
             FMpi::Assert( MPI_Comm_dup(inCommunicator.communicator, &communicator),  __LINE__ , "comm dup");
             FMpi::Assert( MPI_Comm_group(communicator, &group),  __LINE__ , "comm group");
 
-            reset();
+            this->updateMembers();
 
             return *this;
         }
@@ -250,7 +250,7 @@ public:
             MPI_Comm_free(&previousComm);
             MPI_Group_free(&previousGroup);
 
-            reset();
+            this->updateMembers();
             delete[]  procsIdArray ;
         }
 
@@ -278,7 +278,7 @@ public:
             MPI_Comm_free(&previousComm);
             MPI_Group_free(&previousGroup);
 
-            reset();
+            this->updateMembers();
             FAssertLF(nbProc == counterNewGroup);
             delete[]  procsIdArray ;
         }
@@ -306,7 +306,7 @@ public:
      *     [fourmi062:15896] [[13237,0],1]-[[13237,1],1] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
      *     [fourmi056:04597] [[13237,0],3]-[[13237,1],3] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
      *     [fourmi053:08571] [[13237,0],5]-[[13237,1],5] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104)
-     *    
+     *
      * Error for process 1:
      *
      *     [[13237,1],1][btl_openib_component.c:3227:handle_wc] from fourmi062 to: fourmi056 error polling LP CQ with status LOCAL LENGTH ERROR status number 1 for wr_id 7134664 opcode 0  vendor error 105 qp_idx 3
@@ -326,6 +326,15 @@ public:
         communicator = new FComm(MPI_COMM_WORLD);
     }
 
+    /// Constructor from an existing communicator (FComm duplicates it); throws std::logic_error on a second instance
+    FMpi(MPI_Comm comm):communicator(nullptr){
+        if( instanceCount > 0) {
+            throw std::logic_error("FMpi should not be instanciated more than once.");
+        }
+        instanceCount++;
+        communicator = new FComm(comm);
+    }
+
     /// Constructor
     FMpi(int inArgc, char **  inArgv ) : communicator(nullptr) {
         if( instanceCount > 0) {
@@ -346,7 +355,7 @@ public:
     }
 
     /// Get the global communicator
-    const FComm& global() {
+    const FComm& global() const {
         return (*communicator);
     }
 
@@ -511,12 +520,10 @@ public:
 private:
     /// The original communicator
     FComm* communicator;
-    
+
     /// Counter to avoid several instanciations
     static int instanceCount;
 };
 
 
 #endif //FMPI_HPP
-
-
diff --git a/Src/Utils/FValidationAlgorithm.hpp b/Src/Utils/FValidationAlgorithm.hpp
index d814edb56..9ac674d7e 100644
--- a/Src/Utils/FValidationAlgorithm.hpp
+++ b/Src/Utils/FValidationAlgorithm.hpp
@@ -1,10 +1,229 @@
 #ifndef _VALIDATION_METHOD_
 #define _VALIDATION_METHOD_
 
+#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp"
+#include "../../Src/Components/FSimpleLeaf.hpp"
+#include "../../Src/Kernels/Chebyshev/FChebCell.hpp"
+#include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp"
+#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp"
+#include "../../Src/Containers/FOctree.hpp"
+#include "../../Src/Utils/FMpi.hpp"
+#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp"
+#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
+
+#include "../../Src/Kernels/Uniform/FUnifCell.hpp"
+#include "../../Src/Kernels/Uniform/FUnifKernel.hpp"
+
 
 namespace validation_methods
 {
 
+using FReal = double;
+/// Runs a reference FMM (uniform kernel, order 6) on allParticles and checks the grouped tree against it.
+template<class GroupCellSymbClass,
+         class GroupCellUpClass,
+         class GroupCellDownClass,
+         class GroupContainerClass,
+         class particle_t,
+         class GroupAlgorithm>
+void validate_uniform_distributed(
+    FGroupTree<FReal,GroupCellSymbClass,GroupCellUpClass, GroupCellDownClass, GroupContainerClass,1,4, FReal>& groupedTree,
+    GroupAlgorithm& groupalgo,
+    int& operationsToProceed,
+    std::vector<particle_t>& allParticles,
+    FMpi& mpiComm
+){
+        static const int ORDER = 6;
+        using MatrixKernelClass = FInterpMatrixKernelR<FReal>;
+        using ContainerClass =  FP2PParticleContainer<FReal>;
+        using LeafClass      =  FSimpleLeaf<FReal, ContainerClass>;
+        using CellClass      =  FUnifCell<FReal,ORDER> ;
+        using OctreeClass    =  FOctree<FReal, CellClass,ContainerClass,LeafClass> ;
+        using KernelClass    =  FUnifKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> ;
+        using FmmClass       =  FFmmAlgorithmThreadProc<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> ;
+        // Reference octree built with the height, box width and box centre of the grouped tree
+        OctreeClass treeCheck(groupedTree.getHeight(),
+                              /*subTree*/2,
+                              groupedTree.getBoxWidth(),
+                              groupedTree.getBoxCenter());
+        // Every particle is inserted with the constant 0.1 (NOTE(review): presumably the physical value - confirm)
+        for(FSize idxPart = 0 ; idxPart < (FSize)allParticles.size() ; ++idxPart){
+              treeCheck.insert(allParticles[idxPart].pos, 0.1);
+        }
+
+        MatrixKernelClass MatrixKernelValidation;
+
+        KernelClass kernels(groupedTree.getHeight(),
+                            groupedTree.getBoxWidth(),
+                            groupedTree.getBoxCenter(), &MatrixKernelValidation);
+        FmmClass algorithm(mpiComm.global(),&treeCheck, &kernels);
+        algorithm.execute(operationsToProceed);
+        // NOTE(review): validate_group_tree_distributed is defined further down; confirm a declaration is visible here
+        validate_group_tree_distributed(
+            groupedTree,
+            groupalgo,
+            treeCheck
+        );
+}
+/// Runs a reference FMM (Chebyshev kernel, order 6) on allParticles and checks the grouped tree against it.
+template<class GroupCellSymbClass,
+         class GroupCellUpClass,
+         class GroupCellDownClass,
+         class GroupContainerClass,
+         class particle_t,
+         class GroupAlgorithm>
+void validate_chebyshev_distributed(
+    FGroupTree<FReal,GroupCellSymbClass,GroupCellUpClass, GroupCellDownClass, GroupContainerClass,1,4, FReal>& groupedTree,
+    GroupAlgorithm& groupalgo,
+    int operationsToProceed,
+    std::vector<particle_t>& allParticles,
+    const FMpi& mpiComm
+){
+        static const int ORDER = 6;
+        using MatrixKernelClass = FInterpMatrixKernelR<FReal>;
+        using ContainerClass =  FP2PParticleContainer<FReal>;
+        using LeafClass      =  FSimpleLeaf<FReal, ContainerClass>;
+        using CellClass      =  FChebCell<FReal,ORDER> ;
+        using OctreeClass    =  FOctree<FReal, CellClass,ContainerClass,LeafClass> ;
+        using KernelClass    =  FChebSymKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> ;
+        using FmmClass       =  FFmmAlgorithmThreadProc<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> ;
+        // Reference octree built with the height, box width and box centre of the grouped tree
+        OctreeClass treeCheck(groupedTree.getHeight(),
+                              /*subTree*/2,
+                              groupedTree.getBoxWidth(),
+                              groupedTree.getBoxCenter());
+        // Every particle is inserted with the constant 0.1 (NOTE(review): presumably the physical value - confirm)
+        for(FSize idxPart = 0 ; idxPart < (FSize)allParticles.size() ; ++idxPart){
+              treeCheck.insert(allParticles[idxPart].pos, 0.1);
+        }
+
+        MatrixKernelClass MatrixKernelValidation;
+        // The MPI wrapper and the algorithm are taken by reference: neither is copied for the validation
+        KernelClass kernels(groupedTree.getHeight(),
+                            groupedTree.getBoxWidth(),
+                            groupedTree.getBoxCenter(), &MatrixKernelValidation);
+        FmmClass algorithm(mpiComm.global(),&treeCheck, &kernels);
+        algorithm.execute(operationsToProceed);
+        // NOTE(review): validate_group_tree_distributed is defined further down; confirm a declaration is visible here
+        validate_group_tree_distributed(
+            groupedTree,
+            groupalgo,
+            treeCheck
+        );
+}
+/// Compares the grouped tree with the reference octree treeCheck: multipole and local
+/// expansions of every owned cell, then positions, potentials and forces of every owned leaf.
+/// Differences above 1E-10 (relative Inf or L2 norm) are printed on std::cout; nothing is returned.
+template<class GroupCellSymbClass,
+         class GroupCellUpClass,
+         class GroupCellDownClass,
+         class GroupContainerClass,
+         class GroupAlgorithm,
+         class CellClass,
+         class ContainerClass,
+         class LeafClass>
+void validate_group_tree_distributed(
+    FGroupTree<FReal,GroupCellSymbClass,GroupCellUpClass, GroupCellDownClass, GroupContainerClass,1,4, FReal>& groupedTree,
+    GroupAlgorithm& groupalgo,
+    FOctree<FReal, CellClass,ContainerClass,LeafClass>& treeCheck
+){
+    const FReal epsi = 1E-10;
+    // Pass 1: cells. Only cells reported as owned by groupalgo are compared.
+    groupedTree.forEachMyCellWithLevel(
+        [&](GroupCellSymbClass* gsymb ,
+        GroupCellUpClass*   gmul,
+        GroupCellDownClass* gloc,
+        const int level)
+    {
+        if(groupalgo.isDataOwnedBerenger(gsymb->getMortonIndex(), level))
+        {
+            const CellClass* cell = treeCheck.getCell(gsymb->getMortonIndex(), level);
+            if(cell == nullptr){
+                std::cout << "[Empty] Error cell should exist " << gsymb->getMortonIndex() << "\n";
+            } else {
+                FMath::FAccurater<FReal> diffUp;
+                diffUp.add(cell->getMultipoleData().get(0), gmul->get(0), gmul->getVectorSize());
+                if(diffUp.getRelativeInfNorm() > epsi || diffUp.getRelativeL2Norm() > epsi){
+                    std::cout << "[Up] Up is different at index " << gsymb->getMortonIndex() << " level " << level << " is " << diffUp << "\n";
+                }
+                FMath::FAccurater<FReal> diffDown;
+                diffDown.add(cell->getLocalExpansionData().get(0), gloc->get(0), gloc->getVectorSize());
+                if(diffDown.getRelativeInfNorm() > epsi || diffDown.getRelativeL2Norm() > epsi){
+                    std::cout << "[Down] Down is different at index " << gsymb->getMortonIndex() << " level " << level << " is " << diffDown << "\n";
+                }
+            }
+        }
+    });
+    // Pass 2: leaves. Particles are compared index by index, so both trees must store them in the same order.
+    groupedTree.forEachCellMyLeaf(
+        [&](GroupCellSymbClass* gsymb ,
+        GroupCellUpClass*   /* gmul */,
+        GroupCellDownClass* /* gloc */,
+        FP2PGroupParticleContainer<FReal> * leafTarget)
+    {
+        if(groupalgo.isDataOwnedBerenger(gsymb->getMortonIndex(), groupedTree.getHeight()-1))
+        {
+            const ContainerClass* targets = treeCheck.getLeafSrc(gsymb->getMortonIndex());
+            if(targets == nullptr){
+                std::cout << "[Empty] Error leaf should exist " << gsymb->getMortonIndex() << "\n";
+            } else {
+                const FReal*const gposX = leafTarget->getPositions()[0];
+                const FReal*const gposY = leafTarget->getPositions()[1];
+                const FReal*const gposZ = leafTarget->getPositions()[2];
+                const FSize gnbPartsInLeafTarget = leafTarget->getNbParticles();
+                const FReal*const gforceX = leafTarget->getForcesX();
+                const FReal*const gforceY = leafTarget->getForcesY();
+                const FReal*const gforceZ = leafTarget->getForcesZ();
+                const FReal*const gpotential = leafTarget->getPotentials();
+
+                const FReal*const posX = targets->getPositions()[0];
+                const FReal*const posY = targets->getPositions()[1];
+                const FReal*const posZ = targets->getPositions()[2];
+                const FSize nbPartsInLeafTarget = targets->getNbParticles();
+                const FReal*const forceX = targets->getForcesX();
+                const FReal*const forceY = targets->getForcesY();
+                const FReal*const forceZ = targets->getForcesZ();
+                const FReal*const potential = targets->getPotentials();
+
+                if(gnbPartsInLeafTarget != nbPartsInLeafTarget){
+                    std::cout << "[Empty] Not the same number of particles at " << gsymb->getMortonIndex()
+                    << " gnbPartsInLeafTarget " << gnbPartsInLeafTarget << " nbPartsInLeafTarget " << nbPartsInLeafTarget << "\n";
+                }else{
+                    FMath::FAccurater<FReal> potentialDiff;
+                    FMath::FAccurater<FReal> fx, fy, fz;
+                    for(FSize idxPart = 0 ; idxPart < nbPartsInLeafTarget ; ++idxPart){
+                        if(gposX[idxPart] != posX[idxPart] || gposY[idxPart] != posY[idxPart] || gposZ[idxPart] != posZ[idxPart]){
+                            std::cout << "[Empty] Not the same particlea at " << gsymb->getMortonIndex() << " idx " << idxPart << " "
+                            << gposX[idxPart] << " " << posX[idxPart] << " " << gposY[idxPart] << " " << posY[idxPart]
+                            << " " << gposZ[idxPart] << " " << posZ[idxPart] << "\n";
+                        } else {
+                            potentialDiff.add(potential[idxPart], gpotential[idxPart]);
+                            fx.add(forceX[idxPart], gforceX[idxPart]);
+                            fy.add(forceY[idxPart], gforceY[idxPart]);
+                            fz.add(forceZ[idxPart], gforceZ[idxPart]);
+                        }
+                    }
+                    if(potentialDiff.getRelativeInfNorm() > epsi || potentialDiff.getRelativeL2Norm() > epsi){
+                        std::cout << "[Up] potentialDiff is different at index " << gsymb->getMortonIndex() << " is " << potentialDiff << "\n";
+                    }
+                    if(fx.getRelativeInfNorm() > epsi || fx.getRelativeL2Norm() > epsi){
+                        std::cout << "[Up] fx is different at index " << gsymb->getMortonIndex() << " is " << fx << "\n";
+                    }
+                    if(fy.getRelativeInfNorm() > epsi || fy.getRelativeL2Norm() > epsi){
+                        std::cout << "[Up] fy is different at index " << gsymb->getMortonIndex() << " is " << fy << "\n";
+                    }
+                    if(fz.getRelativeInfNorm() > epsi || fz.getRelativeL2Norm() > epsi){
+                        std::cout << "[Up] fz is different at index " << gsymb->getMortonIndex() << " is " << fz << "\n";
+                    }
+                }
+            }
+        }
+    });
+}
+
+
+
+
 /**
  * function to check the result of the groupTree Algo
  * @author benjamin.dufoyer@inria.fr
diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt
index 9616ae310..ab40202cc 100644
--- a/Tests/CMakeLists.txt
+++ b/Tests/CMakeLists.txt
@@ -19,6 +19,7 @@ file(
 INCLUDE_DIRECTORIES(
      ${SCALFMM_BINARY_DIR}/Src    
      ${SCALFMM_SOURCE_DIR}/Src   
+     ${SCALFMM_SOURCE_DIR}
      ${SCALFMM_INCLUDES}
 )
 
diff --git a/Tests/GroupTree/testBlockedChebyshev.cpp b/Tests/GroupTree/testBlockedChebyshev.cpp
index c2d18f002..78cc3f3c4 100644
--- a/Tests/GroupTree/testBlockedChebyshev.cpp
+++ b/Tests/GroupTree/testBlockedChebyshev.cpp
@@ -1,11 +1,11 @@
 // ==== CMAKE =====
 // @FUSE_BLAS
+// 
 // ================
 // Keep in private GIT
 
 
 #include "../../Src/Utils/FGlobal.hpp"
-
 #include "../../Src/GroupTree/Core/FGroupTree.hpp"
 
 #include "../../Src/Components/FSimpleLeaf.hpp"
@@ -34,6 +34,7 @@
 #include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp"
 #include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"
 #endif
+
 #include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
 
 #include "../../Src/Utils/FParameterNames.hpp"
diff --git a/Tests/GroupTree/testBlockedImplicitChebyshev.cpp b/Tests/GroupTree/testBlockedImplicitChebyshev.cpp
index 476158d69..2ee5e754f 100644
--- a/Tests/GroupTree/testBlockedImplicitChebyshev.cpp
+++ b/Tests/GroupTree/testBlockedImplicitChebyshev.cpp
@@ -1,11 +1,14 @@
 // @FUSE_BLAS
 // @FUSE_MPI
 // @FUSE_STARPU
+// @FUSE_MPI
 // Keep in private GIT
 #include <iostream>
 #include <fstream>
 #include <vector>
+#ifdef SCALFMM_USE_MPI
 #include <mpi.h>
+#endif
 using namespace std;
 
 #include "../../Src/Utils/FGlobal.hpp"
diff --git a/Tests/GroupTree/testBlockedImplicitUniform.cpp b/Tests/GroupTree/testBlockedImplicitUniform.cpp
index a2ee1756d..9c2b9b185 100644
--- a/Tests/GroupTree/testBlockedImplicitUniform.cpp
+++ b/Tests/GroupTree/testBlockedImplicitUniform.cpp
@@ -154,9 +154,10 @@ int main(int argc, char* argv[]){
     GroupAlgorithm groupalgo(&groupedTree,&groupkernel, distributedMortonIndex);
     mpiComm.global().barrier();
     FTic timerExecute;
+    timerExecute.tic();
     starpu_fxt_start_profiling();
     groupalgo.execute(operationsToProceed);
-    groupedTree.printInfoBlocks();
+    //groupedTree.printInfoBlocks();
     mpiComm.global().barrier();
     starpu_fxt_stop_profiling();
     timerExecute.tac();
diff --git a/Tests/GroupTree/testBlockedMpiInterpolation.cpp b/Tests/GroupTree/testBlockedMpiInterpolation.cpp
new file mode 100644
index 000000000..d85945bd3
--- /dev/null
+++ b/Tests/GroupTree/testBlockedMpiInterpolation.cpp
@@ -0,0 +1,68 @@
+// ==== CMAKE =====
+// @FUSE_BLAS
+// ================
+// Keep in private GIT
+// @FUSE_MPI
+// @FUSE_STARPU
+
+#include "../../Src/Files/FBlockedMpiInterpolation.hpp"
+#include "../../Src/Utils/FGlobal.hpp"
+
+
+#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp"
+
+#include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp"
+#include "../../Src/Kernels/Chebyshev/FChebCell.hpp"
+#include "Kernels/Interpolation/FInterpMatrixKernel.hpp"
+
+#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp"
+#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp"
+#include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp"
+#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
+
+#include "../../Src/Components/FTestParticleContainer.hpp"
+#include "../../Src/Components/FTestCell.hpp"
+#include "../../Src/Components/FTestKernels.hpp"
+
+#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp"
+#include "../../Src/Files/FMpiTreeBuilder.hpp"
+#include "../../Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp"
+
+#include "../../Src/Files/FMpiFmaGenericLoader.hpp"
+#include "../../Src/Containers/FCoordinateComputer.hpp"
+
+#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"
+
+#include <memory>
+// Driver: declares the Chebyshev (ORDER 6) cell/kernel types and forwards the command line
+// to blockedMpiInterpolation::execute_algorithm instantiated with those types.
+int main(int argc, char *argv[]){
+    const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"};
+    const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"};
+    FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.",
+                         FParameterDefinitions::OctreeHeight,FParameterDefinitions::InputFile,
+                         FParameterDefinitions::NbParticles,
+                         LocalOptionBlocSize,LocalOptionNoValidate);
+    // Types used to instantiate the algorithm below.
+    using FReal = double;
+    static const int ORDER = 6;
+    using GroupContainerClass = FP2PGroupParticleContainer<FReal>;
+    using MatrixKernelClass   =  FInterpMatrixKernelR<FReal>;
+    using GroupCellClass      = FChebCell<FReal, ORDER>;
+    using GroupCellUpClass    = typename GroupCellClass::multipole_t;
+    using GroupCellDownClass  = typename GroupCellClass::local_expansion_t;
+    using GroupCellSymbClass = FSymbolicData;
+    using kernelClass = FChebSymKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>;
+    // The option values are not read here; argc/argv are forwarded as-is (presumably parsed by the callee).
+    auto groupedTree = blockedMpiInterpolation::execute_algorithm<
+        GroupCellClass,
+        GroupCellUpClass,
+        GroupCellDownClass,
+        GroupCellSymbClass,
+        kernelClass,
+        MatrixKernelClass
+    >(argc,argv);
+    // NOTE(review): the value stored in groupedTree is not used after this point.
+    // Validation
+    // NOTE(review): no validation code follows; confirm whether it is done inside execute_algorithm.
+}
diff --git a/Tests/LET_STF_Implicit/testCompareGroupTree.cpp b/Tests/LET_STF_Implicit/testCompareGroupTree.cpp
new file mode 100644
index 000000000..e1da9b739
--- /dev/null
+++ b/Tests/LET_STF_Implicit/testCompareGroupTree.cpp
@@ -0,0 +1,431 @@
+// See LICENCE file at project root
+
+// ==== CMAKE =====
+// @FUSE_MPI
+// @FUSE_BLAS
+// ================
+
+
+#include "../../Src/Utils/FGlobal.hpp"
+//#include <mpi.h>
+// include algo for linear tree
+#include "inria/algorithm/distributed/mpi.hpp"
+#include "inria/linear_tree/balance_tree.hpp"
+// tree class
+#include "../../Src/GroupTree/Core/FGroupTree.hpp"
+// symbolic data
+#include "../../Src/Components/FSymbolicData.hpp"
+// cell class
+#include "../../Src/Kernels/Chebyshev/FChebCell.hpp"
+// parameter
+#include "../../Src/Utils/FParameters.hpp"
+#include "../../Src/Utils/FParameterNames.hpp"
+// GroupParticleContainer
+#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
+// file loader
+#include "../../Src/Files/FMpiFmaGenericLoader.hpp"
+// FBox
+#include "Adaptive/FBox.hpp"
+// Group linear tree
+#include "../../Src/GroupTree/Core/FGroupLinearTree.hpp"
+// Function for GroupLinearTree
+#include "../../Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp"
+#include "../../Src/Utils/FTic.hpp"
+#include "../../Src/Files/FFmaGenericLoader.hpp"
+#include "../../Src/Utils/FLeafBalance.hpp"
+
+#include "../../Contribs/json.hpp"
+
+#include <memory>
+
+// Type configuration used by main() for both tree-building paths of this test.
+static const int ORDER = 6;
+using FReal               = double;
+using GroupCellClass      = FChebCell<FReal, ORDER>;
+using GroupCellUpClass    = typename GroupCellClass::multipole_t;       // multipole type of the cell
+using GroupCellDownClass  = typename GroupCellClass::local_expansion_t; // local-expansion type of the cell
+using GroupCellSymbClass  = FSymbolicData;
+using GroupContainerClass = FP2PGroupParticleContainer<FReal>;
+using GroupOctreeClass    = FGroupTree<FReal,
+                                        GroupCellSymbClass,
+                                        GroupCellUpClass,
+                                        GroupCellDownClass, GroupContainerClass, 1, 4, FReal>;
+
+// One particle: a position, a physical value and the Morton index computed from the position.
+struct particle_t {
+    using position_t = FPoint<FReal>;
+    position_t pos;            // position of the particle
+    FReal phi;                 // physical value attached to the particle
+    std::size_t morton_index;  // Morton index, set by the caller from pos at a chosen level
+    const auto& position() const {
+        return pos;
+    }
+    const FPoint<FReal>& getPosition() const { // const-qualified like its siblings, so it also works on const particles
+        return pos;
+    }
+    const auto& physicalValue() const{
+        return phi;
+    }
+    const auto& getPositions() const { // same value as position(), under a second name
+        return pos;
+    }
+    int weight() const { return 1;} // every particle has the same weight
+    friend constexpr auto morton_index(const particle_t& p) { // free function, reachable through ADL only
+        return p.morton_index;
+    }
+};
+void sortParticle(FPoint<FReal> * allParticlesToSort, int treeHeight, int groupSize, std::vector<std::vector<int>> & sizeForEachGroup, std::vector<MortonIndex> & distributedMortonIndex, FFmaGenericLoader<FReal>& loader, int nproc);
+void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight);
+FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total);
+
+// Times the construction of a blocked (group) tree and stores the timings in a JSON file.
+// With "-let" the tree is built from a distributed linear tree and then extended into a LET;
+// otherwise every particle is read, sorted by sortParticle() and the tree is built directly.
+int main(int argc, char *argv[]) {
+    FTic time;
+    // Parameter definition
+    const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"};
+    const FParameterNames LocalOptionTreeChoice{ {"-let"}, "Build the LET Group Tree"};
+    // Parameter help
+    FHelpDescribeAndExit(argc, argv,
+                         "Test the blocked tree created with linear tree." ,FParameterDefinitions::OctreeHeight,
+                         FParameterDefinitions::InputFile,
+                         LocalOptionBlocSize,
+                         LocalOptionTreeChoice);
+    // Get parameters
+    // Get the groupSize
+    const int groupSize =
+            FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);
+    // Get the file input
+    const char* const filename       =
+            FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
+    // Get the treeHeight
+    const unsigned int TreeHeight    =
+            FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
+
+    const bool letTree =
+            FParameters::getValue(argc,argv,LocalOptionTreeChoice.options,false);
+    // The level is the level of the leaves
+    int level = TreeHeight-1;
+    // Init MPI communicator
+        // ScalFMM MPI wrapper
+    FMpi FMpiComm(argc,argv);
+    int nproc = FMpiComm.global().processCount();
+        // inria communicator built on the same MPI communicator
+    inria::mpi::communicator mpi_comm(FMpiComm.global().getComm());
+
+    // Show job information
+    std::cout << "GroupTree building comparaison " << std::endl;
+    std::cout << "File name : " << filename              << std::endl;
+    std::cout <<  "TreeHeight : "   <<   TreeHeight    << std::endl;
+    std::cout <<  "Block size : "   <<    groupSize   << std::endl;
+    std::cout << "------------------------------------------" << std::endl;
+
+    FMpiFmaGenericLoader<FReal> loaderParrallel(filename, FMpiComm.global());
+    FFmaGenericLoader<FReal>    loader(filename);
+    // timer_1 measures the LET construction; `time` measures the tree construction
+    FTic timer_1;
+
+    if(letTree){
+        std::vector<particle_t> myParticles(loaderParrallel.getMyNumberOfParticles());
+        // Maximum level used to sort the particles: 3 bits of the Morton index per level
+        const std::size_t max_level = sizeof(particle_t::morton_index) * 8 / 3;
+        // Box used to compute the Morton indexes
+        const FBox<FPoint<FReal>> box{loaderParrallel.getBoxWidth(),loaderParrallel.getCenterOfBox()};
+
+        // Iterate over the particles owned by this process
+        for(FSize idxPart = 0; idxPart <loaderParrallel.getMyNumberOfParticles();++idxPart){
+            particle_t tmp;
+            // Read the current particle
+            loaderParrallel.fillParticle(&tmp.pos,&tmp.phi);
+            // Morton index of the current particle at max_level
+            tmp.morton_index = inria::linear_tree::get_morton_index(
+                tmp.pos, box, max_level);
+            // Overwrite the physical value read from the file with 0.1
+            tmp.phi = 0.1;
+            // Store the particle
+            myParticles.at(idxPart) = tmp;
+        }
+
+        // All local particles now carry a Morton index:
+        // sort them across the processes
+        inria::sort(mpi_comm,myParticles,
+                    [](const auto& p1, const auto& p2) {
+                        return p1.morton_index < p2.morton_index;
+                    });
+
+        // Create the linear tree:
+        // a linear tree is a tree holding only the leaves
+        auto linear_tree =
+            inria::linear_tree::create_balanced_linear_tree_at_level(mpi_comm,
+                                                                     level,
+                                                                     box,
+                                                                     myParticles);
+        // Create the blocked linear tree: the same linear tree with
+        // more information attached
+        // Declaration of the group linear tree
+        FGroupLinearTree<decltype(linear_tree)::value_type>group_linear_tree{mpi_comm};
+        group_linear_tree.create_local_group_linear_tree(
+            &linear_tree,
+            groupSize
+        );
+        // Redistribute the particles according to the linear tree
+        // Redistribution of particles
+        inria::linear_tree::redistribute_particles(mpi_comm,
+                                                   linear_tree,
+                                                   myParticles);
+        // Recompute the Morton index of every particle
+        // at the leaf level (TreeHeight-1)
+        for(unsigned i = 0 ; i < myParticles.size(); ++i){
+            myParticles.at(i).morton_index = inria::linear_tree::get_morton_index(
+                myParticles.at(i).pos, box, level);
+        }
+
+        // Share the particle distribution, needed to build the GroupTree
+        group_linear_tree.set_index_particle_distribution(myParticles);
+
+        // Declare the group tree:
+        // an empty instance of FGroupTree
+        GroupOctreeClass  localGroupTree =
+            GroupOctreeClass::template get_block_tree_instance<GroupCellSymbClass, GroupCellUpClass,
+                                                               GroupCellDownClass, GroupContainerClass>(TreeHeight,
+                                                                                                        groupSize,
+                                                                                                        loaderParrallel.getCenterOfBox(),
+                                                                                                        loaderParrallel.getBoxWidth());
+
+        // Fill the localGroupTree (timed by `time`)
+        std::cout << "Start creating LET Blocked tree " << std::endl;
+        time.tic();
+        localGroupTree.create_tree(group_linear_tree,myParticles);
+        time.tac();
+        // Build the LET tree.
+        // The LET is the Local Essential Tree:
+        // the same tree to which blocks from other processes are added;
+        // the added blocks are those needed to post the tasks of the
+        // P2P, M2M and M2L interactions
+
+        // Create the LET (timed by timer_1)
+        // (the explicit global-index step that used to precede this call is disabled)
+        // The barrier below is reached by every process before the results are written
+        timer_1.tic();
+        localGroupTree.create_LET(group_linear_tree);
+        timer_1.tac();
+        mpi_comm.barrier();
+        nlohmann::json result;
+        std::string name = std::to_string(TreeHeight);
+        name += "_" + std::to_string(groupSize)+"_"+std::to_string(loader.getNumberOfParticles()) + ".json";
+        std::ifstream file(name);
+        if(mpi_comm.rank() == 0){
+            auto tree = time.elapsed();
+            auto let = timer_1.elapsed();
+            if(file.fail()){
+                // The result file does not exist yet: write a full record
+                result["TreeHeight"] = TreeHeight;
+                result["GroupSize"] = groupSize;
+                result["Filename"] = filename;
+                result["NbParticle"] = loaderParrallel.getNumberOfParticles();
+                result["LET_Blocked_tree"]["tree"] = tree;
+                result["LET_Blocked_tree"]["let"]  = let;
+                result["LET_Blocked_tree"]["total"] = let+tree;
+            } else {
+                file >> result;
+                result["LET_Blocked_tree"]["tree"] = tree;
+                result["LET_Blocked_tree"]["let"]  = let;
+                result["LET_Blocked_tree"]["total"] = let+tree;
+            }
+            // NOTE(review): append mode, so an existing file receives a second JSON document; confirm intent
+            std::ofstream out(name, std::ios::out | std::ios::app);
+            out << result << std::endl;
+        }
+        return 0;
+    } else {
+        // Sequential path: this process reads the whole file with the sequential loader,
+        // sorts all the particles and builds the group tree from the sorted array.
+        // The unique_ptr owns the particle buffer so that it is released on every exit path.
+        const FSize NbParticles   = loader.getNumberOfParticles();
+        std::unique_ptr<FPoint<FReal>[]> allParticlesToSort(new FPoint<FReal>[NbParticles]);
+        for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+            FReal physicalValue = 0.1;
+            loader.fillParticle(&allParticlesToSort[idxPart], &physicalValue);//Same with file or not
+        }
+        std::vector<MortonIndex> distributedMortonIndex;
+        std::vector<std::vector<int>> sizeForEachGroup;
+        sortParticle(allParticlesToSort.get(), TreeHeight, groupSize, sizeForEachGroup, distributedMortonIndex, loader, nproc);
+
+        FP2PParticleContainer<FReal> allParticles;
+        for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+            FReal physicalValue = 0.1;
+            allParticles.push(allParticlesToSort[idxPart], physicalValue);
+        }
+        // Put the data into the tree (timed by `time`)
+        time.tic();
+        GroupOctreeClass groupedTree(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, sizeForEachGroup, true);
+        time.tac();
+        nlohmann::json result;
+        std::string name = std::to_string(TreeHeight);
+        name += "_" + std::to_string(groupSize)+"_"+std::to_string(loader.getNumberOfParticles()) + ".json";
+        std::ifstream file(name);
+        if(mpi_comm.rank() == 0){
+            if(file.fail()){
+                // The result file does not exist yet: write a full record
+                result["TreeHeight"] = TreeHeight;
+                result["GroupSize"]  = groupSize;
+                result["Filename"]   = filename;
+                result["NbParticle"] = loaderParrallel.getNumberOfParticles();
+                result["Group_tree"]["time"] = time.elapsed();
+            } else {
+                file >> result;
+                result["Group_tree"]["time"] = time.elapsed();
+            }
+            // NOTE(review): append mode, so an existing file receives a second JSON document; confirm intent
+            std::ofstream out(name, std::ios::out | std::ios::app);
+            out << result << std::endl;
+        }
+        return 0;
+    }
+}
+
+
+// Sorts the particles by the Morton index of their host cell and derives from the sorted
+// order the number of cells per group at every level (sizeForEachGroup) and the Morton
+// interval bounds per process (distributedMortonIndex). allParticles is reordered in place.
+// With no particle, sizeForEachGroup is only resized and distributedMortonIndex is left untouched.
+void sortParticle(FPoint<FReal> * allParticles, int treeHeight, int groupSize, std::vector<std::vector<int>> & sizeForEachGroup, std::vector<MortonIndex> & distributedMortonIndex, FFmaGenericLoader<FReal>& loader, int nproc)
+{
+    // Record used for sorting: a position and the Morton index of its host cell
+    struct ParticleSortingStruct{
+        FPoint<FReal> position;
+        MortonIndex mindex;
+    };
+    const FSize nbParticles = loader.getNumberOfParticles();
+    if(nbParticles <= 0){ sizeForEachGroup.resize(treeHeight); return; } // nothing to sort; avoids indexing element -1 below
+    // Scratch array of records; owned by a vector so that it is released on return
+    std::vector<ParticleSortingStruct> sortBuffer(static_cast<std::size_t>(nbParticles));
+    ParticleSortingStruct* const particlesToSort = sortBuffer.data();
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
+        const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>(loader.getCenterOfBox(), loader.getBoxWidth(),
+                                                                                           treeHeight,
+                                                                                           allParticles[idxPart]);
+        const MortonIndex particleIndex = host.getMortonIndex();
+        particlesToSort[idxPart].mindex = particleIndex;
+        particlesToSort[idxPart].position = allParticles[idxPart];
+    }
+    // Sort the records by Morton index
+    FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
+            return v1.mindex <= v2.mindex;
+        });
+    // Write the positions back, in sorted order, into the caller's array
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
+        allParticles[idxPart] = particlesToSort[idxPart].position;
+    }
+    // Count the leaves (distinct consecutive Morton indexes)
+    sizeForEachGroup.resize(treeHeight);
+    MortonIndex previousLeaf = -1;
+    int numberOfLeaf = 0;
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
+    {
+        if(particlesToSort[idxPart].mindex != previousLeaf)
+        {
+            previousLeaf = particlesToSort[idxPart].mindex;
+            ++numberOfLeaf;
+        }
+    }
+
+    // Compute the group sizes at the leaf level, process by process
+    FLeafBalance balancer;
+    for(int processId = 0; processId < nproc; ++processId)
+    {
+        FSize size_last;
+        FSize countGroup;
+        FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
+        size_last = leafOnProcess%groupSize;
+        countGroup = (leafOnProcess - size_last)/groupSize;
+        for(FSize i = 0; i < countGroup; ++i)
+            sizeForEachGroup[treeHeight-1].push_back(groupSize);
+        if(size_last > 0)
+            sizeForEachGroup[treeHeight-1].push_back((int)size_last);
+    }
+
+    // Compute the working interval of each process at the leaf level
+    previousLeaf = -1;
+    int countLeaf = 0;
+    int processId = 0;
+    FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, 0) - balancer.getLeft(numberOfLeaf, nproc, 0);
+    distributedMortonIndex.push_back(previousLeaf);
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
+    {
+        if(particlesToSort[idxPart].mindex != previousLeaf)
+        {
+            previousLeaf = particlesToSort[idxPart].mindex;
+            ++countLeaf;
+            if(countLeaf == leafOnProcess)
+            {
+                distributedMortonIndex.push_back(previousLeaf);
+                distributedMortonIndex.push_back(previousLeaf);
+                countLeaf = 0;
+                ++processId;
+                leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
+            }
+        }
+    }
+    distributedMortonIndex.push_back(particlesToSort[nbParticles - 1].mindex);
+    // Compute the working intervals at every level
+    std::vector<std::vector<std::vector<MortonIndex>>> nodeRepartition;
+    createNodeRepartition(distributedMortonIndex, nodeRepartition, nproc, treeHeight);
+    // For each level above the leaves, compute the group sizes
+    for(int idxLevel = treeHeight - 2; idxLevel >= 0; --idxLevel)
+    {
+        processId = 0;
+        int countParticleInTheGroup = 0;
+        MortonIndex previousMortonCell = -1;
+        for(FSize idxPart = 0; idxPart < nbParticles; ++idxPart)
+        {
+            MortonIndex mortonCell = (particlesToSort[idxPart].mindex) >> (3*(treeHeight - 1 - idxLevel));
+            if(mortonCell <= nodeRepartition[idxLevel][processId][1]) // the index is inside the working interval
+            {
+                if(mortonCell != previousMortonCell) // this is a new index
+                {
+                    ++countParticleInTheGroup; // count it in the current group
+                    previousMortonCell = mortonCell;
+                    if(countParticleInTheGroup == groupSize) // the group is full: record its size
+                    {
+                        sizeForEachGroup[idxLevel].push_back(groupSize);
+                        countParticleInTheGroup = 0;
+                    }
+                }
+            }
+            else // moving to the next process interval: record what has been counted so far
+            {
+                if(countParticleInTheGroup > 0)
+                    sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup);
+                countParticleInTheGroup = 1;
+                previousMortonCell = mortonCell;
+                ++processId;
+            }
+        }
+        if(countParticleInTheGroup > 0)
+            sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup);
+    }
+}
+
+void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight) {
+    nodeRepartition.resize(treeHeight, std::vector<std::vector<MortonIndex>>(nproc, std::vector<MortonIndex>(2)));
+    for(int proc = 0; proc < nproc; ++proc){ // leaf level: both bounds are copied from distributedMortonIndex
+        nodeRepartition[treeHeight-1][proc][0] = distributedMortonIndex[2*proc];
+        nodeRepartition[treeHeight-1][proc][1] = distributedMortonIndex[2*proc+1];
+    }
+    for(int level = treeHeight - 2; level >= 0; --level){ // upper levels: child bounds shifted right by 3 bits
+        std::vector<std::vector<MortonIndex>>& parents = nodeRepartition[level];
+        const std::vector<std::vector<MortonIndex>>& children = nodeRepartition[level+1];
+        for(int proc = 0; proc < nproc; ++proc){ // after process 0, a lower bound must exceed the previous one
+            parents[proc][0] = (proc == 0) ? (children[proc][0] >> 3) : FMath::Max(children[proc][0] >> 3, parents[proc-1][0]+1);
+            parents[proc][1] = children[proc][1] >> 3;
+        }
+    }
+}
+
+FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){
+    const FSize remainder = total%mpi_count; // the first `remainder` ranks receive one extra particle
+    const FSize evenShare = (total - remainder)/mpi_count;
+    return (mpi_rank < remainder) ? evenShare+1 : evenShare;
+}
diff --git a/Tests/LET_STF_Implicit/testFMMInterpolationStarPU.hpp b/Tests/LET_STF_Implicit/testFMMInterpolationStarPU.hpp
new file mode 100644
index 000000000..ee4031fa3
--- /dev/null
+++ b/Tests/LET_STF_Implicit/testFMMInterpolationStarPU.hpp
@@ -0,0 +1,284 @@
+// -*-c++-*-
+// ==== CMAKE =====
+// @FUSE_BLAS
+// @FUSE_MPI
+// @FUSE_STARPU
+// ================
+//
+#include "Utils/FGlobal.hpp"
+// parameters
+#include "Utils/FParameters.hpp"
+#include "Utils/FParameterNames.hpp"
+
+// include algo for linear tree
+#include "inria/algorithm/distributed/mpi.hpp"
+#include "inria/linear_tree/balance_tree.hpp"
+// tree class
+#include "GroupTree/Core/FGroupTree.hpp"
+// symbolic data
+#include "Components/FSymbolicData.hpp"
+//
+
+// GroupParticleContainer
+#include "GroupTree/Core/FP2PGroupParticleContainer.hpp"
+// file loader
+#include "Files/FMpiFmaGenericLoader.hpp"
+//#include "Files/FmaGenericLoader.hpp"
+// FBox
+#include "Adaptive/FBox.hpp"
+// Group linear tree
+#include "GroupTree/Core/FGroupLinearTree.hpp"
+// Function for GroupLinearTree
+#include "GroupTree/Core/FDistributedGroupTreeBuilder.hpp"
+//
+// Algorithm include
+#include "GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp"
+#include "GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp"
+#include "GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp"
+//
+// To construct either the duplicated Octree or the LET
+//
+#include "GroupTree/Core/FBuildGroupTree.hpp"
+//For validation
+#include "GroupTree/Core/FGroupTools.hpp"
+#include "GroupTree/Core/FCheckResults.hpp"
+#include "Components/FSimpleLeaf.hpp"
+#include "Core/FFmmAlgorithm.hpp"
+// For output
+#include "Contribs/json.hpp"
+
+
+static const int ORDER  = 6; // order passed to FInterpolationCell and FInterpolationKernel below
+using FReal             = double;
+//
+//   1/r kernel
+using MatrixKernelClass = FInterpMatrixKernelR<FReal> ;
+// Definition of the common tree structure.
+// NOTE(review): FInterpolationCell/FInterpolationKernel are presumably defined by the file including this header; confirm.
+using CellClass           = FInterpolationCell<FReal, ORDER>;
+using GroupCellUpClass    = typename CellClass::multipole_t;
+using GroupCellDownClass  = typename CellClass::local_expansion_t;
+using GroupCellSymbClass  = FSymbolicData;
+using GroupContainerClass = FP2PGroupParticleContainer<FReal>;
+using GroupOctreeClass    = FGroupTree<FReal,GroupCellSymbClass,
+GroupCellUpClass,
+GroupCellDownClass, GroupContainerClass, 1, 4, FReal>;
+
+// Definition of the algorithm structure: kernel, CPU wrapper, then the StarPU implicit algorithm.
+using GroupKernelClass    = FStarPUAllCpuCapacities<FInterpolationKernel
+<FReal, CellClass,GroupContainerClass,MatrixKernelClass,ORDER>>;
+
+using GroupCpuWrapper     = FStarPUCpuWrapper<
+typename GroupOctreeClass::CellGroupClass, CellClass, GroupKernelClass,
+typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> ;
+// Algorithm type instantiated and executed by main() below.
+using GroupAlgorithm      = FGroupTaskStarPUImplicitAlgorithm<GroupOctreeClass,
+typename GroupOctreeClass::CellGroupClass,GroupKernelClass,
+typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper>;
+//////////////////////////////////////////////////////////////////
+
+
+int main(int argc, char *argv[]) {
+  // Parameter definition
+  const FParameterNames LocalOptionBlocSize { {"-bs"},"The size of the block of the blocked tree"};
+  const FParameterNames LocalOptionValidate { {"-check-result"}, "To compare with direct computation"};
+  const FParameterNames LocalOptionBuildTree { {"-tree"}, "To compare with direct computation 0 let, 1 duplicate tree (let distribution) 2 duplicate tree "};
+  const std::string TreeBuilderOption[3]={"Let tree", "Duplicated tree with Let distribution", "Duplicated tree"};
+  // Parameter help
+  FHelpDescribeAndExit(argc, argv,
+                       "Test the blocked tree created with linear tree." ,
+                       FParameterDefinitions::OctreeHeight,
+                       FParameterDefinitions::InputFile,
+                       LocalOptionBlocSize,
+                       LocalOptionValidate, LocalOptionBuildTree);
+
+  // Get parameters
+  // Get the groupSize
+  const int groupSize =            FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);
+  // Get the file input
+  const char* const filename       =
+      FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
+  // Get the treeHeight
+  const unsigned int TreeHeight    =
+      FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
+  const int optionBuildTree = FParameters::getValue(argc, argv, LocalOptionBuildTree.options,0) ;
+
+
+  // Init MPI communicator
+  // Initialisation MPI Berenger
+  FMpi FMpiComm;
+  // Initialisation MPI Quentin
+  inria::mpi::communicator mpi_comm(FMpiComm.global().getComm());
+  // Show job information
+  std::cout << "JOB INFORMATION " << std::endl;
+  std::cout << "File name:    " << filename              << std::endl;
+  std::cout <<  "TreeHeight:  "   <<   TreeHeight    << std::endl;
+  std::cout <<  "Block size:  "   <<    groupSize   << std::endl;
+  std::cout <<  "Tree type:   "    <<  TreeBuilderOption[optionBuildTree] << std::endl;
+  std::cout << "------------------------------------------" << std::endl;
+  FTic time;
+  // Use FMpiFmaGenericLoader to read the box simulation size
+  std::cout << "Opening : " << filename << " ...";
+  FMpiFmaGenericLoader<FReal> loader(filename, FMpiComm.global());
+  std::cout << " done." << std::endl;
+
+  // define a box, used in the sort
+  const FBox<FPoint<FReal>> box{loader.getBoxWidth(),loader.getCenterOfBox()};
+  FReal width = std::max(box.width(0) , std::max(box.width(1) ,box.width(2) )) ;
+  //
+  // The group tree used for the computation
+  GroupOctreeClass * computeOnGroupTree = nullptr ;
+  //
+  ///////////////////////////////////////////////////////////////////////////////
+  //          Build Let or duplicated tree
+  ///////////////////////////////////////////////////////////////////////////////
+  //
+  std::string title   ;
+  int nb_block ;
+  std::vector<MortonIndex> mortonCellDistribution ;
+  // vector to stock all particles
+  std::vector<groupTree::particle_t<FReal> > myParticles ;
+  //
+  // define the max level to sort particle
+
+  std::string octreeType;
+  if(optionBuildTree <= 1 ){
+      title = "Distribution LETGroupTree in ";
+      octreeType = "Let group"   ;
+      //
+      GroupOctreeClass *localGroupTree = nullptr;
+      //
+      groupTree::buildLetTree(mpi_comm, loader, myParticles,
+                   box, TreeHeight, groupSize, localGroupTree, mortonCellDistribution ,nb_block);
+      computeOnGroupTree   = localGroupTree ;
+    }
+  //
+  if(optionBuildTree > 0 ){
+      title ="duplicate GroupTree in ";
+      octreeType = "duplicate group"   ;
+      //
+      GroupOctreeClass *fullGroupTree = nullptr;
+      //
+      groupTree::buildDuplicatedTree( FMpiComm, optionBuildTree, filename, myParticles, box, TreeHeight,
+                           groupSize, fullGroupTree,mortonCellDistribution,nb_block);
+      computeOnGroupTree   = fullGroupTree ;
+      nb_block =0;
+    }
+  time.tac();
+  computeOnGroupTree->printInfoBlocks();
+  std::cout << title <<  mortonCellDistribution.size() << std::endl;
+  for ( auto v :  mortonCellDistribution)
+    std::cout << "  " << v     ;
+  std::cout << std::endl;
+  std::cout << " nb_block: " << nb_block <<std::endl;
+  std::cout << " Creating GroupTree in " << time.elapsed() << "s)." << std::endl;
+  //
+  ///////////////////////////////////////////////////////////////////////////////
+  //          Computation part
+  ///////////////////////////////////////////////////////////////////////////////
+  //
+  // define the operation to proceed
+  //  FFmmNearField   only Near field
+  //  FFmmFarField    only Far field
+  // FFmmNearAndFarFields  full FMM
+  // By operator FFmmP2P| FFmmP2M | |  FFmmM2M  FFmmM2L | FFmmL2L | FFmmL2P
+  const unsigned int operationsToProceed =   FFmmP2M |  FFmmM2M |  FFmmM2L | FFmmL2L | FFmmL2P ;
+
+  const MatrixKernelClass MatrixKernel;
+  GroupKernelClass groupkernel(TreeHeight, width, box.center() , &MatrixKernel);
+  std::cout << " end GroupKernelClass " << std::endl ;
+  GroupAlgorithm groupalgo(computeOnGroupTree,&groupkernel, mortonCellDistribution,nb_block);
+  std::cout << " end GroupAlgorithm " << std::endl ;
+  // wait all proc
+  FTic timerExecute;
+  FMpiComm.global().barrier();    // Synchronization for timer
+  // start new timer
+  timerExecute.tic();
+  //  starpu_fxt_start_profiling();
+  std::cout << " start groupalgo.execute " << std::endl ;
+  groupalgo.execute(operationsToProceed);
+  std::cout << " end groupalgo.execute " << std::endl ;
+  computeOnGroupTree->printInfoBlocks();
+
+  FMpiComm.global().barrier();
+  //   starpu_fxt_stop_profiling();
+  timerExecute.tac();
+  auto timeElapsed = timerExecute.elapsed();
+  // print times
+  double minTime,maxTime,meanTime ;
+  groupTree::timeAverage(FMpiComm, timeElapsed, minTime, maxTime, meanTime) ;
+  std::cout <<  " time (in sec.)  on node: " << timeElapsed
+             << " min " << minTime << " max " << maxTime
+             << " mean " << meanTime << std::endl;
+  //
+  ///////////////////////////////////////////////////////////////////////////////
+  //          Extraction des resultats
+  ///////////////////////////////////////////////////////////////////////////////
+  //
+  nlohmann::json result;
+  std::string name = std::to_string(TreeHeight);
+  name += "_" + std::to_string(groupSize)+"_"+std::to_string(loader.getNumberOfParticles()) + ".json";
+  result["TreeHeight"] = TreeHeight;
+  result["GroupSize"]  = groupSize;
+  result["Filename"]   = filename;
+  result["NbParticle"] = loader.getNumberOfParticles();
+  result["Octree"]     = octreeType;
+  result["Algorithm"]["time"] = timeElapsed;
+  result["Algorithm"]["mean"] = meanTime;
+  result["Algorithm"]["min"]  = minTime;
+  result["Algorithm"]["max"]  = maxTime;
+  result["kernel"]            = interpolationKernel ;
+  std::ofstream out(name);
+  out << result << std::endl;
+  //
+  ///////////////////////////////////////////////////////////////////////////////
+  //          Validation
+  ///////////////////////////////////////////////////////////////////////////////
+  //
+  // Validate the result
+  if(FParameters::existParameter(argc, argv, LocalOptionValidate.options) == true){
+      // Check the result with a previous computation
+      // The resuls are stored in the files
+      using CellClass1      = FInterpolationCell<FReal, ORDER>;
+
+      typedef FP2PParticleContainer<FReal>         ContainerClass;
+      typedef FSimpleLeaf<FReal, ContainerClass >  LeafClass;
+      using OctreeClass = FOctree<FReal, CellClass1,ContainerClass,LeafClass>       ;
+      using KernelClass = FInterpolationKernel<FReal,CellClass1,ContainerClass,MatrixKernelClass,ORDER>    ;
+      using FmmClass    = FFmmAlgorithm<OctreeClass,CellClass1,ContainerClass,KernelClass,LeafClass> ;
+      const int SubTreeHeight=3;
+      OctreeClass treeCheck(TreeHeight, SubTreeHeight,width,box.center());
+      const FReal epsilon = 1E-10;
+      KernelClass kernels(TreeHeight, width, box.center(), &MatrixKernel);
+      if(optionBuildTree > 0 ){
+          for(std::size_t idxPart = 0 ; idxPart < myParticles.size() ; ++idxPart){
+              // put in tree
+              treeCheck.insert(myParticles[idxPart].getPosition(),
+                               myParticles[idxPart].physicalValue());
+ //             std::cout << idxPart<< " " << myParticles[idxPart].getPosition() << " " << myParticles[idxPart].physicalValue() <<std::endl;
+            }
+          FmmClass algorithm(&treeCheck, &kernels);
+
+
+          checkWithDuplicatedTree(FMpiComm.global().processId(), myParticles,treeCheck, algorithm,
+                                  *computeOnGroupTree,groupalgo,operationsToProceed,epsilon );
+        }
+      else {
+          std::cerr << " check Not yet implemented " << std::endl ;
+          FFmaGenericLoader<FReal>  seqLoader(filename);
+          readAndSortAllParticle(seqLoader, box,  myParticles, TreeHeight ) ;
+          for(std::size_t idxPart = 0 ; idxPart < myParticles.size() ; ++idxPart){
+              // put in tree
+              treeCheck.insert(myParticles[idxPart].getPosition(),
+                               myParticles[idxPart].physicalValue());
+            }
+           FmmClass algorithm(&treeCheck, &kernels);
+
+
+            checkWithDuplicatedTree(FMpiComm.global().processId(), myParticles,treeCheck, algorithm,
+                                      *computeOnGroupTree,groupalgo,operationsToProceed,epsilon );
+        }
+
+    }
+  return 0;
+}
diff --git a/Tests/LET_STF_Implicit/testGroupTreeFromLinearTree.cpp b/Tests/LET_STF_Implicit/testGroupTreeFromLinearTree.cpp
new file mode 100644
index 000000000..dfe23d8ba
--- /dev/null
+++ b/Tests/LET_STF_Implicit/testGroupTreeFromLinearTree.cpp
@@ -0,0 +1,218 @@
+// ==== CMAKE =====
+// @FUSE_BLAS
+// ================
+// Keep in private GIT
+// @FUSE_MPI
+
+#include "../../Src/Utils/FGlobal.hpp"
+// include algo for linear tree
+#include "inria/algorithm/distributed/mpi.hpp"
+#include "inria/linear_tree/balance_tree.hpp"
+// tree class
+#include "../../Src/GroupTree/Core/FGroupTree.hpp"
+// symbolic data
+#include "../../Src/Components/FSymbolicData.hpp"
+// cell class
+#include "../../Src/Kernels/Chebyshev/FChebCell.hpp"
+// parameter
+#include "../../Src/Utils/FParameters.hpp"
+#include "../../Src/Utils/FParameterNames.hpp"
+// GroupParticleContianer
+#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
+// file loader
+#include "../../Src/Files/FMpiFmaGenericLoader.hpp"
+// FBox
+#include "Adaptive/FBox.hpp"
+// Group linear tree
+#include "../../Src/GroupTree/Core/FGroupLinearTree.hpp"
+// Function for GroupLinearTree
+#include "../../Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp"
+
+#include <memory>
+
+static const int ORDER = 6;  // template argument forwarded to FChebCell below
+using FReal               = double;  // floating-point type used by every alias of this test
+using GroupCellClass      = FChebCell<FReal, ORDER>;
+using GroupCellUpClass    = typename GroupCellClass::multipole_t;        // multipole type of the cell
+using GroupCellDownClass  = typename GroupCellClass::local_expansion_t;  // local-expansion type of the cell
+using GroupCellSymbClass  = FSymbolicData;
+using GroupContainerClass = FP2PGroupParticleContainer<FReal>;
+using GroupOctreeClass    = FGroupTree<FReal,
+                                        GroupCellSymbClass,
+                                        GroupCellUpClass,
+                                        GroupCellDownClass, GroupContainerClass, 1, 4, FReal>;  // NOTE(review): meaning of 1 and 4 is defined by FGroupTree - confirm there
+
+// One particle as handled by this test: a position, a physical value and a Morton index.
+struct particle_t {
+    using position_t = FPoint<FReal>;
+    // Data members (declaration order left untouched).
+    position_t pos;            // position of the particle
+    FReal phi;                 // physical value attached to the particle
+    MortonIndex morton_index;  // Morton index; main() sets it at max_level, then at the leaf level
+
+    // Read-only accessors to the position; the three names all return `pos`.
+    const auto& position() const { return pos; }
+    // const-qualified like the other accessors so that const particles can use it too.
+    const FPoint<FReal>& getPosition() const { return pos; }
+    const auto& getPositions() const { return pos; }
+
+    // Read-only accessor to the physical value.
+    const auto& physicalValue() const { return phi; }
+
+    // Every particle has the same weight of 1.
+    int weight() const { return 1; }
+
+    // Friend function (reachable through ADL only) returning the stored Morton index.
+    friend constexpr auto morton_index(const particle_t& p) { return p.morton_index; }
+};
+
+int main(int argc, char *argv[]) {
+    // Test driver: builds a group tree from a distributed linear tree and prints it. Parameter definition first
+    const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"};
+    // Parameter help
+    FHelpDescribeAndExit(argc, argv,
+                         "Test the blocked tree created with linear tree." ,FParameterDefinitions::OctreeHeight,
+                         FParameterDefinitions::InputFile,
+                         LocalOptionBlocSize);
+    // Get parameters
+    // Group (block) size, option -bs, default 250
+    const int groupSize =
+            FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);
+    // Input file, default ../Data/test20k.fma
+    const char* const filename       =
+            FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
+    // Tree height, default 5
+    const unsigned int TreeHeight    =
+            FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
+    // Level of the leaves, i.e. TreeHeight-1
+    int level = TreeHeight-1;
+    // Init MPI communicator
+        // MPI initialisation through the FMpi wrapper
+    FMpi FMpiComm(argc,argv);
+        // inria::mpi communicator built from the communicator held by FMpiComm
+    inria::mpi::communicator mpi_comm(FMpiComm.global().getComm());
+
+    // Show job information
+    std::cout << "JOB INFORMATION " << std::endl;
+    std::cout << "File name : " << filename              << std::endl;
+    std::cout <<  "TreeHeight : "   <<   TreeHeight    << std::endl;
+    std::cout <<  "Block size : "   <<    groupSize   << std::endl;
+    std::cout << "------------------------------------------" << std::endl;
+
+    std::cout << "Opening : " << filename << " ...";
+    FMpiFmaGenericLoader<FReal> loader(filename, FMpiComm.global());
+    std::cout << " done." << std::endl;
+
+    // vector storing my particles, sized with the loader's count for this process
+    std::vector<particle_t> myParticles(loader.getMyNumberOfParticles());
+    // level used for the sort: number of bits of morton_index divided by 3
+    const std::size_t max_level = sizeof(particle_t::morton_index) * 8 / 3;
+    // define a box, used in the sort
+    const FBox<FPoint<FReal>> box{loader.getBoxWidth(),loader.getCenterOfBox()};
+
+    // iterate on all of my particles
+    for(FSize idxPart = 0; idxPart <loader.getMyNumberOfParticles();++idxPart){
+        particle_t tmp;
+        // read the next particle (position and physical value) from the loader
+        loader.fillParticle(&tmp.pos,&tmp.phi);
+        // set the Morton index of the current particle at max_level
+        tmp.morton_index = inria::linear_tree::get_morton_index(
+            tmp.pos, box, max_level);
+        // overwrite the physical value read from the file with the constant 0.1
+        tmp.phi = 0.1;
+        // store the particle in my vector
+        myParticles.at(idxPart) = tmp;
+    }
+
+    // Now all of my particles are in a vector and each one has a Morton index;
+    // sort them by Morton index with inria::sort on mpi_comm
+    std::cout << "Sorting particles ...";
+    inria::sort(mpi_comm,myParticles,
+                [](const auto& p1, const auto& p2) {
+                    return p1.morton_index < p2.morton_index;
+                });
+    std::cout << " Done" << std::endl;
+
+    std::cout << " I have " << loader.getMyNumberOfParticles() << " particles ..." << std::endl;
+    std::cout << "For a total of " << loader.getNumberOfParticles() << " particles ..." << std::endl;
+
+    // Now create the linear tree;
+    // a linear tree is a tree with only the leaves
+    std::cout << "Create linear tree at level " << level << " ...";
+    auto linear_tree =
+        inria::linear_tree::create_balanced_linear_tree_at_level(mpi_comm,
+                                                                 level,
+                                                                 box,
+                                                                 myParticles);
+    std::cout << " done." << std::endl;
+
+    // Now create a blocked linear tree: it is just a linear tree with
+    // more information
+    std::cout << "Creating blocked linear tree ...";
+    // declaration of the group linear tree
+    FGroupLinearTree<decltype(linear_tree)::value_type>group_linear_tree{mpi_comm};
+    // now fill it
+    // two methods are available
+    //     -  create_local_group_linear_tree
+    //     -  create_global_group_linear_tree
+    // the first function balances the linear tree according to the particles (so it
+    // does not touch the distribution)
+    // the second function balances the linear tree according to the number of
+    // blocks; it tries to have full groups on the left
+    //
+    // with a groupSize of 128, 890 leaves and 3 procs the distribution will be
+    //     ___________________________________
+    //    |   Proc 0  |  Proc 1 |   Proc 2    |
+    //    |           |         |             |
+    //    | 128   128 | 128 128 | 128 128 122 |
+    //    |___________|_________|_____________|
+    group_linear_tree.create_local_group_linear_tree(
+        &linear_tree,
+        groupSize
+    );
+    std::cout << " Done" << std::endl;
+
+    // now redistribute the particles according to the linear tree
+    std::cout << "Redistribute ...";
+    // Redistribution of particles
+    inria::linear_tree::redistribute_particles(mpi_comm,
+                                               linear_tree,
+                                               myParticles);
+    std::cout << " Done" << std::endl;
+
+    // Now recompute the Morton index of every particle
+    // at the leaf level (TreeHeight-1), tracking the smallest and largest index
+    MortonIndex minMidx=1,maxMidx=0;  // minMidx is shifted to 8^level by the loop below
+    for (int i =0 ; i< level ; ++i){
+      minMidx= minMidx <<3 ;
+    }
+    for(unsigned i = 0 ; i < myParticles.size(); ++i){
+      myParticles.at(i).morton_index = inria::linear_tree::get_morton_index(
+									    myParticles.at(i).pos, box, level);
+      maxMidx =std::max(maxMidx,  myParticles.at(i).morton_index);
+	  minMidx =std::min(minMidx,  myParticles.at(i).morton_index);
+    }
+
+    std::cout << " MinIndex: " << minMidx << "  MaxIndex: " << maxMidx<<std::endl;
+    // Now share the particle distribution, needed to build the GroupTree
+    std::cout << "Share my particle distribution ...";
+    group_linear_tree.set_index_particle_distribution(myParticles);
+    std::cout << " done"<< std::endl;
+
+    // Now declare the group tree:
+    // get_block_tree_instance returns an instance that create_tree fills below
+    GroupOctreeClass  localGroupTree = GroupOctreeClass::template get_block_tree_instance<GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass>(TreeHeight,
+                                             groupSize,
+                                             loader.getCenterOfBox(),
+                                             loader.getBoxWidth());
+
+    // Now fill the localGroupTree from the group linear tree and my particles
+    std::cout << "Creating tree ..." ;
+    localGroupTree.create_tree(group_linear_tree,myParticles);
+    std::cout << " done." << std::endl;
+
+    // now show the groupTree
+    localGroupTree.printInfoBlocks();
+
+    return 0;
+}
diff --git a/Tests/LET_STF_Implicit/testLETGroupTree.cpp b/Tests/LET_STF_Implicit/testLETGroupTree.cpp
new file mode 100644
index 000000000..19c940fc9
--- /dev/null
+++ b/Tests/LET_STF_Implicit/testLETGroupTree.cpp
@@ -0,0 +1,254 @@
+// ==== CMAKE =====
+// @FUSE_BLAS
+// @FUSE_MPI
+// ================
+//
+
+#include "../../Src/Utils/FGlobal.hpp"
+// include algo for linear tree
+#include "inria/algorithm/distributed/mpi.hpp"
+#include "inria/linear_tree/balance_tree.hpp"
+// tree class
+#include "../../Src/GroupTree/Core/FGroupTree.hpp"
+// symbolic data
+#include "../../Src/Components/FSymbolicData.hpp"
+// cell class
+#include "../../Src/Kernels/Chebyshev/FChebCell.hpp"
+// parameter
+#include "../../Src/Utils/FParameters.hpp"
+#include "../../Src/Utils/FParameterNames.hpp"
+// GroupParticleContianer
+#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
+// file loader
+#include "../../Src/Files/FMpiFmaGenericLoader.hpp"
+// FBox
+#include "Adaptive/FBox.hpp"
+// Group linear tree
+#include "../../Src/GroupTree/Core/FGroupLinearTree.hpp"
+// Function for GroupLinearTree
+#include "../../Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp"
+// Include validation Function
+#include "../../Src/GroupTree/Core/FDistributedLETGroupTreeValidator.hpp"
+#include <memory>
+
+
+static const int ORDER = 6;  // template argument forwarded to FChebCell below
+using FReal               = double;  // floating-point type used by every alias of this test
+using GroupCellClass      = FChebCell<FReal, ORDER>;
+using GroupCellUpClass    = typename GroupCellClass::multipole_t;        // multipole type of the cell
+using GroupCellDownClass  = typename GroupCellClass::local_expansion_t;  // local-expansion type of the cell
+using GroupCellSymbClass  = FSymbolicData;
+using GroupContainerClass = FP2PGroupParticleContainer<FReal>;
+using GroupOctreeClass    = FGroupTree<FReal,
+                                        GroupCellSymbClass,
+                                        GroupCellUpClass,
+                                        GroupCellDownClass, GroupContainerClass, 1, 4, FReal>;  // NOTE(review): meaning of 1 and 4 is defined by FGroupTree - confirm there
+
+// One particle as handled by this test: a position, a physical value and a Morton index.
+struct particle_t {
+    using position_t = FPoint<FReal>;
+    // Data members (declaration order left untouched).
+    position_t pos;            // position of the particle
+    FReal phi;                 // physical value attached to the particle
+    std::size_t morton_index;  // Morton index; main() sets it at max_level, then at the leaf level
+
+    // Read-only accessors to the position; the three names all return `pos`.
+    const auto& position() const { return pos; }
+    // const-qualified like the other accessors so that const particles can use it too.
+    const FPoint<FReal>& getPosition() const { return pos; }
+    const auto& getPositions() const { return pos; }
+
+    // Read-only accessor to the physical value.
+    const auto& physicalValue() const { return phi; }
+
+    // Every particle has the same weight of 1.
+    int weight() const { return 1; }
+
+    // Friend function (reachable through ADL only) returning the stored Morton index.
+    friend constexpr auto morton_index(const particle_t& p) { return p.morton_index; }
+};
+
+int main(int argc, char *argv[]) {
+    // Test driver: builds a local group tree, extends it to a LET and validates it. Parameter definition first
+    const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"};
+    // Parameter help
+    FHelpDescribeAndExit(argc, argv,
+                         "Test the blocked tree created with linear tree." ,FParameterDefinitions::OctreeHeight,
+                         FParameterDefinitions::InputFile,
+                         LocalOptionBlocSize);
+    std::string comments("===================================================================================");
+    // Get parameters
+    // Group (block) size, option -bs, default 250
+    const int groupSize =
+            FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);
+    // Input file, default ../Data/test20k.fma
+    const char* const filename       =
+            FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
+    // Tree height, default 5
+    const unsigned int TreeHeight    =
+            FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
+    // Level of the leaves, i.e. TreeHeight-1
+    int level = TreeHeight-1;
+    // Init MPI communicator
+        // MPI initialisation through the FMpi wrapper
+    FMpi FMpiComm(argc,argv);
+        // inria::mpi communicator built from the communicator held by FMpiComm
+    inria::mpi::communicator mpi_comm(FMpiComm.global().getComm());
+
+    // Show job information
+    std::cout << "JOB INFORMATION " << std::endl;
+    std::cout << "File name : " << filename              << std::endl;
+    std::cout <<  "TreeHeight : "   <<   TreeHeight    << std::endl;
+    std::cout <<  "Block size : "   <<    groupSize   << std::endl;
+    std::cout << "------------------------------------------" << std::endl;
+
+    std::cout << "Opening : " << filename << " ...";
+    FMpiFmaGenericLoader<FReal> loader(filename, FMpiComm.global());
+    std::cout << " done." << std::endl;
+
+    // vector storing my particles, sized with the loader's count for this process
+    std::vector<particle_t> myParticles(loader.getMyNumberOfParticles());
+    // level used for the sort: number of bits of morton_index divided by 3
+    const std::size_t max_level = sizeof(particle_t::morton_index) * 8 / 3;
+    // define a box, used in the sort
+    const FBox<FPoint<FReal>> box{loader.getBoxWidth(),loader.getCenterOfBox()};
+
+    // iterate on all of my particles
+    for(FSize idxPart = 0; idxPart <loader.getMyNumberOfParticles();++idxPart){
+        particle_t tmp;
+        // read the next particle (position and physical value) from the loader
+        loader.fillParticle(&tmp.pos,&tmp.phi);
+        // set the Morton index of the current particle at max_level
+        tmp.morton_index = inria::linear_tree::get_morton_index(
+            tmp.pos, box, max_level);
+        // overwrite the physical value read from the file with the constant 0.1
+        tmp.phi = 0.1;
+        // store the particle in my vector
+        myParticles.at(idxPart) = tmp;
+    }
+
+    // Now all of my particles are in a vector and each one has a Morton index;
+    // sort them by Morton index with inria::sort on mpi_comm
+    std::cout << "Sorting particles ...";
+    inria::sort(mpi_comm,myParticles,
+                [](const auto& p1, const auto& p2) {
+                    return p1.morton_index < p2.morton_index;
+                });
+    std::cout << " Done" << std::endl;
+
+    std::cout << " I have " << loader.getMyNumberOfParticles() << " particles ..." << std::endl;
+    std::cout << "For a total of " << loader.getNumberOfParticles() << " particles ..." << std::endl;
+
+    // Now create the linear tree;
+    // a linear tree is a tree with only the leaves
+    std::cout << "Create linear tree at level " << level << " ...";
+    auto linear_tree =
+        inria::linear_tree::create_balanced_linear_tree_at_level(mpi_comm,
+                                                                 level,
+                                                                 box,
+                                                                 myParticles);
+    std::cout << " done." << std::endl;
+
+    // Now create a blocked linear tree: it is just a linear tree with
+    // more information
+    std::cout << "Creating blocked linear tree ...";
+    // declaration of the group linear tree
+    FGroupLinearTree<decltype(linear_tree)::value_type>group_linear_tree{mpi_comm};
+    // now fill it
+    // two methods are available
+    //     -  create_local_group_linear_tree
+    //     -  create_global_group_linear_tree
+    // the first function balances the linear tree according to the particles (so it
+    // does not touch the distribution)
+    // the second function balances the linear tree according to the number of
+    // blocks; it tries to have full groups on the left
+    //
+    // with a groupSize of 128, 890 leaves and 3 procs the distribution will be
+    //     ___________________________________
+    //    |   Proc 0  |  Proc 1 |   Proc 2    |
+    //    |           |         |             |
+    //    | 128   128 | 128 128 | 128 128 122 |
+    //    |___________|_________|_____________|
+    group_linear_tree.create_local_group_linear_tree(
+        &linear_tree,
+        groupSize
+    );
+    std::cout << " Done" << std::endl;
+
+    // now redistribute the particles according to the linear tree
+    std::cout << "Redistribute ...";
+    // Redistribution of particles
+    inria::linear_tree::redistribute_particles(mpi_comm,
+                                               linear_tree,
+                                               myParticles);
+    std::cout << " Done" << std::endl;
+
+    // Now recompute the Morton index of every particle at the leaf level
+    // (TreeHeight-1) and track the smallest and largest index for the report below.
+    // minMidx starts at 8^level (1 shifted left by 3 bits per level), maxMidx at 0.
+    MortonIndex minMidx = MortonIndex(1) << (3*level), maxMidx = 0;
+    for(auto& part : myParticles){
+        part.morton_index = inria::linear_tree::get_morton_index(part.pos, box, level);
+        // morton_index is a std::size_t here, so convert before comparing with MortonIndex
+        const auto idx = static_cast<MortonIndex>(part.morton_index);
+        if(idx > maxMidx){ maxMidx = idx; }
+        if(idx < minMidx){ minMidx = idx; }
+    }
+    std::cout << " MinIndex: " << minMidx << "  MaxIndex: " << maxMidx<<std::endl;
+
+    // Now share the particle distribution, needed to build the GroupTree
+    std::cout << "Share my particle distribution ...";
+    group_linear_tree.set_index_particle_distribution(myParticles);
+    std::cout << " done"<< std::endl;
+    //
+    //////////////////////////////////////////////////////////////////////////////////////////////////////
+    //
+    // Now declare the group tree:
+    // get_block_tree_instance returns an instance that create_tree fills below
+    GroupOctreeClass  localGroupTree = GroupOctreeClass::template get_block_tree_instance<GroupCellSymbClass,
+                                                                                          GroupCellUpClass,
+                                                                                          GroupCellDownClass,
+                                                                                          GroupContainerClass>(TreeHeight,
+                                                                                                               groupSize,
+                                                                                                               loader.getCenterOfBox(),
+                                                                                                               loader.getBoxWidth());
+    //
+    // Now fill the localGroupTree from the group linear tree and my particles
+    //
+    std::cout << "Creating tree ..." ;
+    localGroupTree.create_tree(group_linear_tree,myParticles);
+    std::cout << " done." << std::endl;
+    //
+    // now show the groupTree
+    std::cout << comments <<std::endl << "                  Local Tree " <<std::endl;
+
+    localGroupTree.printInfoBlocks();
+    //
+    std::cout << comments<<std::endl;
+    // Now build a LET tree;
+    // the LET tree is the Local Essential Tree.
+    // It is the same tree to which blocks from other procs are added:
+    // the added blocks are those needed to post tasks for the P2P, M2M and M2L interactions
+    //
+    // The first step is to give a unique index to every group
+    std::cout << " Add a global index on every block ..";
+    int nb_block = dstr_grp_tree_builder::set_cell_group_global_index(localGroupTree,mpi_comm);
+    std::cout << ".  done.";
+    std::cout << "We have a total of " << nb_block << " on every proc." << std::endl;
+    //
+    //                now we can create LET
+    //
+    std::cout << "Creating let ...";
+    localGroupTree.create_LET(group_linear_tree);
+    std::cout << "done." << std::endl;
+
+    // Validate the LET, then show the group tree with its LET blocks;
+    // in the printout the second field of every block tells whether the block is local.
+    // NOTE(review): flag is presumably true when the tree is valid - confirm in the validator.
+
+    bool flag = dstr_grp_tree_vldr::validate_group_tree(localGroupTree,mpi_comm);
+    std::cout << comments <<std::endl << "                  LET " <<std::endl;
+    localGroupTree.printInfoBlocks();
+    std::cout << comments <<std::endl;
+    std::cout << " LET validation flag: " << std::boolalpha << flag << std::endl;
+    return 0;
+}
diff --git a/Tests/LET_STF_Implicit/testLetImplicitChebyshev.cpp b/Tests/LET_STF_Implicit/testLetImplicitChebyshev.cpp
new file mode 100644
index 000000000..0f3af149a
--- /dev/null
+++ b/Tests/LET_STF_Implicit/testLetImplicitChebyshev.cpp
@@ -0,0 +1,30 @@
+// ==== CMAKE =====
+// @FUSE_BLAS
+// ================
+// Keep in private GIT
+// @FUSE_MPI
+// @FUSE_STARPU
+//
+//
+#include <string>
+//
+// Chebychev cell class
+#include "Kernels/Chebyshev/FChebCell.hpp"
+#include "Kernels/Chebyshev/FChebSymKernel.hpp"
+// Cell alias bound to the Chebyshev cell; presumably consumed by testFMMInterpolationStarPU.hpp included below.
+template<typename FReal, int ORDER> 
+using FInterpolationCell =  FChebCell<FReal, ORDER>;
+// Kernel alias bound to the symmetric Chebyshev kernel; template arguments are forwarded unchanged.
+template<typename FReal, typename GroupCellClass,
+	 typename GroupContainerClass,
+	 typename MatrixKernelClass, int ORDER>  
+
+using FInterpolationKernel = FChebSymKernel<FReal,
+					    GroupCellClass,
+					    GroupContainerClass,
+					    MatrixKernelClass,
+					    ORDER> ;
+// Name of the kernel selected by this translation unit.
+static std::string interpolationKernel("Chebyshev");
+#include "testFMMInterpolationStarPU.hpp"
+
diff --git a/Tests/LET_STF_Implicit/testLetImplicitUniform.cpp b/Tests/LET_STF_Implicit/testLetImplicitUniform.cpp
new file mode 100644
index 000000000..572d1348e
--- /dev/null
+++ b/Tests/LET_STF_Implicit/testLetImplicitUniform.cpp
@@ -0,0 +1,29 @@
+// ==== CMAKE =====
+// @FUSE_BLAS
+// ================
+// Keep in private GIT
+// @FUSE_MPI
+// @FUSE_STARPU
+//
+#include <string>
+//
+// Uniform Grid points cell class
+#include "Kernels/Uniform/FUnifCell.hpp"
+#include "Kernels/Uniform/FUnifKernel.hpp"
+// Cell alias bound to the uniform-grid cell; presumably consumed by testFMMInterpolationStarPU.hpp included below.
+template<typename FReal, int ORDER> 
+using FInterpolationCell =  FUnifCell<FReal, ORDER>;
+// Kernel alias bound to the uniform-grid kernel; template arguments are forwarded unchanged.
+template<typename FReal, typename GroupCellClass,
+	 typename GroupContainerClass,
+	 typename MatrixKernelClass, int ORDER>  
+using FInterpolationKernel = FUnifKernel<FReal,
+					    GroupCellClass,
+					    GroupContainerClass,
+					    MatrixKernelClass,
+					    ORDER> ;
+// Name of the kernel selected by this translation unit.
+static std::string interpolationKernel("Uniform");
+
+#include "testFMMInterpolationStarPU.hpp"
+
diff --git a/Tests/LET_STF_Implicit/testSizeGroupTree.cpp b/Tests/LET_STF_Implicit/testSizeGroupTree.cpp
new file mode 100644
index 000000000..62457891e
--- /dev/null
+++ b/Tests/LET_STF_Implicit/testSizeGroupTree.cpp
@@ -0,0 +1,356 @@
+// ==== CMAKE =====
+// @FUSE_BLAS
+// ================
+// Keep in private GIT
+// @FUSE_MPI
+// @FUSE_STARPU
+
+#include "../../Src/Utils/FGlobal.hpp"
+// include algo for linear tree
+#include "inria/algorithm/distributed/mpi.hpp"
+#include "inria/linear_tree/balance_tree.hpp"
+// tree class
+#include "../../Src/GroupTree/Core/FGroupTree.hpp"
+// symbolic data
+#include "../../Src/Components/FSymbolicData.hpp"
+// cell class
+#include "../../Src/Kernels/Chebyshev/FChebCell.hpp"
+// parameter
+#include "../../Src/Utils/FParameters.hpp"
+#include "../../Src/Utils/FParameterNames.hpp"
+// GroupParticleContainer
+#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
+// file loader
+#include "../../Src/Files/FMpiFmaGenericLoader.hpp"
+// FBox
+#include "Adaptive/FBox.hpp"
+// Group linear tree
+#include "../../Src/GroupTree/Core/FGroupLinearTree.hpp"
+// Function for GroupLinearTree
+#include "../../Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp"
+#include "../../Src/Utils/FTic.hpp"
+#include "../../Src/Files/FFmaGenericLoader.hpp"
+#include "../../Src/Utils/FLeafBalance.hpp"
+
+#include "../../Contribs/json.hpp"
+
+#include <memory>
+
+
+static const int ORDER = 6;
+using FReal               = double;
+using GroupCellClass      = FChebCell<FReal, ORDER>;
+using GroupCellUpClass    = typename GroupCellClass::multipole_t;
+using GroupCellDownClass  = typename GroupCellClass::local_expansion_t;
+using GroupCellSymbClass  = FSymbolicData;
+using GroupContainerClass = FP2PGroupParticleContainer<FReal>;
+using GroupOctreeClass    = FGroupTree<FReal,
+                                        GroupCellSymbClass,
+                                        GroupCellUpClass,
+                                        GroupCellDownClass, GroupContainerClass, 1, 4, FReal>;
+
+// Structure for 1 particle: a position, a physical value and a Morton index
+struct particle_t {
+    using position_t = FPoint<FReal>;
+    position_t pos; // position of the particle
+    FReal phi; // value returned by physicalValue()
+    std::size_t morton_index; // Morton index, exposed through the friend morton_index() below
+    const auto& position() const { // read-only access to pos
+        return pos;
+    }
+    const FPoint<FReal>& getPosition(){ // same value as position(); non-const member, so not callable on a const particle_t
+        return pos;
+    }
+    const auto& physicalValue() const{ // read-only access to phi
+        return phi;
+    }
+    const auto& getPositions() const { // returns pos, like position()
+        return pos;
+    }
+    int weight() const { return 1;} // every particle has the same weight
+    friend constexpr auto morton_index(const particle_t& p) { // free-function accessor to the stored Morton index
+        return p.morton_index;
+    }
+};
+void sortParticle(FPoint<FReal> * allParticlesToSort, int treeHeight, int groupSize, std::vector<std::vector<int>> & sizeForEachGroup, std::vector<MortonIndex> & distributedMortonIndex, FFmaGenericLoader<FReal>& loader, int nproc);
+void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight);
+FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total);
+
+// Builds a group tree from all the particles of the input file, sums the memory used by its
+// particle and cell groups on each MPI process, and has rank 0 print the per-process averages
+// and write them to the JSON file "out<TreeHeight>_<groupSize>_<nbParticles>.json".
+int main(int argc, char *argv[]) {
+    // Parameter definition
+    const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"};
+    // Parameter help
+    FHelpDescribeAndExit(argc, argv,
+                         "Test the blocked tree created with linear tree." ,FParameterDefinitions::OctreeHeight,
+                         FParameterDefinitions::InputFile,
+                         LocalOptionBlocSize);
+    // Get parameters
+    // Get the groupSize
+    const int groupSize =
+            FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);
+    // Get the file input
+    const char* const filename       =
+            FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
+    // Get the treeHeight
+    const unsigned int TreeHeight    =
+            FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
+    // Init MPI communicator
+    // MPI initialisation through the FMpi wrapper
+    FMpi FMpiComm(argc,argv);
+    int nproc = FMpiComm.global().processCount();
+    // inria communicator built from the FMpi communicator
+    inria::mpi::communicator mpi_comm(FMpiComm.global().getComm());
+
+    // Show job information
+    std::cout << "GroupTree building comparaison " << std::endl;
+    std::cout << "File name : " << filename              << std::endl;
+    std::cout <<  "TreeHeight : "   <<   TreeHeight    << std::endl;
+    std::cout <<  "Block size : "   <<    groupSize   << std::endl;
+    std::cout << "------------------------------------------" << std::endl;
+
+    FFmaGenericLoader<FReal> loader(filename);
+    const FSize NbParticles   = loader.getNumberOfParticles();
+    // Owning buffer, released automatically (the former raw new[] array was never deleted)
+    std::vector<FPoint<FReal>> allParticlesToSort(NbParticles);
+    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+        FReal physicalValue = 0.1;
+        loader.fillParticle(&allParticlesToSort[idxPart], &physicalValue);//Same with file or not
+    }
+    std::vector<MortonIndex> distributedMortonIndex;
+    std::vector<std::vector<int>> sizeForEachGroup;
+    sortParticle(allParticlesToSort.data(), TreeHeight, groupSize, sizeForEachGroup, distributedMortonIndex, loader, nproc);
+
+    FP2PParticleContainer<FReal> allParticles;
+    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+        FReal physicalValue = 0.1;
+        allParticles.push(allParticlesToSort[idxPart], physicalValue);
+    }
+    // Put the data into the tree
+    FTic time2;
+    time2.tic();
+    GroupOctreeClass groupedTree(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, sizeForEachGroup, true);
+    time2.tac();
+
+    std::size_t particle_blocks = 0;
+    std::size_t total_size = 0;
+    std::size_t cell_blocks = 0;
+    // Memory footprint of the particle groups: the group object plus its buffer
+    for(int i = 0; i < groupedTree.getNbParticleGroup();++i){
+        auto* container = groupedTree.getParticleGroup(i);
+        total_size += sizeof(*container);
+        total_size += container->getBufferSizeInByte();
+    }
+    particle_blocks = total_size;
+
+    for(int i = groupedTree.getHeight()-1 ; i > 0 ; --i ){
+        for(int j = 0 ; j < groupedTree.getNbCellGroupAtLevel(i) ; ++j){
+            auto* container = groupedTree.getCellGroup(i,j);
+            total_size += sizeof(*container);
+            total_size += container->getBufferSizeInByte();
+        }
+    }
+    cell_blocks = total_size - particle_blocks;
+    total_size += sizeof(groupedTree);
+    mpi_comm.barrier();
+
+    int nb_proc = mpi_comm.size();
+    int my_rank = mpi_comm.rank();
+    std::vector<std::size_t> vect_result(3);
+    vect_result[0] = particle_blocks;
+    vect_result[1] = total_size;
+    vect_result[2] = cell_blocks;
+    std::vector<std::size_t> vect_recv(0,0);
+    if(my_rank == 0){
+        vect_recv.resize(nb_proc*3);
+    }
+    // Counters travel as raw bytes; data() stays valid on the empty vectors of non-root ranks
+    mpi_comm.gather(
+        vect_result.data(),
+        3*sizeof(std::size_t),
+        MPI_CHAR,
+        vect_recv.data(),
+        3*sizeof(std::size_t),
+        MPI_CHAR,
+        0
+    );
+
+    if(my_rank == 0 ){
+        std::size_t partavg = 0;
+        std::size_t cellavg = 0;
+        std::size_t totalavg = 0;
+        // Each rank contributed {particle blocks, total size, cell blocks}, in that order
+        for(std::size_t i = 0 ; i < vect_recv.size() ; i+= 3){
+            partavg  += vect_recv[i];
+            cellavg  += vect_recv[i+2];
+            totalavg += vect_recv[i+1];
+        }
+
+        partavg /= nb_proc;
+        cellavg /= nb_proc;
+        totalavg /= nb_proc;
+
+        std::cout << "Particle blocks : " << partavg << std::endl;
+        std::cout << "Cell blocks : " << cellavg << std::endl;
+        std::cout << "Total size : " << totalavg << " bytes " << (totalavg)/1000 << " Kb "<< ((totalavg)/1000)/1000 << " Mb " << std::endl;
+
+        // Write the averages to a JSON file (any existing file of that name is overwritten)
+        std::string name = "out"+std::to_string(TreeHeight);
+        name += "_" + std::to_string(groupSize)+"_"+std::to_string(loader.getNumberOfParticles()) + ".json";
+        nlohmann::json j;
+        j["GroupTree"]["ParticlesBlocks"] = partavg;
+        j["GroupTree"]["CellsBlocks"] = cellavg;
+        j["GroupTree"]["TotalSize"] = totalavg;
+        std::ofstream out(name);
+        out << j << std::endl;
+    }
+
+    return 0;
+}
+
+
+// Sorts allParticles in place by leaf Morton index, then fills sizeForEachGroup[level] with the
+// size of every group at every level for nproc processes, and appends to
+// distributedMortonIndex the bounds of the leaf-level working interval of each process.
+// NOTE(review): an empty particle set is not handled (the last particle is read unconditionally).
+void sortParticle(FPoint<FReal> * allParticles, int treeHeight, int groupSize, std::vector<std::vector<int>> & sizeForEachGroup, std::vector<MortonIndex> & distributedMortonIndex, FFmaGenericLoader<FReal>& loader, int nproc)
+{
+    // Structure used for sorting
+    struct ParticleSortingStruct{
+        FPoint<FReal> position;
+        MortonIndex mindex;
+    };
+    // Create an array of the sorting structure, then fill it
+    const FSize nbParticles = loader.getNumberOfParticles();
+    std::vector<ParticleSortingStruct> particlesToSort(nbParticles); // owning buffer: the former new[] array was never freed
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
+        const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>(loader.getCenterOfBox(), loader.getBoxWidth(),
+                                                                                           treeHeight,
+                                                                                           allParticles[idxPart]);
+        const MortonIndex particleIndex = host.getMortonIndex();
+        particlesToSort[idxPart].mindex = particleIndex;
+        particlesToSort[idxPart].position = allParticles[idxPart];
+    }
+
+    // Sort the new array
+    FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort.data(), nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
+            return v1.mindex <= v2.mindex;
+        });
+    // Put everything back, in order, into the original array
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
+        allParticles[idxPart] = particlesToSort[idxPart].position;
+    }
+
+    // Count the number of leaves
+    sizeForEachGroup.resize(treeHeight);
+    MortonIndex previousLeaf = -1;
+    int numberOfLeaf = 0;
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
+    {
+        if(particlesToSort[idxPart].mindex != previousLeaf)
+        {
+            previousLeaf = particlesToSort[idxPart].mindex;
+            ++numberOfLeaf;
+        }
+    }
+
+    // Compute the group sizes at the leaf level
+    FLeafBalance balancer;
+    for(int processId = 0; processId < nproc; ++processId)
+    {
+        const FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
+        const FSize size_last = leafOnProcess%groupSize;
+        const FSize countGroup = (leafOnProcess - size_last)/groupSize;
+        for(FSize i = 0; i < countGroup; ++i)
+            sizeForEachGroup[treeHeight-1].push_back(groupSize);
+        if(size_last > 0)
+            sizeForEachGroup[treeHeight-1].push_back((int)size_last);
+    }
+
+    // Compute the working interval at the leaf level
+    previousLeaf = -1;
+    int countLeaf = 0;
+    int processId = 0;
+    FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, 0) - balancer.getLeft(numberOfLeaf, nproc, 0);
+    distributedMortonIndex.push_back(previousLeaf);
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
+    {
+        if(particlesToSort[idxPart].mindex != previousLeaf)
+        {
+            previousLeaf = particlesToSort[idxPart].mindex;
+            ++countLeaf;
+            if(countLeaf == leafOnProcess)
+            {
+                distributedMortonIndex.push_back(previousLeaf);
+                distributedMortonIndex.push_back(previousLeaf);
+                countLeaf = 0;
+                ++processId;
+                leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
+            }
+        }
+    }
+    distributedMortonIndex.push_back(particlesToSort[nbParticles - 1].mindex);
+
+    // Compute the working intervals at every level
+    std::vector<std::vector<std::vector<MortonIndex>>> nodeRepartition;
+    createNodeRepartition(distributedMortonIndex, nodeRepartition, nproc, treeHeight);
+
+    // For each level, compute the group sizes
+    for(int idxLevel = treeHeight - 2; idxLevel >= 0; --idxLevel)
+    {
+        processId = 0;
+        int countParticleInTheGroup = 0;
+        MortonIndex previousMortonCell = -1;
+        for(FSize idxPart = 0; idxPart < nbParticles; ++idxPart)
+        {
+            MortonIndex mortonCell = (particlesToSort[idxPart].mindex) >> (3*(treeHeight - 1 - idxLevel));
+            if(mortonCell <= nodeRepartition[idxLevel][processId][1]) // the index lies in the working interval
+            {
+                if(mortonCell != previousMortonCell) // it is a new index
+                {
+                    ++countParticleInTheGroup; // count it in the current group
+                    previousMortonCell = mortonCell;
+                    if(countParticleInTheGroup == groupSize) // the group is full: record its size
+                    {
+                        sizeForEachGroup[idxLevel].push_back(groupSize);
+                        countParticleInTheGroup = 0;
+                    }
+                }
+            }
+            else // moving to the next process interval: record what has been counted so far
+            {
+                if(countParticleInTheGroup > 0)
+                    sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup);
+                countParticleInTheGroup = 1;
+                previousMortonCell = mortonCell;
+                ++processId;
+            }
+        }
+        if(countParticleInTheGroup > 0)
+            sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup);
+    }
+}
+
+// Fills nodeRepartition[level][process] = {left, right} Morton bounds: leaf level from distributedMortonIndex, upper levels from the level below shifted by 3 bits.
+void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight) {
+    nodeRepartition.resize(treeHeight, std::vector<std::vector<MortonIndex>>(nproc, std::vector<MortonIndex>(2)));
+    for(int proc = 0; proc < nproc; ++proc){
+        nodeRepartition[treeHeight-1][proc][0] = distributedMortonIndex[2*proc];
+        nodeRepartition[treeHeight-1][proc][1] = distributedMortonIndex[2*proc+1];
+    }
+    for(int lvl = treeHeight - 2; lvl >= 0; --lvl){
+        for(int proc = 0; proc < nproc; ++proc){
+            const MortonIndex parentLeft = nodeRepartition[lvl+1][proc][0] >> 3;
+            nodeRepartition[lvl][proc][0] = (proc == 0) ? parentLeft : FMath::Max(parentLeft, nodeRepartition[lvl][proc-1][0]+1); // stay strictly above the previous process's left bound
+            nodeRepartition[lvl][proc][1] = nodeRepartition[lvl+1][proc][1] >> 3;
+        }
+    }
+}
+
+FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){
+    const FSize remainder = total%mpi_count; // particles left over after an even split
+    const FSize evenShare = (total - remainder)/mpi_count;
+    return (mpi_rank < remainder) ? evenShare+1 : evenShare; // the first `remainder` ranks take one extra particle
+}
diff --git a/Tests/LET_STF_Implicit/testSizeLETGroupTree.cpp b/Tests/LET_STF_Implicit/testSizeLETGroupTree.cpp
new file mode 100644
index 000000000..c2b2570e7
--- /dev/null
+++ b/Tests/LET_STF_Implicit/testSizeLETGroupTree.cpp
@@ -0,0 +1,267 @@
+// ==== CMAKE =====
+// @FUSE_BLAS
+// ================
+// Keep in private GIT
+// @FUSE_MPI
+
+#include "../../Src/Utils/FGlobal.hpp"
+// include algo for linear tree
+#include "inria/algorithm/distributed/mpi.hpp"
+#include "inria/linear_tree/balance_tree.hpp"
+// tree class
+#include "../../Src/GroupTree/Core/FGroupTree.hpp"
+// symbolic data
+#include "../../Src/Components/FSymbolicData.hpp"
+// cell class
+#include "../../Src/Kernels/Chebyshev/FChebCell.hpp"
+// parameter
+#include "../../Src/Utils/FParameters.hpp"
+#include "../../Src/Utils/FParameterNames.hpp"
+// GroupParticleContainer
+#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp"
+// file loader
+#include "../../Src/Files/FMpiFmaGenericLoader.hpp"
+// FBox
+#include "Adaptive/FBox.hpp"
+// Group linear tree
+#include "../../Src/GroupTree/Core/FGroupLinearTree.hpp"
+// Function for GroupLinearTree
+#include "../../Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp"
+
+#include "../../Contribs/json.hpp"
+
+#include <memory>
+
+
+static const int ORDER = 6;
+using FReal               = double;
+using GroupCellClass      = FChebCell<FReal, ORDER>;
+using GroupCellUpClass    = typename GroupCellClass::multipole_t;
+using GroupCellDownClass  = typename GroupCellClass::local_expansion_t;
+using GroupCellSymbClass  = FSymbolicData;
+using GroupContainerClass = FP2PGroupParticleContainer<FReal>;
+using GroupOctreeClass    = FGroupTree<FReal,
+                                        GroupCellSymbClass,
+                                        GroupCellUpClass,
+                                        GroupCellDownClass, GroupContainerClass, 1, 4, FReal>;
+
+// Structure for 1 particle: a position, a physical value and a Morton index
+struct particle_t {
+    using position_t = FPoint<FReal>;
+    position_t pos; // position of the particle
+    FReal phi; // value returned by physicalValue()
+    std::size_t morton_index; // Morton index, exposed through the friend morton_index() below
+    const auto& position() const { // read-only access to pos
+        return pos;
+    }
+    const FPoint<FReal>& getPosition(){ // same value as position(); non-const member, so not callable on a const particle_t
+        return pos;
+    }
+    const auto& physicalValue() const{ // read-only access to phi
+        return phi;
+    }
+    const auto& getPositions() const { // returns pos, like position()
+        return pos;
+    }
+    int weight() const { return 1;} // every particle has the same weight
+    friend constexpr auto morton_index(const particle_t& p) { // free-function accessor to the stored Morton index
+        return p.morton_index;
+    }
+};
+
+// Builds the distributed group tree and its LET from the input file, then has rank 0 print the
+// per-process average memory footprint and write it to "<TreeHeight>_<groupSize>_<nbParticles>.json".
+int main(int argc, char *argv[]) {
+    // Parameter definition
+    const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"};
+    // Parameter help
+    FHelpDescribeAndExit(argc, argv,
+                         "Test the blocked tree created with linear tree." ,FParameterDefinitions::OctreeHeight,
+                         FParameterDefinitions::InputFile,
+                         LocalOptionBlocSize);
+    // Get parameters
+    // Get the groupSize
+    const int groupSize =
+            FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250);
+    // Get the file input
+    const char* const filename       =
+            FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma");
+    // Get the treeHeight
+    const unsigned int TreeHeight    =
+            FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5);
+    // The level is the level of the leaf
+    int level = TreeHeight-1;
+    // Init MPI communicator
+    // MPI initialisation through the FMpi wrapper
+    FMpi FMpiComm(argc,argv);
+    // inria communicator built from the FMpi communicator
+    inria::mpi::communicator mpi_comm(FMpiComm.global().getComm());
+
+    // Show job information
+    std::cout << "JOB INFORMATION " << std::endl;
+    std::cout << "File name : " << filename              << std::endl;
+    std::cout <<  "TreeHeight : "   <<   TreeHeight    << std::endl;
+    std::cout <<  "Block size : "   <<    groupSize   << std::endl;
+    std::cout << "------------------------------------------" << std::endl;
+
+    std::cout << "Opening : " << filename << " ...";
+    FMpiFmaGenericLoader<FReal> loader(filename, FMpiComm.global());
+    std::cout << " done." << std::endl;
+
+    // vector storing all of my particles
+    std::vector<particle_t> myParticles(loader.getMyNumberOfParticles());
+    // define the max level to sort particle
+    const std::size_t max_level = sizeof(particle_t::morton_index) * 8 / 3;
+    // define a box, used in the sort
+    const FBox<FPoint<FReal>> box{loader.getBoxWidth(),loader.getCenterOfBox()};
+
+    // iterate on all of my particles
+    for(FSize idxPart = 0; idxPart <loader.getMyNumberOfParticles();++idxPart){
+        particle_t tmp;
+        // get the current particles
+        loader.fillParticle(&tmp.pos,&tmp.phi);
+        // set the morton index of the current particle at the max_level
+        tmp.morton_index = inria::linear_tree::get_morton_index(
+            tmp.pos, box, max_level);
+        // overwrite the physical value read from the file
+        tmp.phi = 0.1;
+        // add the particle to my vector of particle
+        myParticles.at(idxPart) = tmp;
+    }
+
+    // All my particles are now in a vector and each has a Morton index;
+    // sort them across the processes
+    inria::sort(mpi_comm,myParticles,
+                [](const auto& p1, const auto& p2) {
+                    return p1.morton_index < p2.morton_index;
+                });
+
+    std::cout << " I have " << loader.getMyNumberOfParticles() << " particles ..." << std::endl;
+    std::cout << "For a total of " << loader.getNumberOfParticles() << " particles ..." << std::endl;
+
+    // Create the linear tree:
+    // a linear tree is a tree with only the leaves
+    auto linear_tree =
+        inria::linear_tree::create_balanced_linear_tree_at_level(mpi_comm,
+                                                                 level,
+                                                                 box,
+                                                                 myParticles);
+    // Create a group linear tree: a linear tree carrying
+    // more information
+    // declaration of the group linear tree
+    FGroupLinearTree<decltype(linear_tree)::value_type>group_linear_tree{mpi_comm};
+    group_linear_tree.create_local_group_linear_tree(
+        &linear_tree,
+        groupSize
+    );
+
+    // Redistribute the particles according to the linear tree
+    // Redistribution of particles
+    inria::linear_tree::redistribute_particles(mpi_comm,
+                                               linear_tree,
+                                               myParticles);
+
+    // Recompute the Morton index of every particle
+    // at the leaf level (treeHeight-1)
+    for(unsigned i = 0 ; i < myParticles.size(); ++i){
+        myParticles.at(i).morton_index = inria::linear_tree::get_morton_index(
+            myParticles.at(i).pos, box, level);
+    }
+
+    // Now we need to share the particle distribution to build the GroupTree
+    group_linear_tree.set_index_particle_distribution(myParticles);
+
+    // Declare the group tree:
+    // an empty instance of FGroupTree
+    GroupOctreeClass  localGroupTree = GroupOctreeClass::template get_block_tree_instance<GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass>(TreeHeight,
+                                             groupSize,
+                                             loader.getCenterOfBox(),
+                                             loader.getBoxWidth());
+
+    // Fill the localGroupTree
+    localGroupTree.create_tree(group_linear_tree,myParticles);
+
+    int nb_block = dstr_grp_tree_builder::set_cell_group_global_index(localGroupTree,mpi_comm);
+
+    // now we can create LET
+    localGroupTree.create_LET(group_linear_tree);
+
+    std::size_t particle_blocks = 0;
+    std::size_t total_size = 0;
+    std::size_t cell_blocks = 0;
+
+    // Memory footprint of the particle groups: the group object plus its buffer
+    for(int i = 0; i < localGroupTree.getNbParticleGroup();++i){
+        auto* container = localGroupTree.getParticleGroup(i);
+        total_size += sizeof(*container);
+        total_size += container->getBufferSizeInByte();
+    }
+    particle_blocks = total_size;
+
+    for(int i = localGroupTree.getHeight()-1 ; i > 0 ; --i ){
+        for(int j = 0 ; j < localGroupTree.getNbCellGroupAtLevel(i) ; ++j){
+            auto* container = localGroupTree.getCellGroup(i,j);
+            total_size += sizeof(*container);
+            total_size += container->getBufferSizeInByte();
+        }
+    }
+    cell_blocks = total_size - particle_blocks;
+    total_size += sizeof(localGroupTree);
+    mpi_comm.barrier();
+
+    int nb_proc = mpi_comm.size();
+    int my_rank = mpi_comm.rank();
+    std::vector<std::size_t> vect_result(3);
+    vect_result[0] = particle_blocks;
+    vect_result[1] = total_size;
+    vect_result[2] = cell_blocks;
+    std::vector<std::size_t> vect_recv(0,0);
+    if(my_rank == 0){
+        vect_recv.resize(nb_proc*3);
+    }
+    // Counters travel as raw bytes; data() stays valid on the empty vectors of non-root ranks
+    mpi_comm.gather(
+        vect_result.data(),
+        3*sizeof(std::size_t),
+        MPI_CHAR,
+        vect_recv.data(),
+        3*sizeof(std::size_t),
+        MPI_CHAR,
+        0
+    );
+
+    if(my_rank == 0 ){
+        std::size_t partavg = 0;
+        std::size_t cellavg = 0;
+        std::size_t totalavg = 0;
+        // Each rank contributed {particle blocks, total size, cell blocks}, in that order
+        for(std::size_t i = 0 ; i < vect_recv.size() ; i+= 3){
+            partavg  += vect_recv[i];
+            cellavg  += vect_recv[i+2];
+            totalavg += vect_recv[i+1];
+        }
+
+        partavg /= nb_proc;
+        cellavg /= nb_proc;
+        totalavg /= nb_proc;
+
+        std::cout << "Particle blocks : " << partavg << std::endl;
+        std::cout << "Cell blocks : " << cellavg << std::endl;
+        std::cout << "Total size : " << totalavg << " bytes " << (totalavg)/1000 << " Kb "<< ((totalavg)/1000)/1000 << " Mb " << std::endl;
+
+        nlohmann::json result;
+        std::string name = std::to_string(TreeHeight);
+        name += "_" + std::to_string(groupSize)+"_"+std::to_string(loader.getNumberOfParticles()) + ".json";
+        result["TreeHeight"] = TreeHeight;
+        result["GroupSize"] = groupSize;
+        result["Filename"] = filename;
+        result["NbParticle"] = loader.getNumberOfParticles();
+        result["LETGroupTree"]["ParticlesBlocks"] = partavg;
+        result["LETGroupTree"]["CellsBlocks"] = cellavg;
+        result["LETGroupTree"]["TotalSize"] = totalavg;
+        std::ofstream out(name);
+        out << result << std::endl;
+    }
+
+    return 0;
+}
diff --git a/UTests/utestLetTree.cpp b/UTests/utestLetTree.cpp
new file mode 100644
index 000000000..d50b8e421
--- /dev/null
+++ b/UTests/utestLetTree.cpp
@@ -0,0 +1,213 @@
+// See LICENCE file at project root
+
+// ==== CMAKE =====
+// @FUSE_BLAS
+// @FUSE_MPI
+// ================
+
+#include "../../Src/Utils/FGlobal.hpp"
+// include algo for linear tree
+#include "inria/algorithm/distributed/mpi.hpp"
+#include "inria/linear_tree/balance_tree.hpp"
+// tree class
+#include "GroupTree/Core/FGroupTree.hpp"
+// symbolic data
+#include "Components/FSymbolicData.hpp"
+// cell class
+#include "Kernels/Chebyshev/FChebCell.hpp"
+// parameter
+#include "Utils/FParameters.hpp"
+#include "Utils/FParameterNames.hpp"
+// GroupParticleContainer
+#include "GroupTree/Core/FP2PGroupParticleContainer.hpp"
+// file loader
+#include "Files/FMpiFmaGenericLoader.hpp"
+// FBox
+#include "Adaptive/FBox.hpp"
+// Group linear tree
+#include "GroupTree/Core/FGroupLinearTree.hpp"
+// Function for GroupLinearTree
+#include "GroupTree/Core/FDistributedGroupTreeBuilder.hpp"
+#include "GroupTree/Core/FDistributedLETGroupTreeValidator.hpp"
+#include <memory>
+
+#include "FUTester.hpp"
+
+static const int ORDER = 6;
+using FReal               = double;
+
+struct particle_t { // one particle: a position, a physical value and a Morton index
+    using position_t = FPoint<FReal>;
+    position_t pos; // position of the particle
+    FReal phi; // value returned by physicalValue()
+    // Morton index, exposed through the friend morton_index() below
+    std::size_t morton_index;
+    const auto& position() const { // read-only access to pos
+        return pos;
+    }
+    const FPoint<FReal>& getPosition(){ // same value as position(); non-const member, so not callable on a const particle_t
+        return pos;
+    }
+    const auto& physicalValue() const{ // read-only access to phi
+        return phi;
+    }
+    const auto& getPositions() const { // returns pos, like position()
+        return pos;
+    }
+    int weight() const { return 1;} // every particle has the same weight
+    friend constexpr std::size_t morton_index(const particle_t& p) { // free-function accessor to the stored Morton index
+        return p.morton_index;
+    }
+};
+
+class TestLetGroupTree : public FUTesterMpi<TestLetGroupTree>{
+    // MPI unit test: builds a distributed group tree, checks that the global group indices are
+    // unique, creates the LET and runs the LET validator on it.
+    template<class GroupCellClass
+             ,class GroupCellUpClass
+             ,class GroupCellDownClass
+             ,class GroupCellSymbClass
+             ,class GroupContainerClass
+             ,class GroupOctreeClass>
+    void RunTest(){
+        const int TreeHeight = 5;
+        const int level = TreeHeight-1;
+        const int groupSize = 32;
+        // The particle type is particle_t; leaves live at level TreeHeight-1
+
+
+        // Wrap the application's MPI communicator in an inria communicator
+        inria::mpi::communicator mpi_comm(app.global().getComm());
+
+        // Selection of the file
+        const std::string parFile( (sizeof(FReal) == sizeof(float))?
+                                       "Test/DirectFloatbfma":
+                                       "test20k.fma");
+        std::string filename(SCALFMMDataPath+parFile);
+
+        // Load the file
+        FMpiFmaGenericLoader<FReal> loader(filename, app.global());
+
+        // declare the vector storing my particles
+        std::vector<particle_t> myParticles(loader.getMyNumberOfParticles());
+
+        // maximum level used for the Morton index during the sort: 3 bits of the index per level
+        const std::size_t max_level = sizeof(particle_t::morton_index) * 8 / 3;
+        // define a box, used in the sort
+        const FBox<FPoint<FReal>> box{loader.getBoxWidth(),loader.getCenterOfBox()};
+
+        // iterate on all of my particles
+        for(FSize idxPart = 0; idxPart <loader.getMyNumberOfParticles();++idxPart){
+            particle_t tmp;
+            // get the current particles
+            loader.fillParticle(&tmp.pos,&tmp.phi);
+            // set the morton index of the current particle at the max_level
+            tmp.morton_index =  inria::linear_tree::get_morton_index(
+                                tmp.pos, box, max_level);
+            // overwrite the physical value read from the file
+            tmp.phi = 0.1;
+            //  add the particle to my vector of particle
+            myParticles[idxPart] = tmp;
+        }
+        // Sort the particles by Morton index across the processes
+        inria::sort(mpi_comm,myParticles,
+                [](const auto& p1, const auto& p2) {
+                    return p1.morton_index < p2.morton_index;
+                });
+        // Create linear tree
+        auto linear_tree = inria::linear_tree::create_balanced_linear_tree_at_level(
+            mpi_comm,
+            level,
+            box,                                                     myParticles);
+        // Create empty instance of group linear tree
+        FGroupLinearTree<decltype(linear_tree)::value_type>group_linear_tree{mpi_comm};
+
+
+        // Fill the group linear tree
+        group_linear_tree.create_local_group_linear_tree(
+            &linear_tree,
+            groupSize);
+
+        // Redistribute the particles according to the linear tree
+        inria::linear_tree::redistribute_particles(mpi_comm,
+                                               linear_tree,
+                                               myParticles);
+
+
+       // Recompute the Morton index of every particle at the leaf level
+       for(unsigned i = 0 ; i < myParticles.size(); ++i){
+            myParticles.at(i).morton_index = inria::linear_tree::get_morton_index(
+                myParticles.at(i).pos, box, level);
+        }
+
+        group_linear_tree.set_index_particle_distribution(myParticles);
+
+        // Create an empty group tree instance
+        GroupOctreeClass  localGroupTree = GroupOctreeClass::template get_block_tree_instance<GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass>(TreeHeight,
+                                             groupSize,
+                                             loader.getCenterOfBox(),
+                                             loader.getBoxWidth());
+         std::cout << " Creating Tree" << std::endl;
+        // Fill the tree from the group linear tree and the particles
+        localGroupTree.create_tree(group_linear_tree,myParticles);
+        std::cout << "Tree construit" << std::endl;
+        int nb_block = dstr_grp_tree_builder::set_cell_group_global_index(localGroupTree,mpi_comm);
+        // Check if we have no duplication of global index
+        std::vector<bool> group_index_checker(nb_block,false);
+        // Check on particle group
+        for(int i = 0 ; i < localGroupTree.getNbParticleGroup() ; ++i){
+            auto* container = localGroupTree.getParticleGroup(i);
+            int idx_global = container->getIdxGlobal();
+            uassert(group_index_checker[idx_global] == false);
+            group_index_checker[idx_global] = true;
+        }
+        // Check on cell group
+        for(int j = 0 ; j < localGroupTree.getHeight() ; ++j){
+            for(int i = 0 ; i < localGroupTree.getNbCellGroupAtLevel(j); ++i){
+                auto* container = localGroupTree.getCellGroup(j,i);
+                int idx_global = container->getIdxGlobal();
+                uassert(group_index_checker[idx_global] == false);
+                group_index_checker[idx_global] = true;
+            }
+        }
+        // Create LET
+        localGroupTree.create_LET(group_linear_tree);
+        // launch the let checker
+        bool flag = dstr_grp_tree_vldr::validate_group_tree(localGroupTree,mpi_comm);
+        // if the LET is correct, flag is true
+        uassert(flag);
+    }
+
+
+    // Instantiates RunTest with Chebyshev cells of order ORDER and the P2P group particle container
+    void TestLet(){
+        using GroupCellClass      = FChebCell<FReal, ORDER>;
+        using GroupCellUpClass    = typename GroupCellClass::multipole_t;
+        using GroupCellDownClass  = typename GroupCellClass::local_expansion_t;
+        using GroupCellSymbClass  = FSymbolicData;
+        using GroupContainerClass = FP2PGroupParticleContainer<FReal>;
+        using GroupOctreeClass    = FGroupTree<FReal,
+                                        GroupCellSymbClass,
+                                        GroupCellUpClass,
+                                        GroupCellDownClass, GroupContainerClass, 1, 4, FReal>;
+
+        RunTest<GroupCellClass,
+                GroupCellUpClass,
+                GroupCellDownClass,
+                GroupCellSymbClass,
+                GroupContainerClass,
+                GroupOctreeClass>();
+    }
+    // Registers the test with the unit-test framework
+    void SetTests(){
+        AddTest(&TestLetGroupTree::TestLet,"Test the building of the LET ");
+    }
+
+public:
+    TestLetGroupTree(int argc, char ** argv):
+    FUTesterMpi(argc,argv){
+    }
+
+};
+
+TestClassMpi(TestLetGroupTree);
diff --git a/Utils/stdComplex.hpp b/Utils/stdComplex.hpp
new file mode 100644
index 000000000..4811beb46
--- /dev/null
+++ b/Utils/stdComplex.hpp
@@ -0,0 +1,26 @@
+// ===================================================================================
+// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
+// This software is a computer program whose purpose is to compute the FMM.
+//
+// This software is governed by the CeCILL-C and LGPL licenses and
+// abiding by the rules of distribution of free software.  
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public and CeCILL-C Licenses for more details.
+// "http://www.cecill.info". 
+// "http://www.gnu.org/licenses".
+// ===================================================================================
+#ifndef STDCOMPLEXE_HPP
+#define STDCOMPLEXE_HPP
+
+#include <complex>
+template<typename T>
+using stdComplex = std::complex<T> ;
+
+
+
+#endif //STDCOMPLEXE_HPP
+
+
-- 
GitLab