diff --git a/include/scalfmm/algorithms/mpi/upward.hpp b/include/scalfmm/algorithms/mpi/upward.hpp index 874565ea0219a6844f46d6b17beab9f27af9ced0..53d435ce963d15b9c456ef89f1678cb2f0e1c16e 100644 --- a/include/scalfmm/algorithms/mpi/upward.hpp +++ b/include/scalfmm/algorithms/mpi/upward.hpp @@ -291,268 +291,169 @@ namespace scalfmm::algorithms::mpi::pass // } } } - // std::cout << " END start_communications" << std::endl << std::flush; - // #pragma omp taskwait - // -} -template<typename Tree> -void prepare_comm_up(Tree tree, const int level) -{ - using value_type = typename Tree::base_type::cell_type::value_type; - using dep_type = typename Tree::group_of_cell_type::symbolics_type::ptr_multi_dependency_type; - - static constexpr std::size_t dimension = Tree::base_type::box_type::dimension; - static constexpr int nb_inputs = Tree::cell_type::storage_type::inputs_size; - // - // number of theoretical children - constexpr int nb_children = math::pow(2, dimension); - static constexpr auto prio{omp::priorities::max}; - // - auto& para = tree.get_parallel_manager(); - auto& comm = para.get_communicator(); - auto ptr_comm = &comm; - auto rank = comm.rank(); - int nb_proc = comm.size(); - int tag_nb = 1200 + 10 * level; - int tag_data = 1201 + 10 * level; - auto mpi_int_type = cpp_tools::parallel_manager::mpi::get_datatype<int>(); - // level where the multipoles are known - auto level_child = level + 1; - // iterator on the child cell groups - auto it_first_group_of_child = tree.begin_mine_cells(level_child); - // get size of multipoles - auto it_first_cell_child = it_first_group_of_child->get()->begin(); - auto const& m1 = it_first_cell_child->cmultipoles(); - int size{int(nb_inputs * m1.at(0).size()) * nb_children}; //only nb_children -1 is needed in the worse case - - auto msg = tree.get_up_down_access(level); - - auto const& distrib = tree.get_cell_distribution(level); - auto const& distrib_child = tree.get_cell_distribution(level_child); - bool send_data{false}, receive_data{false}; - - auto ptr_tree = &tree; - - if(rank > 0) + template<typename Tree> + void prepare_comm_up(Tree tree, const int level) { - // If we send the multipoles, they must have been updated by the M2M of the previous level! - // Check if you have to send something on the left (rank-1) - // We are at level l and the children are at level l+1. - // The ghost at the child level(on the right) - // must be updated if the parent index of the first child index is not equal to(lower than) - // the first index at level l. 
- // serialization - // check if we have some child to send (if there exists they starts at the first group) - - // first_index_child = morton index of the first child cell (at level = level_child) - auto first_index_child = distrib_child[rank][0]; - // index_parent = first index of the cell at current level on my process - auto index_parent = distrib[rank][0]; - // parent_morton_index = last index of the cell on previous process at current level - auto previous_parent_morton_index = distrib[rank - 1][1] - 1; - // first_index_child = morton index of the first child cell (at level = level_child) - auto last_index_child_previous = distrib_child[rank - 1][1] - 1; + using value_type = typename Tree::base_type::cell_type::value_type; + using dep_type = typename Tree::group_of_cell_type::symbolics_type::ptr_multi_dependency_type; + + static constexpr std::size_t dimension = Tree::base_type::box_type::dimension; + static constexpr int nb_inputs = Tree::cell_type::storage_type::inputs_size; // - send_data = (first_index_child >> dimension) == previous_parent_morton_index; + // number of theoretical children + constexpr int nb_children = math::pow(2, dimension); + static constexpr auto prio{omp::priorities::max}; + // + auto& para = tree.get_parallel_manager(); + auto& comm = para.get_communicator(); + auto ptr_comm = &comm; + auto rank = comm.rank(); + int nb_proc = comm.size(); + int tag_nb = 1200 + 10 * level; + int tag_data = 1201 + 10 * level; + auto mpi_int_type = cpp_tools::parallel_manager::mpi::get_datatype<int>(); + // level where the multipoles are known + auto level_child = level + 1; + // iterator on the child cell groups + auto it_first_group_of_child = tree.begin_mine_cells(level_child); + // get size of multipoles + auto it_first_cell_child = it_first_group_of_child->get()->begin(); + auto const& m1 = it_first_cell_child->cmultipoles(); + int size{int(nb_inputs * m1.at(0).size()) * nb_children}; //only nb_children -1 is needed in the worse case - if(send_data) - { - std::clog << "upward(task send(" << level << ")) start \n"; - int count{0}; - std::clog << "upward(task send(" << level << ")) index " << first_index_child - << " parent_of_first_index_child " << previous_parent_morton_index << std::endl - << std::flush; - // index is now the parent of the first child - auto index = first_index_child >> dimension; - // I have to send - // find the number of children to send (get pointer on multipoles !!) - // Construct an MPI datatype + auto msg = tree.get_up_down_access(level); + + auto const& distrib = tree.get_cell_distribution(level); + auto const& distrib_child = tree.get_cell_distribution(level_child); + bool send_data{false}, receive_data{false}; + + auto ptr_tree = &tree; + if(rank > 0) + { + // If we send the multipoles, they must have been updated by the M2M of the previous level! + // Check if you have to send something on the left (rank-1) + // We are at level l and the children are at level l+1. + // The ghost at the child level(on the right) + // must be updated if the parent index of the first child index is not equal to(lower than) + // the first index at level l. 
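The ghost-update test described in the comment block above reduces to Morton arithmetic: a cell's parent index is its own index with the last `dimension` bits dropped. A minimal, self-contained sketch of the send-side decision (the variable names mirror those used in prepare_comm_up; the numeric values are invented for illustration):

```cpp
#include <cstdint>
#include <iostream>

int main()
{
    constexpr int dimension = 3;                    // octree: 2^3 children per cell
    std::int64_t first_index_child = 42;            // first child Morton index owned by this rank
    std::int64_t previous_parent_morton_index = 5;  // last parent index owned by rank - 1

    // Dropping the last `dimension` bits of a child index yields the parent index.
    std::int64_t parent = first_index_child >> dimension;  // 42 >> 3 == 5

    // The left neighbour needs our first multipoles only if it owns that parent.
    bool send_data = (parent == previous_parent_morton_index);
    std::cout << "parent = " << parent << ", send_data = " << std::boolalpha << send_data << '\n';
}
```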
// serialization - auto it_group = it_first_group_of_child; - // - auto it_cell = it_first_group_of_child->get()->begin(); - auto next = scalfmm::component::generate_linear_iterator(1, it_group, it_cell); + // check if we have some child to send (if there exists they starts at the first group) + + // first_index_child = morton index of the first child cell (at level = level_child) + auto first_index_child = distrib_child[rank][0]; + // index_parent = first index of the cell at current level on my process + auto index_parent = distrib[rank][0]; + // parent_morton_index = last index of the cell on previous process at current level + auto previous_parent_morton_index = distrib[rank - 1][1] - 1; + // first_index_child = morton index of the first child cell (at level = level_child) + auto last_index_child_previous = distrib_child[rank - 1][1] - 1; // - // compute the number of cells to send and copy the multipoles in the buffer - for(int i = 0; i < nb_children - 1; ++i, next()) + send_data = (first_index_child >> dimension) == previous_parent_morton_index; + + if(send_data) { - if(previous_parent_morton_index < (it_cell->index() >> dimension)) + std::clog << "upward(task send(" << level << ")) start \n"; + int count{0}; + std::clog << "upward(task send(" << level << ")) index " << first_index_child + << " parent_of_first_index_child " << previous_parent_morton_index << std::endl + << std::flush; + // index is now the parent of the first child + auto index = first_index_child >> dimension; + // I have to send + // find the number of children to send (get pointer on multipoles !!) + // Construct an MPI datatype + + // serialization + auto it_group = it_first_group_of_child; + // + auto it_cell = it_first_group_of_child->get()->begin(); + auto next = scalfmm::component::generate_linear_iterator(1, it_group, it_cell); + // + // compute the number of cells to send and copy the multipoles in the buffer + for(int i = 0; i < nb_children - 1; ++i, next()) { - break; + if(previous_parent_morton_index < (it_cell->index() >> dimension)) + { + break; + } + std::clog << "upward(task send) Check children P " << index << " C " << it_cell->index() + << " level " << it_cell->csymbolics().level << std::endl + << std::flush; + ++count; } - std::clog << "upward(task send) Check children P " << index << " C " << it_cell->index() << " level " - << it_cell->csymbolics().level << std::endl - << std::flush; - ++count; - } - std::clog << "upward(task send) nb_send = " << count << std::endl; - ptr_comm->isend(&count, 1, mpi_int_type, rank - 1, tag_nb); - - msg.set_nb_child_to_send(count); - } - } - if(rank < nb_proc - 1) - { - // last_index_child = morton index of the last child cell (at level = level_child) - auto last_index_child = distrib_child[rank][1] - 1; - // parent_morton_index = last index of the cell on previous process at current level - auto parent_morton_index = distrib[rank][1] - 1; - // first_index_child_next = morton index of the first child cell (at level = level_child) on next mpi process - auto first_index_child_next = distrib_child[rank + 1][0]; - receive_data = (last_index_child >> dimension) == (first_index_child_next >> dimension); - if(receive_data) - { - // std::cout << "upward receive comm last_index_child " << last_index_child << " parent " - // << (last_index_child >> dimension) << " next child " << first_index_child_next - // << " its parent " << (first_index_child_next >> dimension) << std::endl - // << std::flush; + std::clog << "upward(task send) nb_send = " << count << std::endl; + 
ptr_comm->isend(&count, 1, mpi_int_type, rank - 1, tag_nb); - std::clog << std::flush; - { - std::clog << "upward(task receive(" << level << ")) start \n"; - int count{0}; - std::vector<value_type> buffer(size); - ptr_comm->recv(&count, 1, mpi_int_type, rank + 1, tag_nb); - std::clog << "upward(task receive(" << level << ")) " << count << " cell(s)\n" << std::flush; - msg.set_nb_child_to_receive(count); + msg.set_nb_child_to_send(count); } } - } -}; -/// @brief This function constructs the local approximation for all the cells of the tree by applying the operator -/// m2m -/// -/// @param tree the tree target -/// @param approximation the approximation to construct the local approximation -/// -template<typename Tree, typename Approximation> -inline auto upward(Tree& tree, Approximation const& approximation) -> void -{ - // If we send the multipoles, they must have been updated by the M2M of the previous level! - // Check if you have to send something on the left (rank-1) - // We are at level l and the children are at level l+1. - // The ghost at the child level(on the right) - // must be updated if the parent index of the first child index is not equal to(lower than) - // the first index at level l. - // serialization - // check if we have some child to send (if there exists they starts at the first group) - - // first_index_child = morton index of the first child cell (at level = level_child) - auto first_index_child = distrib_child[rank][0]; - // index_parent = first index of the cell at current level on my process - auto index_parent = distrib[rank][0]; - // parent_morton_index = last index of the cell on previous process at current level - auto previous_parent_morton_index = distrib[rank - 1][1] - 1; - // first_index_child = morton index of the first child cell (at level = level_child) - auto last_index_child_previous = distrib_child[rank - 1][1] - 1; - // - send_data = (first_index_child >> dimension) == previous_parent_morton_index; - - if(send_data) - { - std::clog << "upward(task send(" << level << ")) start \n"; - int count{0}; - std::clog << "upward(task send(" << level << ")) index " << first_index_child - << " parent_of_first_index_child " << previous_parent_morton_index << std::endl - << std::flush; - // index is now the parent of the first child - auto index = first_index_child >> dimension; - // I have to send - // find the number of children to send (get pointer on multipoles !!) 
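The count exchange in the rewritten branch (an isend of the number of children to rank - 1, matched by a recv from rank + 1 in the receive branch further down, both under the same tag) follows the usual left/right pairing along the rank line. A stripped-down sketch in plain MPI; the project routes these calls through cpp_tools::parallel_manager, so the raw MPI calls below are only illustrative, and the real code posts them conditionally, only when a parent cell straddles the rank boundary:

```cpp
#include <mpi.h>
#include <iostream>

int main(int argc, char** argv)
{
    MPI_Init(&argc, &argv);
    int rank{}, nb_proc{};
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &nb_proc);

    const int level = 3;
    const int tag_nb = 1200 + 10 * level;  // same tag construction as in the pass
    int count_to_send = 2;                 // children whose parent lives on rank - 1
    int count_received = 0;
    MPI_Request req = MPI_REQUEST_NULL;

    if(rank > 0)  // send the child count to the left neighbour
    {
        MPI_Isend(&count_to_send, 1, MPI_INT, rank - 1, tag_nb, MPI_COMM_WORLD, &req);
    }
    if(rank < nb_proc - 1)  // receive the matching count from the right neighbour
    {
        MPI_Recv(&count_received, 1, MPI_INT, rank + 1, tag_nb, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
        std::cout << "rank " << rank << " expects " << count_received << " ghost child(ren)\n";
    }
    if(req != MPI_REQUEST_NULL)
    {
        MPI_Wait(&req, MPI_STATUS_IGNORE);
    }
    MPI_Finalize();
}
```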
- // Construct an MPI datatype - - // serialization - auto it_group = it_first_group_of_child; - // - auto it_cell = it_first_group_of_child->get()->begin(); - auto next = scalfmm::component::generate_linear_iterator(1, it_group, it_cell); - // - // compute the number of cells to send and copy the multipoles in the buffer - for(int i = 0; i < nb_children - 1; ++i, next()) + if(rank < nb_proc - 1) { - if(previous_parent_morton_index < (it_cell->index() >> dimension)) + // last_index_child = morton index of the last child cell (at level = level_child) + auto last_index_child = distrib_child[rank][1] - 1; + // parent_morton_index = last index of the cell on previous process at current level + auto parent_morton_index = distrib[rank][1] - 1; + // first_index_child_next = morton index of the first child cell (at level = level_child) on next mpi process + auto first_index_child_next = distrib_child[rank + 1][0]; + receive_data = (last_index_child >> dimension) == (first_index_child_next >> dimension); + if(receive_data) { - break; + // std::cout << "upward receive comm last_index_child " << last_index_child << " parent " + // << (last_index_child >> dimension) << " next child " << first_index_child_next + // << " its parent " << (first_index_child_next >> dimension) << std::endl + // << std::flush; + + std::clog << std::flush; + { + std::clog << "upward(task receive(" << level << ")) start \n"; + int count{0}; + std::vector<value_type> buffer(size); + ptr_comm->recv(&count, 1, mpi_int_type, rank + 1, tag_nb); + std::clog << "upward(task receive(" << level << ")) " << count << " cell(s)\n" << std::flush; + msg.set_nb_child_to_receive(count); + } } - std::clog << "upward(task send) Check children P " << index << " C " << it_cell->index() << " level " - << it_cell->csymbolics().level << std::endl - << std::flush; - ++count; } - std::clog << "upward(task send) nb_send = " << count << std::endl; - ptr_comm->isend(&count, 1, mpi_int_type, rank - 1, tag_nb); - - msg.set_nb_child_to_send(count); - } -} -if(rank < nb_proc - 1) -{ - // last_index_child = morton index of the last child cell (at level = level_child) - auto last_index_child = distrib_child[rank][1] - 1; - // parent_morton_index = last index of the cell on previous process at current level - auto parent_morton_index = distrib[rank][1] - 1; - // first_index_child_next = morton index of the first child cell (at level = level_child) on next mpi process - auto first_index_child_next = distrib_child[rank + 1][0]; - receive_data = (last_index_child >> dimension) == (first_index_child_next >> dimension); - if(receive_data) + }; + /// @brief This function constructs the local approximation for all the cells of the tree by applying the operator + /// m2m + /// + /// @param tree the tree target + /// @param approximation the approximation to construct the local approximation + /// + template<typename Tree, typename Approximation> + inline auto upward(Tree& tree, Approximation const& approximation) -> void { - // std::cout << "upward receive comm last_index_child " << last_index_child << " parent " - // << (last_index_child >> dimension) << " next child " << first_index_child_next - // << " its parent " << (first_index_child_next >> dimension) << std::endl - // << std::flush; + auto leaf_level = tree.height() - 1; - std::clog << std::flush; + // upper working level is + const int top_height = tree.box().is_periodic() ? 0 : 2; + // const int start_duplicated_level = tree.start_duplicated_level(); + // + // int top = start_duplicated_level < 0 ? 
top_height : start_duplicated_level - 1; + int top = top_height; + for(int level = leaf_level - 1; level >= top /*top_height*/; --level) // int because top_height could be 0 { - std::clog << "upward(task receive(" << level << ")) start \n"; - int count{0}; - std::vector<value_type> buffer(size); - ptr_comm->recv(&count, 1, mpi_int_type, rank + 1, tag_nb); - std::clog << "upward(task receive(" << level << ")) " << count << " cell(s)\n" << std::flush; - msg.set_nb_child_to_receive(count); + // std::cout << "M2M : " << level + 1 << " -> " << level << std::endl << std::flush; + // + start_communications(level, tree); + // std::cout << " end comm " << std::endl << std::flush; + + omp::pass::upward_level(level, tree, approximation); + // std::cout << " end upward_level " << level << std::endl << std::flush; } - } -} -} -; -/// @brief This function constructs the local approximation for all the cells of the tree by applying the operator -/// m2m -/// -/// @param tree the tree target -/// @param approximation the approximation to construct the local approximation -/// -template<typename Tree, typename Approximation> -inline auto upward(Tree& tree, Approximation const& approximation) -> void -{ - auto leaf_level = tree.height() - 1; - - // upper working level is - const int top_height = tree.box().is_periodic() ? 0 : 2; - // const int start_duplicated_level = tree.start_duplicated_level(); - // - // int top = start_duplicated_level < 0 ? top_height : start_duplicated_level - 1; - int top = top_height; - for(int level = leaf_level - 1; level >= top /*top_height*/; --level) // int because top_height could be 0 - { - // std::cout << "M2M : " << level + 1 << " -> " << level << std::endl << std::flush; + // std::cout << "end upward " << std::endl << std::flush; + // - start_communications(level, tree); - // std::cout << " end comm " << std::endl << std::flush; - omp::pass::upward_level(level, tree, approximation); - // std::cout << " end upward_level " << level << std::endl << std::flush; + // for(int level = start_duplicated_level; level >= top_height; --level) // int because top_height could be 0 + // { + // std::cout << "Level duplicated (seq): " << level << std::endl; + // upward_level(level, tree, approximation); + // } } - // std::cout << "end upward " << std::endl << std::flush; - - // - - // for(int level = start_duplicated_level; level >= top_height; --level) // int because top_height could be 0 - // { - // std::cout << "Level duplicated (seq): " << level << std::endl; - // upward_level(level, tree, approximation); - // } -} } // namespace scalfmm::algorithms::mpi::pass #endif // _OPENMP diff --git a/include/scalfmm/algorithms/omp/transfer.hpp b/include/scalfmm/algorithms/omp/transfer.hpp index bb4b5d7698999671877438ca2d87cd2d60e041b3..c8406be836e043298d55add80872d8a2b685b516 100644 --- a/include/scalfmm/algorithms/omp/transfer.hpp +++ b/include/scalfmm/algorithms/omp/transfer.hpp @@ -69,7 +69,7 @@ namespace scalfmm::algorithms::omp::pass auto group_ptr = cell_target_level_it->get(); // dependence on the first local of the group auto ptr_local_raw = &(group_ptr->ccomponent(0).clocals(0)); - static constexpr auto prio{priorities::m2l}; + auto prio{priorities::m2l}; #ifdef SCALFMM_USE_MPI // Increase th priority on the last group (For L2L in MPI) diff --git a/include/scalfmm/algorithms/omp/upward.hpp b/include/scalfmm/algorithms/omp/upward.hpp index a6e972df66c4bfa4cdd4c35aa3d1c4c5829ab7c6..f5e70fed7ea74761497a7f9438d16d300cfc3f26 100644 --- a/include/scalfmm/algorithms/omp/upward.hpp +++ 
b/include/scalfmm/algorithms/omp/upward.hpp @@ -5,7 +5,7 @@ #ifndef SCALFMM_ALGORITHMS_OMP_UPWARD_HPP #define SCALFMM_ALGORITHMS_OMP_UPWARD_HPP -#ifdef _OPENMP +#include <limits> #include "scalfmm/operators/m2m.hpp" #include "scalfmm/operators/tags.hpp" @@ -13,7 +13,7 @@ #include "scalfmm/utils/massert.hpp" #include "scalfmm/utils/math.hpp" -#include <limits> +#ifdef _OPENMP #include <omp.h> namespace scalfmm::algorithms::omp::pass diff --git a/include/scalfmm/tools/fma_dist_loader.hpp b/include/scalfmm/tools/fma_dist_loader.hpp index 9e8f9d39bd57a55636fc1596313e215a4365cb23..28be4b05dd7f37dbdbd973116e6092044c411f9c 100644 --- a/include/scalfmm/tools/fma_dist_loader.hpp +++ b/include/scalfmm/tools/fma_dist_loader.hpp @@ -161,7 +161,7 @@ namespace scalfmm::io int m_headerSize; ///< size of the header in byte - int _nbDataTowritePerRecord; ///< number of data to write for one particle. + int m_nbDataTowritePerRecord; ///< number of data to write for one particle. std::size_t m_numberOfParticles; ///< number of particle (global) to write in the file. @@ -315,11 +315,11 @@ namespace scalfmm::io * @brief Write all for all particles the position, physical values, potential and forces. * * @tparam TreeType - * @param myOctree the octree + * @param tree the octree * @param nbParticles number of particles. */ template<typename TreeType> - auto writeFromTree(const TreeType& myOctree, std::size_t const& nbParticles) -> void + auto writeFromTree(const TreeType& tree, std::size_t const& nbParticles) -> void { // The header is already written static constexpr int dimension = TreeType::dimension; diff --git a/include/scalfmm/tree/group_let.hpp b/include/scalfmm/tree/group_let.hpp index 597597c1af30456984cc5b2247201f59ee321252..1b7e1fd7b639d955bda9146a606d053531cf2f36 100644 --- a/include/scalfmm/tree/group_let.hpp +++ b/include/scalfmm/tree/group_let.hpp @@ -1,10 +1,5 @@ -// -------------------------------- -// See LICENCE file at project root -// File : scalfmm/tree/group_let.hpp -// -------------------------------- -#ifndef SCALFMM_TREE_LET_HPP +#ifndef SCALFMM_TREE_LET_HPP #define SCALFMM_TREE_LET_HPP - #include <algorithm> #include <array> #include <fstream> @@ -15,6 +10,13 @@ #include <utility> #include <vector> +#include <cpp_tools/colors/colorized.hpp> +#include <cpp_tools/parallel_manager/parallel_manager.hpp> + +#include <scalfmm/tree/utils.hpp> +#include <scalfmm/utils/io_helpers.hpp> // for io::print +#include <scalfmm/utils/math.hpp> + #include "scalfmm/container/particle_container.hpp" #include "scalfmm/lists/sequential.hpp" #include "scalfmm/meta/utils.hpp" @@ -22,11 +24,8 @@ #include "scalfmm/parallel/mpi/utils.hpp" #include "scalfmm/parallel/utils.hpp" #include "scalfmm/tree/for_each.hpp" -#include "scalfmm/tree/utils.hpp" -#include "scalfmm/utils/io_helpers.hpp" -#include "scalfmm/utils/math.hpp" - #ifdef SCALFMM_USE_MPI + #include <inria/algorithm/distributed/distribute.hpp> #include <inria/algorithm/distributed/mpi.hpp> #include <inria/algorithm/distributed/sort.hpp> @@ -34,23 +33,15 @@ #include <mpi.h> #endif -#include <cpp_tools/colors/colorized.hpp> -#include <cpp_tools/parallel_manager/parallel_manager.hpp> - namespace scalfmm::tree { using morton_type = std::int64_t; // typename Tree_type:: - /** - * @brief - * - * @tparam MortonIdxType - */ - template<typename MortonIdxType> + template<typename MortonIdx> struct leaf_info_type { - using morton_type = MortonIdxType; - MortonIdxType morton{}; + using morton_type = MortonIdx; + MortonIdx morton{}; std::size_t number_of_particles{}; 
friend std::ostream& operator<<(std::ostream& os, const leaf_info_type& w) { @@ -62,18 +53,11 @@ namespace scalfmm::tree namespace let { - /** - * @brief - * - * @tparam BoxType - * @tparam VectorLeafInfoType - * @tparam MortonDistributionType - */ - template<typename BoxType, typename VectorLeafInfoType, typename MortonDistributionType> - inline /*std::vector<morton_type>*/ VectorLeafInfoType - get_ghosts_p2p_interaction(cpp_tools::parallel_manager::parallel_manager& para, BoxType const& box, - std::size_t const& level, int const& separation, VectorLeafInfoType const& leaf_info, - MortonDistributionType const& leaves_distrib) + template<typename Box, typename VectorLeafInfo, typename MortonDistribution> + inline /*std::vector<morton_type>*/ VectorLeafInfo + get_ghosts_p2p_interaction(cpp_tools::parallel_manager::parallel_manager& para, Box const& box, + std::size_t const& level, int const& separation, VectorLeafInfo const& leaf_info, + MortonDistribution const& leaves_distrib) { std::vector<morton_type> ghost_to_add; auto const& period = box.get_periodicity(); @@ -83,11 +67,12 @@ namespace scalfmm::tree for(auto const& info: leaf_info) { auto const& morton_index = info.morton; - auto coordinate{index::get_coordinate_from_morton_index<BoxType::dimension>(morton_index)}; + auto coordinate{index::get_coordinate_from_morton_index<Box::dimension>(morton_index)}; auto interaction_neighbors = index::get_neighbors(coordinate, level, period, separation); auto& list = std::get<0>(interaction_neighbors); auto nb = std::get<1>(interaction_neighbors); int it{0}; + //io::print("rank(" + std::to_string(rank) + ") list idx(p2p) : ", list); while(list[it] < my_distrib[0]) { @@ -114,7 +99,7 @@ namespace scalfmm::tree std::sort(ghost_to_add.begin(), ghost_to_add.end()); auto last = std::unique(ghost_to_add.begin(), ghost_to_add.end()); ghost_to_add.erase(last, ghost_to_add.end()); - VectorLeafInfoType ghost_leaf_to_add(ghost_to_add.size()); + VectorLeafInfo ghost_leaf_to_add(ghost_to_add.size()); for(int i = 0; i < ghost_to_add.size(); ++i) { ghost_leaf_to_add[i] = {ghost_to_add[i], 0}; @@ -122,32 +107,25 @@ namespace scalfmm::tree return ghost_leaf_to_add; } - - /** - * @brief get theoretical m2l interaction list outside me - * - * We return the list of indexes of cells involved in P2P interaction that we do - * not have locally. The cells on other processors may not exist. - * - * @tparam BoxType - * @tparam VectorMortonIdxType - * @tparam MortonDistributionType - * @param[in] para the parallel manager - * @param box - * @param level - * @param separation - * @param local_morton_vect - * @param cell_distrib the cells distribution on the processes - * @return the list of indexes on tother processes - */ - template<typename BoxType, typename VectorMortonIdxType, typename MortonDistributionType> - inline VectorMortonIdxType get_ghosts_m2l_interaction(cpp_tools::parallel_manager::parallel_manager& para, - BoxType const& box, const std::size_t& level, - int const& separation, - VectorMortonIdxType const& local_morton_vect, - MortonDistributionType const& cell_distrib) + /// + /// \brief get theoretical m2l interaction list outside me + /// + /// We return the list of indexes of cells involved in P2P interaction that we do + /// not have locally. The cells on other processors may not exist. 
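Both ghost-selection routines (get_ghosts_p2p_interaction above and get_ghosts_m2l_interaction documented here) apply the same filter: keep an interaction index only if it lies outside the local Morton interval and inside the interval of some other rank. A compact sketch of that filter, assuming half-open [begin, end) intervals per rank (the actual code expresses the test with math::between over cell_distrib):

```cpp
#include <algorithm>
#include <array>
#include <cstdint>
#include <iostream>
#include <vector>

using morton_type = std::int64_t;

// Keep the candidate indices that are not local but are owned by another rank.
std::vector<morton_type> ghosts_outside_me(std::vector<morton_type> const& candidates,
                                           std::vector<std::array<morton_type, 2>> const& distrib, int my_rank)
{
    auto const& mine = distrib[my_rank];
    std::vector<morton_type> ghosts;
    for(auto idx: candidates)
    {
        if(idx >= mine[0] && idx < mine[1])  // already local, nothing to fetch
        {
            continue;
        }
        for(std::size_t r = 0; r < distrib.size(); ++r)
        {
            if(int(r) != my_rank && idx >= distrib[r][0] && idx < distrib[r][1])
            {
                ghosts.push_back(idx);  // owned elsewhere: becomes a ghost cell
                break;
            }
        }
    }
    std::sort(ghosts.begin(), ghosts.end());
    ghosts.erase(std::unique(ghosts.begin(), ghosts.end()), ghosts.end());
    return ghosts;
}

int main()
{
    std::vector<std::array<morton_type, 2>> distrib{{0, 8}, {8, 16}, {16, 24}};
    auto ghosts = ghosts_outside_me({3, 7, 9, 15, 23}, distrib, /*my_rank*/ 1);
    for(auto g: ghosts) { std::cout << g << ' '; }  // prints: 3 7 23
    std::cout << '\n';
}
```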
+ /// + /// \param[in] para the parallel manager + /// \param tree the tree used to compute the interaction + /// \param local_morton_idx the local morton index of the cells + /// \param cell_distrib the cells distribution on the processes + /// \return the list of indexes on tother processes + /// + template<typename Box, typename VectorMortonIdx, typename MortonDistribution> + inline VectorMortonIdx + get_ghosts_m2l_interaction(cpp_tools::parallel_manager::parallel_manager& para, Box const& box, + const std::size_t& level, int const& separation, + VectorMortonIdx const& local_morton_vect, MortonDistribution const& cell_distrib) { - VectorMortonIdxType ghost_to_add; + VectorMortonIdx ghost_to_add; auto const& period = box.get_periodicity(); const auto rank = para.get_process_id(); auto const my_distrib = cell_distrib[rank]; @@ -156,36 +134,64 @@ namespace scalfmm::tree for(auto morton_index: local_morton_vect) { // for each index in the vector of cells in local_morton_vect we compute the m2l interactions - auto coordinate{index::get_coordinate_from_morton_index<BoxType::dimension>(morton_index)}; + auto coordinate{index::get_coordinate_from_morton_index<Box::dimension>(morton_index)}; auto interaction_m2l_list = index::get_m2l_list(coordinate, level, period, separation); auto& list = std::get<0>(interaction_m2l_list); auto nb = std::get<2>(interaction_m2l_list); + // + // io::print("rank(" + std::to_string(rank) + ") list idx(m2l) : ", list); + // io::print("rank(" + std::to_string(rank) + ") my_distrib : ", my_distrib); int it{0}; // We check if the cells are in the distribution for(auto it = 0; it < nb; ++it) { + // if(list[it] > my_distrib[0]) + // std::cout << list[it] << " " << std::boolalpha + // << math::between(list[it], my_distrib[0], my_distrib[1]) << std::endl; + if(math::between(list[it], my_distrib[0], my_distrib[1])) { break; } bool check{false}; + // for(int i = 0; i < rank; ++i) for(int i = rank - 1; i >= 0; i--) { auto const& interval = cell_distrib[i]; + // // if(rank == 2) + // { + // std::cout << "parallel::utils::is_inside_distrib_left list[it]: " << interval[0] << " < " + // << list[it] + // << " < " << interval[1] << std::endl; + // } check = math::between(list[it], interval[0], interval[1]); if(check) { break; } } + // std::cout << " " << list[it] << " " << std::boolalpha << check << std::endl; if(check) // parallel::utils::is_inside_distrib_left(list[it], rank, cell_distrib)) { ghost_to_add.push_back(list[it]); } } - + // while(list[it] < my_distrib[0]) + // { + // std::cout << it << " INSIDE left idx " << list[it] << " " << std::boolalpha + // << parallel::utils::is_inside_distrib(list[it], cell_distrib) << std::endl; + // if(parallel::utils::is_inside_distrib_left(list[it], rank, cell_distrib)) + // { + // ghost_to_add.push_back(list[it]); + // } + // ++it; + // if(it > nb) + // { + // break; + // } + // } it = nb - 1; if(not last_proc) // No ghost on the right on last process { @@ -198,22 +204,20 @@ namespace scalfmm::tree --it; } } + // if(rank == 2) + // { + // io::print("rank(" + std::to_string(rank) + ") tmp ghost_to_add(m2l) : ", ghost_to_add); + // } } std::sort(ghost_to_add.begin(), ghost_to_add.end()); auto last = std::unique(ghost_to_add.begin(), ghost_to_add.end()); ghost_to_add.erase(last, ghost_to_add.end()); + // io::print("rank(" + std::to_string(rank) + ") cell_distrib: ", cell_distrib); + // io::print("rank(" + std::to_string(rank) + ") ghost_to_add(m2l): ", ghost_to_add); return ghost_to_add; } - /** - * @brief - * - * @tparam 
VectorLeafInfoType - * @param localLeaves - * @param ghosts - * @return auto - */ template<typename VectorLeafInfoType> auto merge_split_structure(VectorLeafInfoType const& localLeaves, VectorLeafInfoType const& ghosts) { @@ -261,7 +265,6 @@ namespace scalfmm::tree return std::make_tuple(morton, number_of_particles); } - /** * @brief Split the LeafInfo structure in two vectors (Morton, number_of_particles) * @@ -286,15 +289,6 @@ namespace scalfmm::tree } return std::make_tuple(morton, number_of_particles); } - - /** - * @brief - * - * @tparam VectorLeafInfoIteratorType - * @param begin - * @param end - * @return auto - */ template<typename VectorLeafInfoIteratorType> auto split_structure(const VectorLeafInfoIteratorType begin, const VectorLeafInfoIteratorType end) { @@ -323,8 +317,8 @@ namespace scalfmm::tree * @return the vector ot the three blocs */ template<typename VectorBlockType> - auto merge_blocs(VectorBlockType const& bloc1, VectorBlockType const& bloc2, - VectorBlockType const& bloc3) -> VectorBlockType + VectorBlockType merge_blocs(VectorBlockType const& bloc1, VectorBlockType const& bloc2, + VectorBlockType const& bloc3) { // Merge the three block structure auto size = bloc1.size() + bloc2.size() + bloc3.size(); @@ -345,22 +339,21 @@ namespace scalfmm::tree } return all_blocks; } - /** * @brief Construct the M2M ghost for the current level * * The routine check if there is ghosts during the M2M operation. * If yes, we exchange the ghost indexes - * @tparam BoxType - * @tparam VectorMortonIdxType - * @tparam MortonDistributionType + * @tparam Box + * @tparam VectorMortonIdx + * @tparam MortonDistribution * @param para the parallel manager * @param box the simulation box * @param level the current level * @param local_morton_vect * @param cells_distrib teh distribution of cells * @param top if top is true nothing is down - * @return VectorMortonIdxType + * @return VectorMortonIdx */ template<typename Box, typename VectorMortonIdx, typename MortonDistribution> [[nodiscard]] auto build_ghost_m2m_let_at_level(cpp_tools::parallel_manager::parallel_manager& para, Box& box, @@ -411,7 +404,8 @@ namespace scalfmm::tree send[idx] = local_morton_vect[0]; for(int i = 1; i < std::min(nb_children, int(local_morton_vect.size())); ++i) { - auto parent_index = local_morton_vect[i] >> BoxType::dimension; + auto parent_index = local_morton_vect[i] >> Box::dimension; + // std::cout << "index : " << local_morton_vect[i] << " Parent ! " << parent_first << " " << last_parent_previous_proc << std::endl; if(parent_index == last_parent_previous_proc) { ++idx; @@ -456,39 +450,39 @@ namespace scalfmm::tree // std::clog << " end build_ghost_m2m_let_at_level" << std::endl; return ghosts; } - - /** - * @brief construct the local essential tree (LET) at the level. - * - * We start from a given Morton index distribution and we compute all - * interactions needed - * in the algorithm steps. - * At the leaf level it corresponds to the interactions coming from the - * direct pass (P2P operators) - * and in the transfer pass (M2L operator). For the other levels we - * consider only the M2L interactions. 
- * The leaves_distrib and the cells_distrib might be different - * At the end the let has also all the interaction list computed - * - * @tparam BoxType - * @tparam VectorMortonIdxType - * @tparam MortonDistributionType - * @param para - * @param box - * @param[in] level the level to construct the let - * @param local_morton_vect - * @param[in] cells_distrib the morton index distribution for - * the cells at the leaf level. - * @param separation - * @return VectorMortonIdxType - */ - template<typename BoxType, typename VectorMortonIdxType, typename MortonDistributionType> - [[nodiscard]] auto build_let_at_level(cpp_tools::parallel_manager::parallel_manager& para, BoxType& box, - const int& level, const VectorMortonIdxType& local_morton_vect, - const MortonDistributionType& cells_distrib, - const int& separation) -> VectorMortonIdxType + /// + /// \brief construct the local essential tree (LET) at the level. + /// + /// We start from a given Morton index distribution and we compute all + /// interactions needed + /// in the algorithm steps. + /// At the leaf level it corresponds to the interactions coming from the + /// direct pass (P2P operators) + /// and in the transfer pass (M2L operator). For the other levels we + /// consider only the M2L interactions. + /// The leaves_distrib and the cells_distrib might be different + /// At the end the let has also all the interaction list computed + /// + /// \param[inout] tree the tree to compute the let. + /// \param[in] local_morton_idx the morton index of the particles in the + /// processors. + /// + /// + /// \param[in] cells_distrib the morton index distribution for + /// the cells at the leaf level. + /// + /// \param[in] level the level to construct the let + /// + template<typename Box, typename VectorMortonIdx, typename MortonDistribution> + [[nodiscard]] auto build_let_at_level(cpp_tools::parallel_manager::parallel_manager& para, Box& box, + const int& level, const VectorMortonIdx& local_morton_vect, + const MortonDistribution& cells_distrib, + const int& separation) -> VectorMortonIdx { const auto my_rank = para.get_process_id(); + // std::cout << cpp_tools::colors::red << " --> Begin let::build_let_at_level() at level = " << level + // << "dist: " << cells_distrib[my_rank] << cpp_tools::colors::reset << std::endl; + // io::print("rank(" + std::to_string(my_rank) + ") local_morton_vect : ", local_morton_vect); // we compute the cells needed in the M2L operator @@ -500,7 +494,13 @@ namespace scalfmm::tree std::cout << std::flush; /// Look if the morton index really exists in the distributed tree parallel::utils::check_if_morton_index_exist(para, needed_idx, cells_distrib, local_morton_vect); + /// + // io::print("rank(" + std::to_string(my_rank) + ") check_if_morton_index_exist(m2l) : ", needed_idx); + // + // std::cout << cpp_tools::colors::red + // << "rank(" + std::to_string(my_rank) + ")-- > End let::build_let_at_level() " + // << cpp_tools::colors::reset << std::endl; return needed_idx; } // template<typename OctreeTree, typename VectorMortonIdx, typename MortonDistribution> @@ -530,13 +530,12 @@ namespace scalfmm::tree // std::cout << cpp_tools::colors::green << " --> End let::build_let_at_level() at level = " << level // << cpp_tools::colors::reset << std::endl; // } - /** * @brief * - * @tparam BoxType - * @tparam VectorMortonIdxType - * @tparam MortonDistributionType + * @tparam Box + * @tparam VectorMortonIdx + * @tparam MortonDistribution * @param para * @param box * @param level @@ -544,11 +543,11 @@ namespace scalfmm::tree 
* @param leaves_distrib * @param separation */ - template<typename BoxType, typename VectorLeafInfo, typename MortonDistributionType> - [[nodiscard]] auto build_let_leaves(cpp_tools::parallel_manager::parallel_manager& para, BoxType const& box, + template<typename Box, typename VectorLeafInfo, typename MortonDistribution> + [[nodiscard]] auto build_let_leaves(cpp_tools::parallel_manager::parallel_manager& para, Box const& box, const std::size_t& level, const VectorLeafInfo& leaf_info /*local_morton_vect*/, - MortonDistributionType const& leaves_distrib, const int& separation) + MortonDistribution const& leaves_distrib, const int& separation) -> VectorLeafInfo { @@ -599,45 +598,46 @@ namespace scalfmm::tree return leaf_info_to_add; } - /** - * @brief buildLetTree Build the let of the tree and the leaves and cells distributions - * - * The algorithm has 5 steps: - * 1) We sort the particles according to their Morton Index (leaf level) - * 2) Build the leaf morton vector of my local particles and construct either - * the leaves distribution or the cell distribution according to parameter - * use_leaf_distribution or use_particle_distribution - * 3) Fit the particles inside the use_leaf_distribution - * 4) Construct the tree according to my particles and build the leaf - * morton vector of my local particles - * 5) Constructing the let level by level - * - * @tparam TreeType - * @tparam VectorType - * @tparam BoxType - * @param[in] manager the parallel manager - * @param[in] number_of_particles total number of particles in the simulation - * @param[in] particle_container vector of particles on my node. On output the array is sorted and correspond to teh distribution built - * @param[in] box size of the simulation box - * @param[in] leaf_level level of the leaf in the tree - * @param[in] level_shared the level at which cells are duplicated on processors. If the level is negative, - * nothing is duplicated. 
-         * @param[in] groupSizeLeaves blocking parameter for the leaves (particles)
-         * @param[in] groupSizeCells blocking parameter for the cells
-         * @param[in] order order of the approximation to build the tree
-         * @param[in] use_leaf_distribution to say if you consider the leaf distribution
-         * @param[in] use_particle_distribution to say if you consider the particle distribution
-         * @return localGroupTree the LET of the octree processors
-         */
-        template<typename TreeType, typename VectorType, typename BoxType>
-        auto buildLetTree(cpp_tools::parallel_manager::parallel_manager& manager,
-                          const std::size_t& number_of_particles, VectorType& particle_container, const BoxType& box,
-                          const int& leaf_level, const int& level_shared, const int groupSizeLeaves,
-                          const int groupSizeCells, const int order, const int separation,
-                          const bool use_leaf_distribution, const bool use_particle_distribution) -> TreeType
+        ///
+        /// \brief buildLetTree Build the LET of the tree and the leaf and cell distributions
+        ///
+        /// The algorithm has 5 steps:
+        /// 1) We sort the particles according to their Morton Index (leaf level)
+        /// 2) Build the leaf morton vector of my local particles and construct either
+        /// the leaves distribution or the cell distribution according to the parameter
+        /// use_leaf_distribution or use_particle_distribution
+        /// 3) Fit the particles inside the use_leaf_distribution
+        /// 4) Construct the tree according to my particles and build the leaf
+        /// morton vector of my local particles
+        /// 5) Construct the LET level by level
+        ///
+        /// \param[in] manager the parallel manager
+        /// \param[in] number_of_particles total number of particles in the simulation
+        /// \param[in] particle_container vector of particles on my node. On output the
+        /// array is sorted and corresponds to the distribution built
+        /// \param[in] box size of the simulation box
+        /// \param[in] leaf_level level of the leaf in the tree
+        /// \param[in] level_shared the level at which cells are duplicated on processors. If the level is negative,
+        /// nothing is duplicated.
+ /// \param[in] groupSizeLeaves blocking parameter for the leaves (particles) + /// \param[in] groupSizeCells blocking parameter for the cells + /// @param[in] order order of the approximation to build the tree + /// @param[in] use_leaf_distribution to say if you consider the leaf distribution + /// @param[in] use_particle_distribution to say if you consider the particle distribution + /// @return localGroupTree the LET of the octree + + /// processors + template<typename Tree_type, typename Vector_type, typename Box_type> + Tree_type + buildLetTree(cpp_tools::parallel_manager::parallel_manager& manager, const std::size_t& number_of_particles, + Vector_type& particle_container, const Box_type& box, const int& leaf_level, + const int& level_shared, const int groupSizeLeaves, const int groupSizeCells, const int order, + const int separation, const bool use_leaf_distribution, const bool use_particle_distribution) { + // std::cout << cpp_tools::colors::green << " --> Begin let::group_let() " << cpp_tools::colors::reset + // << std::endl; // - static constexpr std::size_t dimension = VectorType::value_type::dimension; + static constexpr std::size_t dimension = Vector_type::value_type::dimension; const auto rank = manager.get_process_id(); //////////////////////////////////////////////////////////////////////////// /// Sort the particles at the leaf level according to their Morton index @@ -684,7 +684,7 @@ namespace scalfmm::tree /// /// A morton index should be own by only one process /// - using morton_distrib_type = typename TreeType::data_distrib_type; + using morton_distrib_type = typename Tree_type::data_distrib_type; /// /// Build a uniform distribution of the leaves/cells @@ -752,8 +752,8 @@ namespace scalfmm::tree /// /// Construct the local tree based on our set of particles // Build and empty tree - TreeType localGroupTree(manager, static_cast<std::size_t>(leaf_level + 1), level_shared, order, - groupSizeLeaves, groupSizeCells, box); + Tree_type localGroupTree(manager, static_cast<std::size_t>(leaf_level + 1), level_shared, order, + groupSizeLeaves, groupSizeCells, box); /// Set true because the particles are already sorted /// In fact we have all the leaves to add in leafMortonIdx - could be used to construct /// the tree !!! 
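The hunks that follow turn the sorted particle array into per-leaf bookkeeping: consecutive particles with the same Morton index are collapsed into a single (morton, number_of_particles) entry, which is what localLeafInfo holds. A small sketch of that compression step (leaf_info_type mimics the struct declared earlier in this header; the Morton computation itself is omitted):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

struct leaf_info_type
{
    std::int64_t morton{};
    std::size_t number_of_particles{};
};

// Collapse a sorted list of per-particle Morton indices into one entry per leaf.
std::vector<leaf_info_type> compress(std::vector<std::int64_t> const& particle_morton)
{
    std::vector<leaf_info_type> leaves;
    for(auto m: particle_morton)
    {
        if(!leaves.empty() && leaves.back().morton == m)
        {
            ++leaves.back().number_of_particles;  // same leaf as the previous particle
        }
        else
        {
            leaves.push_back({m, 1});  // first particle of a new leaf
        }
    }
    return leaves;
}

int main()
{
    // Particles already sorted by Morton index, as step 1 of buildLetTree guarantees.
    for(auto const& l: compress({3, 3, 3, 7, 9, 9}))
    {
        std::cout << "leaf " << l.morton << " holds " << l.number_of_particles << " particle(s)\n";
    }
}
```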
@@ -761,6 +761,9 @@ namespace scalfmm::tree #ifdef SCALFMM_USE_MPI + // std::cout << cpp_tools::colors::red; + // io::print("rank(" + std::to_string(rank) + ") leafMortonIdx: ", leafMortonIdx); + // std::cout << cpp_tools::colors::reset << std::endl; /// End //////////////////////////////////////////////////////////////////////////////////////////// /// @@ -772,6 +775,7 @@ namespace scalfmm::tree { leafMortonIdx[i] = scalfmm::index::get_morton_index(particle_container[i].position(), box, leaf_level); } + // io::print("rank(" + std::to_string(rank) + ") --> leafMortonIdx: ", leafMortonIdx); // localLeafInfo contains information on leaves (morton, number of particles) own by th current process std::vector<tree::leaf_info_type<morton_type>> localLeafInfo(leafMortonIdx.size()); @@ -796,6 +800,8 @@ namespace scalfmm::tree } leafMortonIdx.resize(idx + 1); localLeafInfo.resize(leafMortonIdx.size()); + // io::print("rank(" + std::to_string(rank) + ") --> localLeafInfo: ", localLeafInfo); + // io::print("rank(" + std::to_string(rank) + ") --> leafMortonIdx: ", leafMortonIdx); //////////////////////////////////////////////////////////////////////////////////////// // Build the pointer of the tree with all parameters @@ -809,9 +815,17 @@ namespace scalfmm::tree auto ghostP2P_leafInfo = build_let_leaves(manager, box, leaf_level, localLeafInfo, particles_distrib, separation); + // io::print("rank(" + std::to_string(rank) + ") --> final ghostP2P_leafInfo: ", + // ghostP2P_leafInfo); io::print("rank(" + std::to_string(rank) + ") --> final localLeafInfo: ", + // localLeafInfo); localGroupTree.set_leaf_distribution(particles_distrib); + // std::cout << std::flush; + // std::cout << cpp_tools::colors::red; + // std::cout << "END LEAF LEVEL " << std::endl; + // std::cout << cpp_tools::colors::reset; + /// If the distribution is not the same for the leaf and the cell we redistribute the /// morton index according to the uniform distribution of morton index /// @@ -861,20 +875,33 @@ namespace scalfmm::tree // build all leaves between leaf_level - 1 and level_shared -1. 
// we use the maximum because if we don't share certain levels this number is <0 + // std::cout << "std::max(level_shared, int(localGroupTree.top_level())) " + // << std::max(level_shared, int(localGroupTree.top_level())) << std::endl; + // std::cout << " XXXXXXXXXX -> std::max(level_shared, int(localGroupTree.top_level() - 1))" + // << std::max(level_shared, int(localGroupTree.top_level() - 1)) << std::endl; + ; for(int level = leaf_level - 1; level >= localGroupTree.top_level(); --level) { + // std::cout << " $$$$$$$$$$$$$$$$$$$$$$$$$ level " << level << " $$$$$$$$$$$$$$$$$$$$$$$$$ " + // << std::endl; std::int64_t ghost_l2l_cell{-1}; // Get the distribution at the current level, the ghost cell involved in l2l operator // and the morton index of the existing cells at this level level_dist[level] = std::move(parallel::utils::build_upper_distribution( manager, dimension, level, leafMortonIdx, ghost_l2l_cell, level_dist[level + 1])); + // io::print("rank(" + std::to_string(rank) + ") MortonIdx(" + std::to_string(level) + "): ", + // leafMortonIdx); + // std::cout << " ghost_l2l_cell: " << ghost_l2l_cell << std::endl; // Set the distribution in tres tree localGroupTree.set_cell_distribution(level, level_dist[level]); // build the m2l ghost cells at this level auto ghost_cells_level = build_let_at_level(manager, box, level, leafMortonIdx, level_dist[level], separation); + // io::print("rank(" + std::to_string(rank) + ") level=" + std::to_string(level) + + // " --> final ghost_cells(m2l): ", + // ghost_cells_level); // build the m2m ghost cells at this level auto ghost_m2m_cells = build_ghost_m2m_let_at_level( @@ -884,8 +911,12 @@ namespace scalfmm::tree // Create the groupe of cells structure for this level localGroupTree.create_cells_at_level(level, leafMortonIdx, ghost_cells_level, ghost_m2m_cells, ghost_l2l_cell, level_dist[level][rank]); + // std::cout << " $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$ " << std::endl + // << std::flush; } + // std::cout << " end loop\n" << std::flush; manager.get_communicator().barrier(); + // std::cout << " end barrier\n" << std::flush; } else #endif // SCALFMM_USE_MPI @@ -901,6 +932,7 @@ namespace scalfmm::tree localGroupTree.construct(particleMortonIndex); // then, we fill each leaf with its particles (the particle container is sorted ) localGroupTree.fill_leaves_with_particles(particle_container); + // localGroupTree.set_leaf_distribution(particles_distrib); localGroupTree.set_cell_distribution(leaf_level, leaves_distrib); @@ -913,8 +945,17 @@ namespace scalfmm::tree } } + // std::cout << cpp_tools::colors::red << std::endl << std::flush; + // std::cout << "set iterators \n" << std::flush << std::flush; localGroupTree.set_valid_iterators(true); + // std::cout << "begin fill_leaves_with_particles \n" << std::flush; localGroupTree.fill_leaves_with_particles(particle_container); + // std::cout << "end fill_leaves_with_particles \n" << std::flush; + + // std::cout << cpp_tools::colors::reset << std::endl; + // std::cout << cpp_tools::colors::green << " --> End let::group_let() " << cpp_tools::colors::reset + // << std::endl + // << std::flush; return localGroupTree; }
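For the coarser levels, the loop above repeatedly converts the Morton indices of the current level into those of the parent level before rebuilding the distribution and the ghost lists. A minimal sketch of that index reduction (build_upper_distribution does more work than this, since it also negotiates the boundaries between ranks, so only the index arithmetic is shown):

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Parents of a sorted set of Morton indices: drop `dimension` bits, then deduplicate.
std::vector<std::int64_t> parent_level(std::vector<std::int64_t> cells, std::size_t dimension)
{
    for(auto& c: cells)
    {
        c >>= dimension;
    }
    cells.erase(std::unique(cells.begin(), cells.end()), cells.end());  // sorted input => sorted parents
    return cells;
}

int main()
{
    constexpr std::size_t dimension = 3;  // octree
    std::vector<std::int64_t> cells_at_level_3{8, 9, 10, 15, 16, 23, 24};
    for(auto c: parent_level(cells_at_level_3, dimension))
    {
        std::cout << c << ' ';  // prints: 1 2 3
    }
    std::cout << '\n';
}
```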