From 6151cdfa19fbe3fb10e1bab1dd71ef75e261edfb Mon Sep 17 00:00:00 2001
From: berenger-bramas <berenger-bramas@2616d619-271b-44dc-8df4-d4a8f33a7222>
Date: Tue, 29 Mar 2011 12:22:46 +0000
Subject: [PATCH] Upgrading iteration performances & adding simple threaded
 versions

git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@19 2616d619-271b-44dc-8df4-d4a8f33a7222
---
 Sources/Containers/FList.hpp                  |  42 +++
 Sources/Containers/FOctree.hpp                |   9 +-
 Sources/Containers/FSubOctree.hpp             |   2 +-
 Sources/Core/FAbstractKernels.hpp             |  12 +-
 Sources/Core/FBasicKernels.hpp                |  24 +-
 Sources/Core/FFMMAlgorithm.hpp                |  36 +-
 Sources/Core/FFMMAlgorithmThreaded.hpp        | 285 ++++++++++++++++
 .../Core/FFMMAlgorithmThreadedInterval.hpp    | 317 ++++++++++++++++++
 Sources/Fmb/FAbstractFmbKernels.hpp           |  37 +-
 Sources/Fmb/FExtendFmbCell.hpp                |   9 +
 Sources/Fmb/FFmbKernelsForces.hpp             |   4 +-
 Sources/Fmb/FFmbKernelsPotential.hpp          |   4 +-
 Sources/Fmb/FFmbKernelsPotentialForces.hpp    |  46 ++-
 Tests/testFMMAlgorithm.cpp                    |  55 ++-
 Tests/testFMMAlgorithmThreaded.cpp            | 278 +++++++++++++++
 Tests/testFmbAlgorithm.cpp                    |   4 +-
 16 files changed, 1109 insertions(+), 55 deletions(-)
 create mode 100644 Sources/Core/FFMMAlgorithmThreaded.hpp
 create mode 100644 Sources/Core/FFMMAlgorithmThreadedInterval.hpp
 create mode 100644 Tests/testFMMAlgorithmThreaded.cpp

diff --git a/Sources/Containers/FList.hpp b/Sources/Containers/FList.hpp
index b19a7b838..f47778564 100644
--- a/Sources/Containers/FList.hpp
+++ b/Sources/Containers/FList.hpp
@@ -213,6 +213,48 @@ public:
 
         };
 
+        class ConstBasicIterator {
+        private:
+            const Node* iter; //< current node on the list
+
+        public:
+            /**
+              * Constructor needs the target list
+              * @param the list to iterate on
+              */
+            ConstBasicIterator(const FList& list) : iter(list.root){
+            }
+
+            /** to progress on the list */
+            void progress(){
+                if(this->iter) this->iter = this->iter->next;
+            }
+
+            /**
+            * Current pointed value
+            * current iterator must be valide (isValide()) to use this function
+            */
+            Object value(){
+                return this->iter->target;
+            }
+
+            /**
+            * Current pointed value
+            * current iterator must be valide (isValide()) to use this function
+            */
+            const Object& value() const{
+                return this->iter->target;
+            }
+
+            /**
+            * To know if an iterator is at the end of the list
+            * @return true if the current iterator can progress and access to value, else false
+            */
+            bool isValide() const{
+                return iter;
+            }
+        };
+
 };
 
 #endif //FLIST_HPP
diff --git a/Sources/Containers/FOctree.hpp b/Sources/Containers/FOctree.hpp
index 50dfdc6cf..ab42c3971 100644
--- a/Sources/Containers/FOctree.hpp
+++ b/Sources/Containers/FOctree.hpp
@@ -192,6 +192,10 @@ public:
                 this->currentLocalIndex = TransposeIndex(this->current.tree->getLeftLeafIndex(), (this->current.tree->getSubOctreeHeight() - this->currentLocalLevel - 1) );
             }
 
+            Iterator() : currentLocalLevel(0), currentLocalIndex(0) {
+                current.tree = 0;
+            }
+
             /** Copy constructor
               * @param other source iterator to copy
               */
@@ -206,7 +210,10 @@ public:
               * @return this after copy
               */
             Iterator& operator=(const Iterator& other){
-                memcpy(this, &other, sizeof(Iterator));
+                this->current = other.current ;
+                this->currentLocalLevel = other.currentLocalLevel ;
+                this->currentLocalIndex = other.currentLocalIndex ;
+                return *this;
             }
 
             /**
diff --git a/Sources/Containers/FSubOctree.hpp b/Sources/Containers/FSubOctree.hpp
index 419e1e19b..4fa93d6cd 100644
--- a/Sources/Containers/FSubOctree.hpp
+++ b/Sources/Containers/FSubOctree.hpp
@@ -34,7 +34,7 @@
  * @warning Give the particuleClass & cellClass
  */
 template< class ParticuleClass, class CellClass >
-class FAbstractSubOctree : public FAssertable{
+class FAbstractSubOctree : protected FAssertable{
 protected:
     const int subOctreeHeight;	            //< Height of this suboctree
     const int subOctreePosition;	    //< Level of the current suboctree in the global tree (0 if node)
diff --git a/Sources/Core/FAbstractKernels.hpp b/Sources/Core/FAbstractKernels.hpp
index d2b0e6cf3..3133e04f7 100644
--- a/Sources/Core/FAbstractKernels.hpp
+++ b/Sources/Core/FAbstractKernels.hpp
@@ -20,22 +20,22 @@ public:
     virtual void init(){}
 
     /** P2M */
-    virtual void P2M(CellClass* const pole, FList<ParticuleClass*>* const particules) = 0;
+    virtual void P2M(CellClass* const pole, const FList<ParticuleClass*>* const particules) = 0;
 
     /** M2M */
-    virtual void M2M(CellClass* const pole, CellClass** const child, const int inLevel) = 0;
+    virtual void M2M(CellClass* const pole, const CellClass*const* const child, const int inLevel) = 0;
 
     /** M2L */
-    virtual void M2L(CellClass* const pole, CellClass** const distantNeighbors, const int size, const int inLevel) = 0;
+    virtual void M2L(CellClass* const pole, const CellClass*const* const distantNeighbors, const int size, const int inLevel) = 0;
 
     /** L2L */
-    virtual void L2L(CellClass* const pole, CellClass** const child, const int inLevel) = 0;
+    virtual void L2L(const CellClass* const pole, CellClass** const child, const int inLevel) = 0;
 
     /** L2P */
-    virtual void L2P(CellClass* const pole, FList<ParticuleClass*>* const particules) = 0;
+    virtual void L2P(const CellClass* const pole, FList<ParticuleClass*>* const particules) = 0;
 
     /** P2P */
-    virtual void P2P(FList<ParticuleClass*>* const pole, FList<ParticuleClass*>** const directNeighbors, const int size) = 0;
+    virtual void P2P(FList<ParticuleClass*>* const pole, const FList<ParticuleClass*>*const* const directNeighbors, const int size) = 0;
 };
 
 
diff --git a/Sources/Core/FBasicKernels.hpp b/Sources/Core/FBasicKernels.hpp
index d1f0d4fd1..dab607318 100644
--- a/Sources/Core/FBasicKernels.hpp
+++ b/Sources/Core/FBasicKernels.hpp
@@ -26,33 +26,33 @@ public:
     virtual void init(){}
 
     /** Print the number of particules */
-    virtual void P2M(CellClass* const pole, FList<ParticuleClass*>* const particules) {
-        FDEBUG( FDebug::Controller << "P2M : " << particules->getSize() << "\n" );
+    virtual void P2M(CellClass* const pole, const FList<ParticuleClass*>* const particules) {
+        //FDEBUG( FDebug::Controller << "P2M : " << particules->getSize() << "\n" );
     }
 
     /** Print the morton index */
-    virtual void M2M(CellClass* const pole, CellClass** const child, const int inLevel) {
-        FDEBUG( FDebug::Controller << "M2M : " << pole->getMortonIndex() << "\n" );
+    virtual void M2M(CellClass* const pole, const CellClass*const* const child, const int inLevel) {
+        //FDEBUG( FDebug::Controller << "M2M : " << pole->getMortonIndex() << "\n" );
     }
 
     /** Print the morton index */
-    virtual void M2L(CellClass* const pole, CellClass** const distantNeighbors, const int size, const int inLevel) {
-        FDEBUG( FDebug::Controller << "M2L : " << pole->getMortonIndex() << " (" << size << ")\n" );
+    virtual void M2L(CellClass* const pole, const CellClass*const* const distantNeighbors, const int size, const int inLevel) {
+        //FDEBUG( FDebug::Controller << "M2L : " << pole->getMortonIndex() << " (" << size << ")\n" );
     }
 
     /** Print the morton index */
-    virtual void L2L(CellClass* const local, CellClass** const child, const int inLevel) {
-        FDEBUG( FDebug::Controller << "L2L : " << local->getMortonIndex() << "\n" );
+    virtual void L2L(const CellClass* const local, CellClass** const child, const int inLevel) {
+        //FDEBUG( FDebug::Controller << "L2L : " << local->getMortonIndex() << "\n" );
     }
 
     /** Print the number of particules */
-    virtual void L2P(CellClass* const pole, FList<ParticuleClass*>* const particules){
-        FDEBUG( FDebug::Controller << "L2P : " << particules->getSize() << "\n" );
+    virtual void L2P(const CellClass* const pole, FList<ParticuleClass*>* const particules){
+        //FDEBUG( FDebug::Controller << "L2P : " << particules->getSize() << "\n" );
     }
 
     /** Print the number of particules */
-    virtual void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) {
-        FDEBUG( FDebug::Controller << "P2P : " << currentBox->getSize() << " (" << size << ")\n" );
+    virtual void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) {
+        //FDEBUG( FDebug::Controller << "P2P : " << currentBox->getSize() << " (" << size << ")\n" );
     }
 };
 
diff --git a/Sources/Core/FFMMAlgorithm.hpp b/Sources/Core/FFMMAlgorithm.hpp
index 59777ff4a..b88fe6c7d 100644
--- a/Sources/Core/FFMMAlgorithm.hpp
+++ b/Sources/Core/FFMMAlgorithm.hpp
@@ -19,7 +19,7 @@
 * It just iterates on a tree and call the kernls with good arguments
 */
 template<template< class ParticuleClass, class CellClass> class KernelClass, class ParticuleClass, class CellClass, int OctreeHeight, int SubtreeHeight>
-class FFMMAlgorithm : public FAssertable{
+class FFMMAlgorithm : protected FAssertable{
     FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const tree;    //< The octree to work on
     KernelClass<ParticuleClass, CellClass>* const kernels;                     //< The kernels
 
@@ -36,6 +36,7 @@ public:
         : tree(inTree) , kernels(inKernels) {
         assert(tree, "tree cannot be null", __LINE__, __FILE__);
         assert(kernels, "kernels cannot be null", __LINE__, __FILE__);
+        FDEBUG_TRACE(FDebug::Controller.write("FFMMAlgorithm\n"));
     }
 
     /** Default destructor */
@@ -82,6 +83,9 @@ public:
         typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator octreeIterator(tree);
         octreeIterator.gotoBottomLeft();
         octreeIterator.moveUp();
+
+        typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator avoidGotoLeftIterator(octreeIterator);
+
         // for each levels
         for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
             // for each cells
@@ -90,8 +94,10 @@ public:
                 // child is an array (of 8 child) that may be null
                 kernels->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
             } while(octreeIterator.moveRight());
-            octreeIterator.moveUp();
-            octreeIterator.gotoLeft();
+            //octreeIterator.moveUp();
+            //octreeIterator.gotoLeft();
+            avoidGotoLeftIterator.moveUp();
+            octreeIterator = avoidGotoLeftIterator;
         }
 
         FDEBUG_TIME(counter.tac(););
@@ -106,16 +112,21 @@ public:
         { // first M2L
             typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator octreeIterator(tree);
             octreeIterator.moveDown();
+
+            typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator avoidGotoLeftIterator(octreeIterator);
+
             CellClass* neighbors[208];
             // for each levels
             for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
                 // for each cells
                 do{
                     const int counter = tree->getDistantNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),idxLevel);
-                    kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
+                    if(counter) kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
                 } while(octreeIterator.moveRight());
-                octreeIterator.gotoLeft();
-                octreeIterator.moveDown();
+                //octreeIterator.gotoLeft();
+                //octreeIterator.moveDown();
+                avoidGotoLeftIterator.moveDown();
+                octreeIterator = avoidGotoLeftIterator;
             }
         }
         FDEBUG_TIME(counter.tac(););
@@ -126,6 +137,9 @@ public:
         { // second L2L
             typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator octreeIterator(tree);
             octreeIterator.moveDown();
+
+            typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator avoidGotoLeftIterator(octreeIterator);
+
             const int heightMinusOne = OctreeHeight - 1;
             // for each levels exepted leaf level
             for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
@@ -133,8 +147,10 @@ public:
                 do{
                     kernels->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
                 } while(octreeIterator.moveRight());
-                octreeIterator.gotoLeft();
-                octreeIterator.moveDown();
+                //octreeIterator.gotoLeft();
+                //octreeIterator.moveDown();
+                avoidGotoLeftIterator.moveDown();
+                octreeIterator = avoidGotoLeftIterator;
             }
         }
 
@@ -148,6 +164,8 @@ public:
         FDEBUG_TRACE( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
         FDEBUG_TIME(counter.tic(););
 
+        const int heightMinusOne = OctreeHeight - 1;
+
         typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator octreeIterator(tree);
         octreeIterator.gotoBottomLeft();
         // There is a maximum of 26 neighbors
@@ -156,7 +174,7 @@ public:
         do{
             kernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentList());
             // need the current particules and neighbors particules
-            const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),OctreeHeight-1);
+            const int counter = tree->getLeafsNeighbors(neighbors, octreeIterator.getCurrentGlobalIndex(),heightMinusOne);
             kernels->P2P( octreeIterator.getCurrentList() , neighbors, counter);
         } while(octreeIterator.moveRight());
 
diff --git a/Sources/Core/FFMMAlgorithmThreaded.hpp b/Sources/Core/FFMMAlgorithmThreaded.hpp
new file mode 100644
index 000000000..eac79bf56
--- /dev/null
+++ b/Sources/Core/FFMMAlgorithmThreaded.hpp
@@ -0,0 +1,285 @@
+#ifndef FFMMALGORITHMTHREADED_HPP
+#define FFMMALGORITHMTHREADED_HPP
+// /!\ Please, you must read the license at the bottom of this page
+
+#include "../Utils/FAssertable.hpp"
+#include "../Utils/FDebug.hpp"
+#include "../Utils/FTic.hpp"
+#include "../Utils/FOpenMPThread.hpp"
+
+#include "../Containers/FOctree.hpp"
+
+#include <omp.h>
+
+
+/**
+* @author Berenger Bramas (berenger.bramas@inria.fr)
+* @class FFMMAlgorithmThreaded
+* @brief
+* Please read the license
+*
+*/
+template<template< class ParticuleClass, class CellClass> class KernelClass, class ParticuleClass, class CellClass, int OctreeHeight, int SubtreeHeight>
+class FFMMAlgorithmThreaded : protected FAssertable{
+    typedef typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator FOctreeIterator;
+
+    static const int NbThreads = 4;
+
+    FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const tree;    //< The octree to work on
+    KernelClass<ParticuleClass, CellClass>* kernels[NbThreads];                     //< The kernels
+
+
+    FDEBUG_TIME(FTic counter);       //< In case of debug count the time
+
+public:
+    /** The constructor need the octree and the kernels used for computation
+      * @param inTree the octree
+      * @param inKernels the kernels
+      * an assert is launched if one of the arguments is null
+      */
+    FFMMAlgorithmThreaded(FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const inTree,
+                  KernelClass<ParticuleClass, CellClass>* const inKernels)
+        : tree(inTree) {
+        assert(tree, "tree cannot be null", __LINE__, __FILE__);
+        assert(kernels, "kernels cannot be null", __LINE__, __FILE__);
+
+        for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
+            this->kernels[idxThread] = new KernelClass<ParticuleClass, CellClass>(*inKernels);
+        }
+
+        FDEBUG_TRACE(FDebug::Controller.write("FFMMAlgorithmThreaded\n"));
+    }
+
+    /** Default destructor */
+    virtual ~FFMMAlgorithmThreaded(){
+        for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
+            delete this->kernels[idxThread];
+        }
+    }
+
+    /** To execute the fmm algorithm
+      * Call this function to run the complete algo
+      */
+    void execute(){
+        for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
+            this->kernels[idxThread]->init();
+        }
+
+        bottomPass();
+        upwardPass();
+
+        downardPass();
+
+        directPass();
+    }
+
+    /** P2M */
+    void bottomPass(){
+        FDEBUG_TRACE( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
+        FDEBUG_TIME(counter.tic(););
+
+        FOctreeIterator octreeIterator(tree);
+        // Iterate on leafs
+        octreeIterator.gotoBottomLeft();
+
+        omp_lock_t mutex;
+        omp_init_lock(&mutex);
+        bool stop = false;
+        #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads)
+        {
+            const int threadId = omp_get_thread_num();
+            omp_set_lock(&mutex);
+            while(!stop){
+                CellClass*const cell = octreeIterator.getCurrentCell();
+                const FList<ParticuleClass*>* const particules = octreeIterator.getCurrentList();
+                if(!octreeIterator.moveRight()) stop = true;
+                omp_unset_lock(&mutex);
+
+                // We need the current cell that represent the leaf
+                // and the list of particules
+                kernels[threadId]->P2M( cell , particules);
+
+                omp_set_lock(&mutex);
+            }
+            omp_unset_lock(&mutex);
+        }
+        omp_destroy_lock(&mutex);
+
+        FDEBUG_TIME(counter.tac(););
+        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
+    }
+
+    /** M2M */
+    void upwardPass(){
+        FDEBUG_TRACE( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
+        FDEBUG_TIME(counter.tic(););
+
+        FOctreeIterator octreeIterator(tree);
+        octreeIterator.gotoBottomLeft();
+        octreeIterator.moveUp();
+
+        omp_lock_t mutex;
+        omp_init_lock(&mutex);
+        // for each levels
+        for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
+            bool stop = false;
+            #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads)
+            {
+                const int threadId = omp_get_thread_num();
+                omp_set_lock(&mutex);
+                // for each cells
+                while(!stop){
+                    // We need the current cell and the child
+                    // child is an array (of 8 child) that may be null
+                    CellClass*const cell = octreeIterator.getCurrentCell();
+                    const CellClass*const*const child = octreeIterator.getCurrentChild();
+                    if(!octreeIterator.moveRight()) stop = true;
+                    omp_unset_lock(&mutex);
+
+                    kernels[threadId]->M2M( cell , child, idxLevel);
+
+                    omp_set_lock(&mutex);
+                }
+                omp_unset_lock(&mutex);
+            }
+            octreeIterator.moveUp();
+            octreeIterator.gotoLeft();
+        }
+        omp_destroy_lock(&mutex);
+
+        FDEBUG_TIME(counter.tac(););
+        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
+    }
+
+    /** M2L L2L */
+    void downardPass(){
+        FDEBUG_TRACE( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
+        FDEBUG_TIME(counter.tic(););
+
+        { // first M2L
+            FOctreeIterator octreeIterator(tree);
+            octreeIterator.moveDown();
+
+            omp_lock_t mutex;
+            omp_init_lock(&mutex);
+
+            // for each levels
+            for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
+                bool stop = false;
+                #pragma omp parallel shared(octreeIterator,mutex,idxLevel,stop) num_threads(NbThreads)
+                {
+                    const int threadId = omp_get_thread_num();
+                    CellClass* neighbors[208];
+                    omp_set_lock(&mutex);
+                    // for each cells
+                    while(!stop){
+                        CellClass* const cell = octreeIterator.getCurrentCell();
+                        const MortonIndex cellIndex = octreeIterator.getCurrentGlobalIndex();
+                        if(!octreeIterator.moveRight()) stop = true;
+                        omp_unset_lock(&mutex);
+
+                        const int counter = tree->getDistantNeighbors(neighbors, cellIndex,idxLevel);
+                        if(counter) kernels[threadId]->M2L( cell, neighbors, counter, idxLevel);
+
+                        omp_set_lock(&mutex);
+                    }
+                    omp_unset_lock(&mutex);
+                }
+                octreeIterator.gotoLeft();
+                octreeIterator.moveDown();
+            }
+            omp_destroy_lock(&mutex);
+        }
+        FDEBUG_TIME(counter.tac(););
+        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
+
+        FDEBUG_TRACE( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
+        FDEBUG_TIME(counter.tic(););
+        { // second L2L
+            FOctreeIterator octreeIterator(tree);
+            octreeIterator.moveDown();
+            const int heightMinusOne = OctreeHeight - 1;
+
+            omp_lock_t mutex;
+            omp_init_lock(&mutex);
+            // for each levels exepted leaf level
+            for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
+                bool stop = false;
+                #pragma omp parallel shared(octreeIterator,mutex,idxLevel,stop) num_threads(NbThreads)
+                {
+                    const int threadId = omp_get_thread_num();
+                    omp_set_lock(&mutex);
+                    // for each cells
+                    while(!stop){
+                        const CellClass * const cell = octreeIterator.getCurrentCell();
+                        CellClass ** const child = octreeIterator.getCurrentChild();
+                        if(!octreeIterator.moveRight()) stop = true;
+                        omp_unset_lock(&mutex);
+
+                        kernels[threadId]->L2L( cell, child, idxLevel);
+
+                        omp_set_lock(&mutex);
+                    }
+                    omp_unset_lock(&mutex);
+                }
+
+                octreeIterator.gotoLeft();
+                octreeIterator.moveDown();
+            }
+            omp_destroy_lock(&mutex);
+        }
+
+        FDEBUG_TIME(counter.tac(););
+        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
+
+    }
+
+    /** P2P */
+    void directPass(){
+        FDEBUG_TRACE( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
+        FDEBUG_TIME(counter.tic(););
+
+        FOctreeIterator octreeIterator(tree);
+        octreeIterator.gotoBottomLeft();
+
+        omp_lock_t mutex;
+        omp_init_lock(&mutex);
+        bool stop = false;
+        #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads)
+        {
+            const int threadId = omp_get_thread_num();
+            // There is a maximum of 26 neighbors
+            FList<ParticuleClass*>* neighbors[26];
+            const int heightMinusOne = OctreeHeight - 1;
+
+            omp_set_lock(&mutex);
+            // for each leafs
+            while(!stop){
+                const CellClass * const cell = octreeIterator.getCurrentCell();
+                FList<ParticuleClass*>* const particules = octreeIterator.getCurrentList();
+                const MortonIndex cellIndex = octreeIterator.getCurrentGlobalIndex();
+                if(!octreeIterator.moveRight()) stop = true;
+                omp_unset_lock(&mutex);
+
+                kernels[threadId]->L2P(cell, particules);
+                // need the current particules and neighbors particules
+                const int counter = tree->getLeafsNeighbors(neighbors, cellIndex,heightMinusOne);
+                kernels[threadId]->P2P( particules , neighbors, counter);
+
+                omp_set_lock(&mutex);
+            }
+            omp_unset_lock(&mutex);
+        }
+        omp_destroy_lock(&mutex);
+
+        FDEBUG_TIME(counter.tac(););
+        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
+
+    }
+
+};
+
+
+#endif //FFMMALGORITHMTHREADED_HPP
+
+// [--LICENSE--]
diff --git a/Sources/Core/FFMMAlgorithmThreadedInterval.hpp b/Sources/Core/FFMMAlgorithmThreadedInterval.hpp
new file mode 100644
index 000000000..6822e2788
--- /dev/null
+++ b/Sources/Core/FFMMAlgorithmThreadedInterval.hpp
@@ -0,0 +1,317 @@
+#ifndef FFMMALGORITHMTHREADEDTHREADED_HPP
+#define FFMMALGORITHMTHREADEDTHREADED_HPP
+// /!\ Please, you must read the license at the bottom of this page
+
+#include "../Utils/FAssertable.hpp"
+#include "../Utils/FDebug.hpp"
+#include "../Utils/FTic.hpp"
+#include "../Utils/FOpenMPThread.hpp"
+
+#include "../Containers/FOctree.hpp"
+
+#include <omp.h>
+
+
+/**
+* @author Berenger Bramas (berenger.bramas@inria.fr)
+* @class FFMMAlgorithmThreadedInterval
+* @brief
+* Please read the license
+*
+*/
+template<template< class ParticuleClass, class CellClass> class KernelClass, class ParticuleClass, class CellClass, int OctreeHeight, int SubtreeHeight>
+class FFMMAlgorithmThreadedInterval : protected FAssertable{
+    typedef typename FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>::Iterator FOctreeIterator;
+
+    static const int NbThreads = 1;
+    static const int SizeInterval = 50;
+
+    FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const tree;    //< The octree to work on
+    KernelClass<ParticuleClass, CellClass>* kernels[NbThreads];                     //< The kernels
+
+    FDEBUG_TIME(FTic counter);       //< In case of debug count the time
+
+public:
+    /** The constructor need the octree and the kernels used for computation
+      * @param inTree the octree
+      * @param inKernels the kernels
+      * an assert is launched if one of the arguments is null
+      */
+    FFMMAlgorithmThreadedInterval(FOctree<ParticuleClass, CellClass, OctreeHeight, SubtreeHeight>* const inTree,
+                  KernelClass<ParticuleClass, CellClass>* const inKernels)
+        : tree(inTree) {
+        assert(tree, "tree cannot be null", __LINE__, __FILE__);
+        assert(kernels, "kernels cannot be null", __LINE__, __FILE__);
+
+        for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
+            this->kernels[idxThread] = new KernelClass<ParticuleClass, CellClass>();
+            *this->kernels[idxThread] = *inKernels;
+        }
+
+        FDEBUG_TRACE(FDebug::Controller.write("FFMMAlgorithmThreadedInterval\n"));
+    }
+
+    /** Default destructor */
+    virtual ~FFMMAlgorithmThreadedInterval(){
+        for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
+            delete this->kernels[idxThread];
+        }
+    }
+
+    /** To execute the fmm algorithm
+      * Call this function to run the complete algo
+      */
+    void execute(){
+        for(int idxThread = 0 ; idxThread < NbThreads ; ++idxThread){
+            this->kernels[idxThread]->init();
+        }
+
+        bottomPass();
+        upwardPass();
+
+        downardPass();
+
+        directPass();
+    }
+
+    /** P2M */
+    void bottomPass(){
+        FDEBUG_TRACE( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) );
+        FDEBUG_TIME(counter.tic(););
+
+        FOctreeIterator octreeIterator(tree);
+        // Iterate on leafs
+        octreeIterator.gotoBottomLeft();
+
+        omp_lock_t mutex;
+        omp_init_lock(&mutex);
+        bool stop = false;
+        #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads)
+        {
+            const int threadId = omp_get_thread_num();
+
+            FOctreeIterator threadIter;
+            int idxSizeInterval = 0;
+
+            omp_set_lock(&mutex);
+            while(!stop){
+                threadIter = octreeIterator;
+                for(idxSizeInterval = 1 ; idxSizeInterval < SizeInterval && octreeIterator.moveRight(); ++idxSizeInterval);
+                if(idxSizeInterval != SizeInterval || !octreeIterator.moveRight()) stop = true;
+                omp_unset_lock(&mutex);
+
+                while(idxSizeInterval--){
+                    // We need the current cell that represent the leaf
+                    // and the list of particules
+                    kernels[threadId]->P2M( threadIter.getCurrentCell() , threadIter.getCurrentList());
+                    threadIter.moveRight();
+                }
+                omp_set_lock(&mutex);
+            }
+            omp_unset_lock(&mutex);
+        }
+        omp_destroy_lock(&mutex);
+
+        FDEBUG_TIME(counter.tac(););
+        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
+    }
+
+    /** M2M */
+    void upwardPass(){
+        FDEBUG_TRACE( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); );
+        FDEBUG_TIME(counter.tic(););
+
+        FOctreeIterator octreeIterator(tree);
+        octreeIterator.gotoBottomLeft();
+        octreeIterator.moveUp();
+
+        omp_lock_t mutex;
+        omp_init_lock(&mutex);
+        // for each levels
+        for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){
+            bool stop = false;
+            #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads)
+            {
+                const int threadId = omp_get_thread_num();
+
+                FOctreeIterator threadIter;
+                int idxSizeInterval = 0;
+
+                omp_set_lock(&mutex);
+                // for each cells
+                while(!stop){
+                    threadIter = octreeIterator;
+                    for(idxSizeInterval = 1 ; idxSizeInterval < SizeInterval && octreeIterator.moveRight(); ++idxSizeInterval);
+                    if(idxSizeInterval != SizeInterval || !octreeIterator.moveRight()) stop = true;
+                    omp_unset_lock(&mutex);
+
+                    while(idxSizeInterval--){
+                        // We need the current cell and the child
+                        // child is an array (of 8 child) that may be null
+                        kernels[threadId]->M2M( threadIter.getCurrentCell() , threadIter.getCurrentChild(), idxLevel);
+                        threadIter.moveRight();
+                    }
+
+                    omp_set_lock(&mutex);
+                }
+                omp_unset_lock(&mutex);
+            }
+            octreeIterator.moveUp();
+            octreeIterator.gotoLeft();
+        }
+        omp_destroy_lock(&mutex);
+
+        FDEBUG_TIME(counter.tac(););
+        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
+    }
+
+    /** M2L L2L */
+    void downardPass(){
+        FDEBUG_TRACE( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); );
+        FDEBUG_TIME(counter.tic(););
+
+        { // first M2L
+            FOctreeIterator octreeIterator(tree);
+            octreeIterator.moveDown();
+
+            omp_lock_t mutex;
+            omp_init_lock(&mutex);
+
+            // for each levels
+            for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){
+                bool stop = false;
+                #pragma omp parallel shared(octreeIterator,mutex,idxLevel,stop) num_threads(NbThreads)
+                {
+                    const int threadId = omp_get_thread_num();
+
+                    FOctreeIterator threadIter;
+                    int idxSizeInterval = 0;
+                    CellClass* neighbors[208];
+
+                    omp_set_lock(&mutex);
+                    // for each cells
+                    while(!stop){
+                        threadIter = octreeIterator;
+                        for(idxSizeInterval = 1 ; idxSizeInterval < SizeInterval && octreeIterator.moveRight(); ++idxSizeInterval);
+                        if(idxSizeInterval != SizeInterval || !octreeIterator.moveRight()) stop = true;
+                        omp_unset_lock(&mutex);
+
+                        while(idxSizeInterval--){
+                            const int counter = tree->getDistantNeighbors(neighbors, threadIter.getCurrentGlobalIndex(),idxLevel);
+                            if(counter) kernels[threadId]->M2L( threadIter.getCurrentCell(), neighbors, counter, idxLevel);
+                            threadIter.moveRight();
+                        }
+
+                        omp_set_lock(&mutex);
+                    }
+                    omp_unset_lock(&mutex);
+                }
+                octreeIterator.gotoLeft();
+                octreeIterator.moveDown();
+            }
+            omp_destroy_lock(&mutex);
+        }
+        FDEBUG_TIME(counter.tac(););
+        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
+
+        FDEBUG_TRACE( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); );
+        FDEBUG_TIME(counter.tic(););
+        { // second L2L
+            FOctreeIterator octreeIterator(tree);
+            octreeIterator.moveDown();
+            const int heightMinusOne = OctreeHeight - 1;
+
+            omp_lock_t mutex;
+            omp_init_lock(&mutex);
+            // for each levels exepted leaf level
+            for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){
+                bool stop = false;
+                #pragma omp parallel shared(octreeIterator,mutex,idxLevel,stop) num_threads(NbThreads)
+                {
+                    const int threadId = omp_get_thread_num();
+
+                    FOctreeIterator threadIter;
+                    int idxSizeInterval = 0;
+
+                    omp_set_lock(&mutex);
+                    // for each cells
+                    while(!stop){
+                        threadIter = octreeIterator;
+                        for(idxSizeInterval = 1 ; idxSizeInterval < SizeInterval && octreeIterator.moveRight(); ++idxSizeInterval);
+                        if(idxSizeInterval != SizeInterval || !octreeIterator.moveRight()) stop = true;
+                        omp_unset_lock(&mutex);
+
+                        while(idxSizeInterval--){
+                            kernels[threadId]->L2L( threadIter.getCurrentCell(),  threadIter.getCurrentChild(), idxLevel);
+                            threadIter.moveRight();
+                        }
+
+                        omp_set_lock(&mutex);
+                    }
+                    omp_unset_lock(&mutex);
+                }
+
+                octreeIterator.gotoLeft();
+                octreeIterator.moveDown();
+            }
+            omp_destroy_lock(&mutex);
+        }
+
+        FDEBUG_TIME(counter.tac(););
+        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
+
+    }
+
+    /** P2P */
+    void directPass(){
+        FDEBUG_TRACE( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); );
+        FDEBUG_TIME(counter.tic(););
+
+        FOctreeIterator octreeIterator(tree);
+        octreeIterator.gotoBottomLeft();
+        const int heightMinusOne = OctreeHeight - 1;
+
+        omp_lock_t mutex;
+        omp_init_lock(&mutex);
+        bool stop = false;
+        #pragma omp parallel shared(octreeIterator,mutex,stop) num_threads(NbThreads)
+        {
+            const int threadId = omp_get_thread_num();
+            // There is a maximum of 26 neighbors
+            FList<ParticuleClass*>* neighbors[26];
+            FOctreeIterator threadIter;
+            int idxSizeInterval = 0;
+
+            omp_set_lock(&mutex);
+            // for each leafs
+            while(!stop){
+                threadIter = octreeIterator;
+                for(idxSizeInterval = 1 ; idxSizeInterval < SizeInterval && octreeIterator.moveRight(); ++idxSizeInterval);
+                if(idxSizeInterval != SizeInterval || !octreeIterator.moveRight()) stop = true;
+                omp_unset_lock(&mutex);
+
+                while(idxSizeInterval--){
+                    kernels[threadId]->L2P(threadIter.getCurrentCell(), threadIter.getCurrentList());
+                    // need the current particules and neighbors particules
+                    const int counter = tree->getLeafsNeighbors(neighbors, threadIter.getCurrentGlobalIndex(),heightMinusOne);
+                    kernels[threadId]->P2P( threadIter.getCurrentList() , neighbors, counter);
+                    threadIter.moveRight();
+                }
+
+                omp_set_lock(&mutex);
+            }
+            omp_unset_lock(&mutex);
+        }
+        omp_destroy_lock(&mutex);
+
+        FDEBUG_TIME(counter.tac(););
+        FDEBUG_TRACE( FDebug::Controller << "\tFinished (") FDEBUG_TIME(<< counter.elapsed() <<) FDEBUG_TRACE("s)\n"; )
+
+    }
+
+};
+
+
+#endif //FFMMALGORITHMTHREADEDTHREADED_HPP
+
+// [--LICENSE--]
diff --git a/Sources/Fmb/FAbstractFmbKernels.hpp b/Sources/Fmb/FAbstractFmbKernels.hpp
index 67511048b..97355548d 100644
--- a/Sources/Fmb/FAbstractFmbKernels.hpp
+++ b/Sources/Fmb/FAbstractFmbKernels.hpp
@@ -52,9 +52,9 @@ protected:
     static const int FMB_Info_nexp_size = (FMB_Info_P + 1) * (FMB_Info_P + 1);
 
     // Level of the tree
-    const int treeHeight;
+    int treeHeight;
     // Width of the box at the root level
-    const double treeWidthAtRoot;
+    double treeWidthAtRoot;
 
     // transfer_M2M_container
     FComplexe*** transitionM2M;
@@ -94,7 +94,7 @@ protected:
     F3DPosition force_sum;
     double potential_sum;
 
-    const bool FMB_Info_up_to_P_in_M2L;
+    bool FMB_Info_up_to_P_in_M2L;
 
     //////////////////////////////////////////////////////////////////
     // Allocation
@@ -381,8 +381,11 @@ protected:
         for(int idxm = 0 , idxmMod4 = 0; idxm <= FMB_Info_P ; ++idxm, ++idxmMod4){
             if(idxmMod4 == 4) idxmMod4 = 0;
             const double angle = idxm*inSphere.phi + PiArrayInner[idxmMod4];
-            (cosSin + idxm)->setReal(FMath::Sin(angle + M_PI_2));
+            (cosSin + idxm)->setReal(FMath::Sin(angle + FMath::FPiDiv2));
             (cosSin + idxm)->setImag(FMath::Sin(angle));
+
+            //printf("l=%d \t inSphere.phi=%f \t this->PiArrayOuter[idxlMod4]=%f \t angle=%f \t FMath::Sin(angle + FMath::FPiDiv2)=%f \t FMath::Sin(angle)=%f\n",
+            //        idxm, inSphere.phi, this->PiArrayInner[idxmMod4], angle, FMath::Sin(angle + FMath::FPiDiv2) , FMath::Sin(angle));
         }
 
         // Initialization of associated_Legendre_function_Array:
@@ -403,6 +406,9 @@ protected:
                                         - (l+m)*(*(start_ptr_associated_Legendre_function_Array + expansion_Redirection_array_for_j[l-1] + m))) / inSphere.sinTheta);
                 p_theta_derivated_term->setReal(magnitude * (cosSin + m)->getReal());
                 p_theta_derivated_term->setImag(magnitude * (cosSin + m)->getImag());
+
+                //printf("magnitude=%f r_l=%f p_spherical_harmonic_Inner_coefficients_array=%f real=%f imag=%f\n",
+                //       magnitude,r_l,*p_spherical_harmonic_Inner_coefficients_array,p_theta_derivated_term->getReal(),p_theta_derivated_term->getImag());
             }
 
             // m=l:
@@ -415,6 +421,9 @@ protected:
             p_theta_derivated_term->setReal(magnitude * (cosSin + m)->getReal());
             p_theta_derivated_term->setImag(magnitude * (cosSin + m)->getImag());
 
+            //printf("magnitude=%f r_l=%f p_spherical_harmonic_Inner_coefficients_array=%f real=%f imag=%f\n",
+            //       magnitude,r_l,*p_spherical_harmonic_Inner_coefficients_array,p_theta_derivated_term->getReal(),p_theta_derivated_term->getImag());
+
             ++p_term;
             ++p_theta_derivated_term;
             ++p_spherical_harmonic_Inner_coefficients_array;
@@ -555,6 +564,12 @@ public:
         buildPrecompute();
     }
 
+    FAbstractFmbKernels(const FAbstractFmbKernels& other)
+        : treeHeight(other.treeHeight), treeWidthAtRoot(other.treeWidthAtRoot),
+          transitionM2M(0), transitionL2L(0), potential_sum(0), FMB_Info_up_to_P_in_M2L(true) {
+        buildPrecompute();
+    }
+
     virtual void init(){
         force_sum = F3DPosition();
         potential_sum = 0;
@@ -595,9 +610,9 @@ public:
     * Phi(x) = sum_{n=0}^{+} sum_{m=-n}^{n} M_n^m O_n^{-m} (x - *p_center)
     *
     */
-    void P2M(CellClass* const inPole, FList<ParticuleClass*>* const inParticules) {
+    void P2M(CellClass* const inPole, const FList<ParticuleClass*>* const inParticules) {
 
-        for(typename FList<ParticuleClass*>::BasicIterator iterParticule(*inParticules);
+        for(typename FList<ParticuleClass*>::ConstBasicIterator iterParticule(*inParticules);
                                 iterParticule.isValide() ; iterParticule.progress()){
 
             Spherical spherical;
@@ -649,7 +664,7 @@ public:
     *
     * Warning: if j-n < |k-l| we do nothing.
      */
-    void M2M(CellClass* const inPole, CellClass** const inChild, const int inLevel) {
+    void M2M(CellClass* const inPole, const CellClass*const* const inChild, const int inLevel) {
 
         // We do NOT have: for(l=n-j+k; l<=j-n+k ;++l){} <=> for(l=-n; l<=n ;++l){if (j-n >= abs(k-l)){}}
         //     But we have:  for(k=MAX(0,n-j+l); k<=j-n+l; ++k){} <=> for(k=0; k<=j; ++k){if (j-n >= abs(k-l)){}}
@@ -769,7 +784,7 @@ public:
     *Remark: here we have always j+n >= |-k-l|
       *
       */
-    void M2L(CellClass* const pole, CellClass** const distantNeighbors, const int size, const int inLevel) {
+    void M2L(CellClass* const pole, const CellClass*const* const distantNeighbors, const int size, const int inLevel) {
         FTreeCoordinate coordCenter;
         coordCenter.setPositionFromMorton(pole->getMortonIndex(),inLevel);
 
@@ -901,7 +916,7 @@ public:
       *
       *Warning: if |l-k| > n-j, we do nothing.
       */
-    void L2L(CellClass* const pole, CellClass** const child, const int inLevel) {
+    void L2L(const CellClass* const pole, CellClass** const child, const int inLevel) {
 
         for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){
             // if no child at this position
@@ -990,9 +1005,9 @@ public:
         }
     }
 
-    virtual void L2P(CellClass* const local, FList<ParticuleClass*>* const particules) = 0;
+    virtual void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules) = 0;
 
-    virtual void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) = 0;
+    virtual void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) = 0;
 };
 
 
diff --git a/Sources/Fmb/FExtendFmbCell.hpp b/Sources/Fmb/FExtendFmbCell.hpp
index 2c682ca57..a93acf992 100644
--- a/Sources/Fmb/FExtendFmbCell.hpp
+++ b/Sources/Fmb/FExtendFmbCell.hpp
@@ -41,6 +41,15 @@ public:
         return *this;
     }
 
+    /** Get Multipole */
+    const FComplexe* getMultipole() const {
+        return this->multipole_exp;
+    }
+    /** Get Local */
+    const FComplexe* getLocal() const {
+        return this->local_exp;
+    }
+
     /** Get Multipole */
     FComplexe* getMultipole() {
         return this->multipole_exp;
diff --git a/Sources/Fmb/FFmbKernelsForces.hpp b/Sources/Fmb/FFmbKernelsForces.hpp
index db60940f5..74a75e9c3 100644
--- a/Sources/Fmb/FFmbKernelsForces.hpp
+++ b/Sources/Fmb/FFmbKernelsForces.hpp
@@ -33,7 +33,7 @@ class FFmbKernelsForces : public FAbstractFmbKernels<ParticuleClass,CellClass> {
     /** bodies_L2P
       * expansion_L2P_add_to_force_vector
       */
-    void L2P(CellClass* const local, FList<ParticuleClass*>* const particules){
+    void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules){
         typename FList<ParticuleClass*>::BasicIterator iterTarget(*particules);
         while( iterTarget.isValide() ){
             //printf("Morton %lld\n",local->getMortonIndex());
@@ -182,7 +182,7 @@ class FFmbKernelsForces : public FAbstractFmbKernels<ParticuleClass,CellClass> {
       *  )
       *
       */
-    void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) {
+    void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) {
         typename FList<ParticuleClass*>::BasicIterator iterTarget(*currentBox);
         while( iterTarget.isValide() ){
             for(int idxDirectNeighbors = 0 ; idxDirectNeighbors < size ; ++idxDirectNeighbors){
diff --git a/Sources/Fmb/FFmbKernelsPotential.hpp b/Sources/Fmb/FFmbKernelsPotential.hpp
index 3e350f8ab..a357a0d8b 100644
--- a/Sources/Fmb/FFmbKernelsPotential.hpp
+++ b/Sources/Fmb/FFmbKernelsPotential.hpp
@@ -28,7 +28,7 @@ public:
     /** bodies_L2P
       * expansion_L2P_add_to_force_vector
       */
-    void L2P(CellClass* const local, FList<ParticuleClass*>* const particules){
+    void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules){
         typename FList<ParticuleClass*>::BasicIterator iterTarget(*particules);
         while( iterTarget.isValide() ){
             //printf("Morton %lld\n",local->getMortonIndex());
@@ -88,7 +88,7 @@ public:
       *  )
       *
       */
-    void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) {
+    void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) {
         typename FList<ParticuleClass*>::BasicIterator iterTarget(*currentBox);
         while( iterTarget.isValide() ){
             for(int idxDirectNeighbors = 0 ; idxDirectNeighbors < size ; ++idxDirectNeighbors){
diff --git a/Sources/Fmb/FFmbKernelsPotentialForces.hpp b/Sources/Fmb/FFmbKernelsPotentialForces.hpp
index 8c288d8db..a2542994e 100644
--- a/Sources/Fmb/FFmbKernelsPotentialForces.hpp
+++ b/Sources/Fmb/FFmbKernelsPotentialForces.hpp
@@ -27,7 +27,7 @@ public:
     /** bodies_L2P
       * expansion_L2P_add_to_force_vector
       */
-    void L2P(CellClass* const local, FList<ParticuleClass*>* const particules){
+    void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules){
         typename FList<ParticuleClass*>::BasicIterator iterTarget(*particules);
         while( iterTarget.isValide() ){
             //printf("Morton %lld\n",local->getMortonIndex());
@@ -37,6 +37,21 @@ public:
 
             positionToSphere( iterTarget.value()->getPosition() - local->getPosition(), &spherical );
 
+            /*printf("\t\t bodies_it_Get_p_position(&it) x = %lf \t y = %lf \t z = %lf \n",
+                   (iterTarget.value()->getPosition()).getX(),
+                   (iterTarget.value()->getPosition()).getY(),
+                   (iterTarget.value()->getPosition()).getZ());
+            printf("\t\t p_leaf_center x = %lf \t y = %lf \t z = %lf \n",
+                   (local->getPosition()).getX(),
+                   (local->getPosition()).getY(),
+                   (local->getPosition()).getZ());*/
+            /*printf("\t\t p_position_to_leaf_center x = %lf \t y = %lf \t z = %lf \n",
+                    (iterTarget.value()->getPosition() - local->getPosition()).getX(),
+                    (iterTarget.value()->getPosition() - local->getPosition()).getY(),
+                    (iterTarget.value()->getPosition() - local->getPosition()).getZ());*/
+            //printf("\t\t phi = %lf \t cos = %lf \t sin = %lf \t r= %lf \n",
+            //        spherical.phi,spherical.cosTheta,spherical.sinTheta,spherical.r);
+
             harmonicInnerThetaDerivated( spherical, FAbstractFmbKernels<ParticuleClass,CellClass>::current_thread_Y, FAbstractFmbKernels<ParticuleClass,CellClass>::current_thread_Y_theta_derivated);
 
             // The maximum degree used here will be P.
@@ -60,6 +75,15 @@ public:
 
                 force_vector_in_local_base.setY( force_vector_in_local_base.getY() + exp_term_aux.getReal());
 
+
+                //printf("\t j = %d \t exp_term_aux real = %lf imag = %lf \n",
+                //                                        j,
+               //                                         exp_term_aux.getReal(),
+                //                                        exp_term_aux.getImag());
+                //printf("\t force_vector_in_local_base x = %lf \t y = %lf \t z = %lf \n",
+                //       force_vector_in_local_base.getX(),force_vector_in_local_base.getY(),force_vector_in_local_base.getZ());
+
+
                 ++p_local_exp_term;
                 ++p_Y_term;
                 ++p_Y_theta_derivated_term;
@@ -77,10 +101,23 @@ public:
                     force_vector_in_local_base.setZ( force_vector_in_local_base.getZ() - 2 * k * exp_term_aux.getImag());
                     // F_theta:
 
+                    //printf("\t\t k = %d \t j = %d \t exp_term_aux real = %lf imag = %lf \n",
+                    //                                        k,j,
+                    //                                        exp_term_aux.getReal(),
+                    //                                        exp_term_aux.getImag());
+                    //printf("\t\t\t p_Y_term->getReal = %lf \t p_Y_term->getImag = %lf \n",p_Y_term->getReal(),p_Y_term->getImag());
+
                     exp_term_aux.setReal( (p_Y_theta_derivated_term->getReal() * p_local_exp_term->getReal()) - (p_Y_theta_derivated_term->getImag() * p_local_exp_term->getImag()) );
                     exp_term_aux.setImag( (p_Y_theta_derivated_term->getReal() * p_local_exp_term->getImag()) + (p_Y_theta_derivated_term->getImag() * p_local_exp_term->getReal()) );
 
                     force_vector_in_local_base.setY(force_vector_in_local_base.getY() + 2 * exp_term_aux.getReal());
+
+                    //printf("\t\t k = %d \t j = %d \t exp_term_aux real = %lf imag = %lf \n",
+                     //                                       k,j,
+                     //                                       exp_term_aux.getReal(),
+                     //                                       exp_term_aux.getImag());
+                    //printf("\t\t force_vector_in_local_base x = %lf \t y = %lf \t z = %lf \n",
+                    //       force_vector_in_local_base.getX(),force_vector_in_local_base.getY(),force_vector_in_local_base.getZ());
                 }
             }
 
@@ -181,11 +218,12 @@ public:
       *  )
       *
       */
-    void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) {
+    void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) {
         typename FList<ParticuleClass*>::BasicIterator iterTarget(*currentBox);
         while( iterTarget.isValide() ){
+
             for(int idxDirectNeighbors = 0 ; idxDirectNeighbors < size ; ++idxDirectNeighbors){
-                typename FList<ParticuleClass*>::BasicIterator iterSource(*directNeighbors[idxDirectNeighbors]);
+                typename FList<ParticuleClass*>::ConstBasicIterator iterSource(*directNeighbors[idxDirectNeighbors]);
                 while( iterSource.isValide() ){
                   DIRECT_COMPUTATION_NO_MUTUAL_SOFT(&iterTarget.value(),
                                                    iterSource.value());
@@ -209,7 +247,7 @@ public:
             FAbstractFmbKernels<ParticuleClass,CellClass>::force_sum += iterTarget.value()->getForces();
 
             //printf("They contains energy (res = %f, potential = %f, value = %f)\n",
-                            //potential_sum, iterTarget.value()->getPotential(), iterTarget.value()->getValue());
+            //                potential_sum, iterTarget.value()->getPotential(), iterTarget.value()->getValue());
             FAbstractFmbKernels<ParticuleClass,CellClass>::potential_sum += iterTarget.value()->getPotential() * iterTarget.value()->getValue();
 
             iterTarget.progress();
diff --git a/Tests/testFMMAlgorithm.cpp b/Tests/testFMMAlgorithm.cpp
index a7375e1ef..3e9db4a7c 100644
--- a/Tests/testFMMAlgorithm.cpp
+++ b/Tests/testFMMAlgorithm.cpp
@@ -73,12 +73,12 @@ template< class ParticuleClass, class CellClass>
 class MyTestKernels : public FAbstractKernels<ParticuleClass,CellClass> {
 public:
     // Before upward
-    virtual void P2M(CellClass* const pole, FList<ParticuleClass*>* const particules) {
+    virtual void P2M(CellClass* const pole, const FList<ParticuleClass*>* const particules) {
         // the pole represents all particules under
         pole->setDataUp(particules->getSize());
     }
     // During upward
-    virtual void M2M(CellClass* const pole, CellClass** const child, const int inLevel) {
+    virtual void M2M(CellClass* const pole, const CellClass*const* const child, const int inLevel) {
         // A parent represents the sum of the child
         for(int idx = 0 ; idx < 8 ; ++idx){
             if(child[idx]){
@@ -87,14 +87,14 @@ public:
         }
     }
     // Before Downward
-    virtual void M2L(CellClass* const pole, CellClass** const distantNeighbors, const int size, const int inLevel) {
+    virtual void M2L(CellClass* const pole, const CellClass*const* const distantNeighbors, const int size, const int inLevel) {
         // The pole is impacted by what represent other poles
         for(int idx = 0 ; idx < size ; ++idx){
             pole->setDataDown(pole->getDataDown() + distantNeighbors[idx]->getDataUp());
         }
     }
     // During Downward
-    virtual void L2L(CellClass* const local, CellClass** const child, const int inLevel) {
+    virtual void L2L(const CellClass* const local, CellClass** const child, const int inLevel) {
         // Each child is impacted by the father
         for(int idx = 0 ; idx < 8 ; ++idx){
             if(child[idx]){
@@ -103,7 +103,7 @@ public:
         }
     }
     // After Downward
-    virtual void L2P(CellClass* const local, FList<ParticuleClass*>* const particules){
+    virtual void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules){
         // The particules is impacted by the parent cell
         typename FList<ParticuleClass*>::BasicIterator iter(*particules);
         while( iter.isValide() ){
@@ -112,7 +112,7 @@ public:
         }
     }
     // After Downward
-    virtual void P2P(FList<ParticuleClass*>* const currentBox, FList<ParticuleClass*>** directNeighbors, const int size) {
+    virtual void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) {
         // Each particules targeted is impacted by the particules sources
         long inc = currentBox->getSize() - 1;
         for(int idx = 0 ; idx < size ; ++idx){
@@ -215,5 +215,48 @@ int main(int , char ** ){
 	return 0;
 }
 
+/*
+Creating 2000000 particules ...
+Done  (0.12328s).
+Inserting particules ...
+Done  (3.06648s).
+Working on particules ...
+        Start Bottom Pass
+        Finished (0.835397s)
+        Start Upward Pass
+        Finished (1.12351s)
+        Start Downward Pass (M2L)
+        Finished (32.5013s)
+        Start Downward Pass (L2L)
+        Finished (1.13309s)
+        Start Direct Pass
+        Finished (6.53526s)
+Done  (42.1288s).
+Check Result
+Done
+Deleting particules ...
+Done  (0.0397179s).
+
+Creating 2000000 particules ...
+Done  (0.114306s).
+Inserting particules ...
+Done  (3.0618s).
+Working on particules ...
+        Start Bottom Pass
+        Finished (0.832s)
+        Start Upward Pass
+        Finished (1.11856s)
+        Start Downward Pass (M2L)
+        Finished (31.396s)
+        Start Downward Pass (L2L)
+        Finished (1.13624s)
+        Start Direct Pass
+        Finished (6.5025s)
+Done  (40.9855s).
+Check Result
+Done
+Deleting particules ...
+Done  (0.0378419s).
+ */
 
 // [--LICENSE--]
diff --git a/Tests/testFMMAlgorithmThreaded.cpp b/Tests/testFMMAlgorithmThreaded.cpp
new file mode 100644
index 000000000..b7b2207b8
--- /dev/null
+++ b/Tests/testFMMAlgorithmThreaded.cpp
@@ -0,0 +1,278 @@
+// /!\ Please, you must read the license at the bottom of this page
+
+#include <iostream>
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../Sources/Utils/FTic.hpp"
+
+#include "../Sources/Containers/FOctree.hpp"
+#include "../Sources/Containers/FList.hpp"
+
+#include "../Sources/Utils/FAssertable.hpp"
+#include "../Sources/Utils/F3DPosition.hpp"
+
+#include "../Sources/Core/FBasicParticule.hpp"
+#include "../Sources/Core/FBasicCell.hpp"
+#include "../Sources/Core/FBasicKernels.hpp"
+
+#include "../Sources/Core/FFMMAlgorithm.hpp"
+#include "../Sources/Core/FFMMAlgorithmThreaded.hpp"
+#include "../Sources/Core/FFMMAlgorithmThreadedInterval.hpp"
+#include "../Sources/Core/FAbstractKernels.hpp"
+
+// Compile by : g++ testFMMAlgorithmThreaded.cpp ../Sources/Utils/FAssertable.cpp ../Sources/Utils/FDebug.cpp -lgomp -fopenmp -O2 -o testFMMAlgorithmThreaded.exe
+
+/** This program show an example of use of
+  * the fmm basic algo
+  * it also check that eachh particules is little or longer
+  * related that each other
+  */
+
+
+/** Custom particule class */
+class MyTestParticule : public FBasicParticule{
+protected:
+    // To store data during downard pass
+    long dataDown;
+public:
+    MyTestParticule(): dataDown(0){
+    }
+    long getDataDown() const {
+        return this->dataDown;
+    }
+    void setDataDown(const long inData){
+        this->dataDown = inData;
+    }
+};
+
+/** Custom cell */
+class MyTestCell : public FBasicCell {
+    // To store data during upward and downward pass
+    long dataUp, dataDown;
+public:
+    MyTestCell(): dataUp(0) , dataDown(0){
+    }
+    long getDataUp() const {
+        return this->dataUp;
+    }
+    void setDataUp(const long inData){
+        this->dataUp = inData;
+    }
+    long getDataDown() const {
+        return this->dataDown;
+    }
+    void setDataDown(const long inData){
+        this->dataDown = inData;
+    }
+};
+
+/** Custom Kernel
+  * This kernel simply store in each element the number of elements
+  * it represents
+  */
+template< class ParticuleClass, class CellClass>
+class MyTestKernels : public FAbstractKernels<ParticuleClass,CellClass> {
+public:
+    // Before upward
+    virtual void P2M(CellClass* const pole, const FList<ParticuleClass*>* const particules) {
+        // the pole represents all particules under
+        pole->setDataUp(particules->getSize());
+    }
+    // During upward
+    virtual void M2M(CellClass* const pole, const CellClass*const* const child, const int inLevel) {
+        // A parent represents the sum of the child
+        for(int idx = 0 ; idx < 8 ; ++idx){
+            if(child[idx]){
+                pole->setDataUp(pole->getDataUp() + child[idx]->getDataUp());
+            }
+        }
+    }
+    // Before Downward
+    virtual void M2L(CellClass* const pole, const CellClass*const* const distantNeighbors, const int size, const int inLevel) {
+        // The pole is impacted by what represent other poles
+        for(int idx = 0 ; idx < size ; ++idx){
+            pole->setDataDown(pole->getDataDown() + distantNeighbors[idx]->getDataUp());
+        }
+    }
+    // During Downward
+    virtual void L2L(const CellClass* const local, CellClass** const child, const int inLevel) {
+        // Each child is impacted by the father
+        for(int idx = 0 ; idx < 8 ; ++idx){
+            if(child[idx]){
+                child[idx]->setDataDown(local->getDataDown() + child[idx]->getDataDown());
+            }
+        }
+    }
+    // After Downward
+    virtual void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules){
+        // The particules is impacted by the parent cell
+        typename FList<ParticuleClass*>::BasicIterator iter(*particules);
+        while( iter.isValide() ){
+            iter.value()->setDataDown(iter.value()->getDataDown() + local->getDataDown());
+            iter.progress();
+        }
+    }
+    // After Downward
+    virtual void P2P(FList<ParticuleClass*>* const currentBox, const FList<ParticuleClass*>*const* directNeighbors, const int size) {
+        // Each particules targeted is impacted by the particules sources
+        long inc = currentBox->getSize() - 1;
+        for(int idx = 0 ; idx < size ; ++idx){
+            inc += directNeighbors[idx]->getSize();
+        }
+
+        typename FList<ParticuleClass*>::BasicIterator iter(*currentBox);
+        while( iter.isValide() ){
+            iter.value()->setDataDown(iter.value()->getDataDown() + inc);
+            iter.progress();
+        }
+    }
+};
+
+// Simply create particules and try the kernels
+int main(int , char ** ){
+        const int NbLevels = 10;//10;
+        const int SizeSubLevels = 3;//3
+        const long NbPart = 2000000;//2000000
+        MyTestParticule* particules = new MyTestParticule[NbPart];
+        FTic counter;
+
+        srand ( 1 ); // volontary set seed to constant
+
+        // -----------------------------------------------------
+        std::cout << "Creating " << NbPart << " particules ..." << std::endl;
+        counter.tic();
+        for(long idxPart = 0 ; idxPart < NbPart ; ++idxPart){
+            particules[idxPart].setPosition(double(rand())/RAND_MAX,double(rand())/RAND_MAX,double(rand())/RAND_MAX);
+        }
+
+        counter.tac();
+        std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
+        // -----------------------------------------------------
+
+        FOctree<MyTestParticule, MyTestCell, NbLevels, SizeSubLevels> tree(1.0,F3DPosition(0.5,0.5,0.5));
+
+        // -----------------------------------------------------
+        std::cout << "Inserting particules ..." << std::endl;
+        counter.tic();
+        for(long idxPart = 0 ; idxPart < NbPart ; ++idxPart){
+            tree.insert(&particules[idxPart]);
+        }
+        counter.tac();
+        std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
+        // -----------------------------------------------------
+
+        std::cout << "Working on particules ..." << std::endl;
+        counter.tic();
+
+        // MyTestKernels FBasicKernels
+        MyTestKernels<MyTestParticule, MyTestCell> kernels;
+        //FFMMAlgorithm FFMMAlgorithmThreaded FFMMAlgorithmThreadedInterval
+        FFMMAlgorithmThreaded<MyTestKernels, MyTestParticule, MyTestCell, NbLevels, SizeSubLevels> algo(&tree,&kernels);
+        algo.execute();
+
+        counter.tac();
+        std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
+
+        // -----------------------------------------------------
+
+        std::cout << "Check Result\n";
+        { // Check that each particule has been summed with all other
+            FOctree<MyTestParticule, MyTestCell, NbLevels, SizeSubLevels>::Iterator octreeIterator(&tree);
+            octreeIterator.gotoBottomLeft();
+            do{
+                if(octreeIterator.getCurrentCell()->getDataUp() != octreeIterator.getCurrentList()->getSize() ){
+                        std::cout << "Problem P2M : " << (octreeIterator.getCurrentCell()->getDataUp() - octreeIterator.getCurrentList()->getSize()) << "\n";
+                }
+            } while(octreeIterator.moveRight());
+        }
+        { // Ceck if there is number of NbPart summed at level 1
+            FOctree<MyTestParticule, MyTestCell, NbLevels, SizeSubLevels>::Iterator octreeIterator(&tree);
+            octreeIterator.moveDown();
+            long res = 0;
+            do{
+                res += octreeIterator.getCurrentCell()->getDataUp();
+            } while(octreeIterator.moveRight());
+            if(res != NbPart){
+                std::cout << "Problem M2M at level 1 : " << res << "\n";
+            }
+        }
+        { // Ceck if there is number of NbPart summed at level 1
+            FOctree<MyTestParticule, MyTestCell, NbLevels, SizeSubLevels>::Iterator octreeIterator(&tree);
+            octreeIterator.gotoBottomLeft();
+            for(int idxLevel = NbLevels - 1 ; idxLevel > 1 ; --idxLevel ){
+                long res = 0;
+                do{
+                    res += octreeIterator.getCurrentCell()->getDataUp();
+                } while(octreeIterator.moveRight());
+                if(res != NbPart){
+                    std::cout << "Problem M2M at level " << idxLevel << " : " << res << "\n";
+                }
+                octreeIterator.moveUp();
+                octreeIterator.gotoLeft();
+            }
+        }
+        { // Check that each particule has been summed with all other
+            FOctree<MyTestParticule, MyTestCell, NbLevels, SizeSubLevels>::Iterator octreeIterator(&tree);
+            octreeIterator.gotoBottomLeft();
+            do{
+                FList<MyTestParticule*>::BasicIterator iter(*octreeIterator.getCurrentList());
+                while( iter.isValide() ){
+                    // If a particules has been impacted by less than NbPart - 1 (the current particule)
+                    // there is a problem
+                    if(iter.value()->getDataDown() != NbPart - 1){
+                        std::cout << "Problem L2P + P2P : " << iter.value()->getDataDown() << "\n";
+                    }
+                    iter.progress();
+                }
+            } while(octreeIterator.moveRight());
+        }
+        std::cout << "Done\n";
+
+        // -----------------------------------------------------
+        std::cout << "Deleting particules ..." << std::endl;
+        counter.tic();
+        for(long idxPart = 0 ; idxPart < NbPart ; ++idxPart){
+            particules[idxPart].~MyTestParticule();
+        }
+        delete [] particules;
+        counter.tac();
+        std::cout << "Done  " << "(" << counter.elapsed() << "s)." << std::endl;
+        // -----------------------------------------------------
+
+	return 0;
+}
+
+/*
+ 2000000 particules
+
+FFMMAlgorithm
+        Start Bottom Pass
+        Finished (0.840611s)
+        Start Upward Pass
+        Finished (1.12125s)
+        Start Downward Pass (M2L)
+        Finished (31.815s)
+        Start Downward Pass (L2L)
+        Finished (1.13794s)
+        Start Direct Pass
+        Finished (6.51685s)
+Done  (41.4319s).
+
+FFMMAlgorithmThreaded
+        Start Bottom Pass
+        Finished (1.05805s)
+        Start Upward Pass
+        Finished (1.15517s)
+        Start Downward Pass (M2L)
+        Finished (16.2212s)
+        Start Downward Pass (L2L)
+        Finished (1.00922s)
+        Start Direct Pass
+        Finished (3.10627s)
+Done  (22.5614s).
+
+ */
+
+// [--LICENSE--]
diff --git a/Tests/testFmbAlgorithm.cpp b/Tests/testFmbAlgorithm.cpp
index a0af0866f..f1fb79fd3 100644
--- a/Tests/testFmbAlgorithm.cpp
+++ b/Tests/testFmbAlgorithm.cpp
@@ -18,6 +18,8 @@
 #include "../Sources/Fmb/FExtendFmbCell.hpp"
 
 #include "../Sources/Core/FFMMAlgorithm.hpp"
+#include "../Sources/Core/FFMMAlgorithmThreaded.hpp"
+#include "../Sources/Core/FFMMAlgorithmThreadedInterval.hpp"
 
 #include "../Sources/Fmb/FFmbKernelsPotentialForces.hpp"
 #include "../Sources/Fmb/FFmbKernelsForces.hpp"
@@ -96,7 +98,7 @@ int main(int , char ** ){
         counter.tic();
 
         FFmbKernelsPotentialForces<FmbParticule, FmbCell> kernels(NbLevels,loader.getBoxWidth());//FFmbKernelsPotentialForces FFmbKernelsForces FFmbKernelsPotential
-        FFMMAlgorithm<FFmbKernelsPotentialForces, FmbParticule, FmbCell, NbLevels, SizeSubLevels> algo(&tree,&kernels);
+        FFMMAlgorithmThreaded<FFmbKernelsPotentialForces, FmbParticule, FmbCell, NbLevels, SizeSubLevels> algo(&tree,&kernels);
         algo.execute();
 
         counter.tac();
-- 
GitLab