diff --git a/Src/Core/FFmmAlgorithmThreadBalance.hpp b/Src/Core/FFmmAlgorithmThreadBalance.hpp
index 00099a831b4beba789df836512f3f085aba9e0a9..3d3afb85923b99bd4bf0a0f1d2162d6d2fb34ad7 100644
--- a/Src/Core/FFmmAlgorithmThreadBalance.hpp
+++ b/Src/Core/FFmmAlgorithmThreadBalance.hpp
@@ -154,20 +154,26 @@ protected:
                                       WorkloadTemp* workPerElement, const FSize nbElements) const {
        // Now split between threads
         (*intervals).resize(MaxThreads);
+
         // Ideally each thread will have this
         const FSize idealWork = (totalWork/MaxThreads);
+        ///FLOG(FLog::Controller << "[Balance] idealWork " << idealWork << "\n");
+
         // Assign default value for first thread
         int idxThread = 0;
         (*intervals)[idxThread].iterator = workPerElement[0].iterator;
         (*intervals)[idxThread].nbElements = 1;
         FSize assignWork = workPerElement[0].amountOfWork;
+
         for(int idxElement = 1 ; idxElement < nbElements ; ++idxElement){
+            ///FLOG(FLog::Controller << "[Balance] idxElement " << workPerElement[idxElement].amountOfWork << "\n");
+            ///FLOG(FLog::Controller << "[Balance] assignWork " << assignWork << "\n");
            // is it more balanced if we add the current element to the current thread
             if(FMath::Abs((idxThread+1)*idealWork - assignWork) <
                     FMath::Abs((idxThread+1)*idealWork - assignWork - workPerElement[idxElement].amountOfWork)
                     && idxThread != MaxThreads-1){
-                /// FLOG(FLog::Controller << "[Balance] Shape Thread " << idxThread << " goes from "
-                ///      << (*intervals)[idxThread].iterator.getCurrentGlobalIndex() << " nb " << (*intervals)[idxThread].nbElements << "\n");
+                ///FLOG(FLog::Controller << "[Balance] Shape Thread " << idxThread << " goes from "
+                ///      << (*intervals)[idxThread].iterator.getCurrentGlobalIndex() << " nb " << (*intervals)[idxThread].nbElements << "/" << nbElements << "\n");
                 // if not start filling the next thread
                 idxThread += 1;
                 (*intervals)[idxThread].iterator = workPerElement[idxElement].iterator;
@@ -176,8 +182,9 @@ protected:
             (*intervals)[idxThread].nbElements += 1;
             assignWork += workPerElement[idxElement].amountOfWork;
         }
-        /// FLOG(FLog::Controller << "[Balance] Shape Thread " << idxThread << " goes from "
-        ///      << (*intervals)[idxThread].iterator.getCurrentGlobalIndex() << " nb " << (*intervals)[idxThread].nbElements << "\n");
+
+        ///FLOG(FLog::Controller << "[Balance] Shape Thread " << idxThread << " goes from "
+        ///      << (*intervals)[idxThread].iterator.getCurrentGlobalIndex() << " nb " << (*intervals)[idxThread].nbElements << "/" << nbElements << "\n");
     }
 
     void buildThreadIntervals(){
@@ -405,8 +412,6 @@ protected:
                                     FMath::Abs((idxThread+1)*idealWork - assignWork - workloadBuffer[idxElement].amountOfWork)
                                     && idxThread != MaxThreads-1){
                                 (*intervals)[idxThread].second = idxElement;
-                                /// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
-                                ///      << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
                                 idxThread += 1;
                                 (*intervals)[idxThread].first = idxElement;
                             }
@@ -414,8 +419,12 @@ protected:
                         }
                         (*intervals)[idxThread].second = nbElements + offsetShape;
 
-                        /// FLOG(FLog::Controller << "[Balance] Shape " << idxShape << " Thread " << idxThread << " goes from "
-                        ///      << (*intervals)[idxThread].first << " to " << (*intervals)[idxThread].second << "\n");
+                        idxThread += 1;
+                        while(idxThread != MaxThreads){
+                            (*intervals)[idxThread].first = nbElements+offsetShape;
+                            (*intervals)[idxThread].second = nbElements+offsetShape;
+                            idxThread += 1;
+                        }
 
                         offsetShape += nbElements;
                     }
@@ -441,7 +450,7 @@ protected:
         FLOG(FTic counterTime);
 
         FLOG(FTic computationCounter);
-#pragma omp parallel
+        #pragma omp parallel
         {
             KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
             const int nbCellsToCompute = workloadP2M[omp_get_thread_num()].nbElements;
@@ -453,6 +462,10 @@ protected:
                 myThreadkernels->P2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentListSrc());
                 octreeIterator.moveRight();
             }
+
+            FAssertLF(omp_get_thread_num() == MaxThreads-1
+                      || workloadP2M[omp_get_thread_num()+1].nbElements == 0
+                      || octreeIterator.getCurrentGlobalIndex() == workloadP2M[omp_get_thread_num()+1].iterator.getCurrentGlobalIndex());
         }
         FLOG(computationCounter.tac() );
 
@@ -476,7 +489,7 @@ protected:
             FLOG(FTic counterTimeLevel);
 
             FLOG(computationCounter.tic());
-#pragma omp parallel
+            #pragma omp parallel
             {
                 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
                 const int nbCellsToCompute = workloadM2M[idxLevel][omp_get_thread_num()].nbElements;
@@ -488,6 +501,10 @@ protected:
                     myThreadkernels->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
                     octreeIterator.moveRight();
                 }
+
+                FAssertLF(omp_get_thread_num() == MaxThreads-1
+                          || workloadM2M[idxLevel][omp_get_thread_num()+1].nbElements == 0
+                          || octreeIterator.getCurrentGlobalIndex() == workloadM2M[idxLevel][omp_get_thread_num()+1].iterator.getCurrentGlobalIndex());
             }
 
             FLOG(computationCounter.tac());
@@ -528,7 +545,7 @@ protected:
             const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria);
             FLOG(FTic counterTimeLevel);
             FLOG(computationCounter.tic());
-#pragma omp parallel
+            #pragma omp parallel
             {
                 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
                 const int nbCellsToCompute = workloadM2L[idxLevel][omp_get_thread_num()].nbElements;
@@ -543,6 +560,11 @@ protected:
                 }
 
                 myThreadkernels->finishedLevelM2L(idxLevel);
+
+
+                FAssertLF(omp_get_thread_num() == MaxThreads-1
+                          || workloadM2L[idxLevel][omp_get_thread_num()+1].nbElements == 0
+                          || octreeIterator.getCurrentGlobalIndex() == workloadM2L[idxLevel][omp_get_thread_num()+1].iterator.getCurrentGlobalIndex());
             }
             FLOG(computationCounter.tac());
             FLOG( FLog::Controller << "\t\t>> Level " << idxLevel << " = "  << counterTimeLevel.tacAndElapsed() << "s\n" );
@@ -570,7 +592,7 @@ protected:
             FLOG(FTic counterTimeLevel);
 
             FLOG(computationCounter.tic());
-#pragma omp parallel
+            #pragma omp parallel
             {
                 KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
                 const int nbCellsToCompute = workloadL2L[idxLevel][omp_get_thread_num()].nbElements;
@@ -580,6 +602,10 @@ protected:
                     myThreadkernels->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
                     octreeIterator.moveRight();
                 }
+
+                FAssertLF(omp_get_thread_num() == MaxThreads-1
+                          || workloadL2L[idxLevel][omp_get_thread_num()+1].nbElements == 0
+                          || octreeIterator.getCurrentGlobalIndex() == workloadL2L[idxLevel][omp_get_thread_num()+1].iterator.getCurrentGlobalIndex());
             }
             FLOG(computationCounter.tac());
             FLOG( FLog::Controller << "\t\t>> Level " << idxLevel << " = "  << counterTimeLevel.tacAndElapsed() << "s\n" );
@@ -597,7 +623,7 @@ protected:
     /////////////////////////////////////////////////////////////////////////////
 
     void L2P(){
-#pragma omp parallel
+        #pragma omp parallel
         {
             KernelClass * const myThreadkernels = kernels[omp_get_thread_num()];
             const int nbCellsToCompute = workloadL2P[omp_get_thread_num()].nbElements;
@@ -609,6 +635,10 @@ protected:
                 myThreadkernels->L2P( octreeIterator.getCurrentCell() , octreeIterator.getCurrentListTargets());
                 octreeIterator.moveRight();
             }
+
+            FAssertLF(omp_get_thread_num() == MaxThreads-1
+                      || workloadL2P[omp_get_thread_num()+1].nbElements == 0
+                      || octreeIterator.getCurrentGlobalIndex() == workloadL2P[omp_get_thread_num()+1].iterator.getCurrentGlobalIndex());
         }
     }
 
@@ -625,7 +655,7 @@ protected:
 
         const int LeafIndex = OctreeHeight - 1;
 
-#pragma omp parallel
+        #pragma omp parallel
         {
             FLOG(if(!omp_get_thread_num()) computationCounter.tic());
 
@@ -645,6 +675,10 @@ protected:
                     FLOG(if(!omp_get_thread_num()) computationCounterP2P.tac());
                 }
 
+                FAssertLF(omp_get_thread_num() == MaxThreads-1
+                          || interval.second == workloadP2P[idxShape][omp_get_thread_num()+1].first,
+                        omp_get_thread_num(), " ==> ", interval.second, " != ", workloadP2P[idxShape][omp_get_thread_num()+1].first);
+
                 #pragma omp barrier
             }
         }
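
Note (not part of the patch): below is a minimal standalone sketch of the greedy split that the interval-splitting helper in the first hunk performs, using plain element indices instead of octree iterators, and checking the contiguity invariant that the new FAssertLF calls express. All names here (Interval, splitWork, the sample workload) are hypothetical and only illustrate the technique; ScalFMM's real code works on octree iterators and per-shape offsets.

// Sketch only: mimics the greedy per-thread split with integers and verifies
// that consecutive intervals are contiguous, as the added assertions require.
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <vector>

struct Interval {
    int firstElement = 0;   // index of the first element owned by the thread
    int nbElements   = 0;   // number of consecutive elements it owns
};

// Greedily assign consecutive elements to threads so that each thread's
// cumulative work stays as close as possible to (idxThread+1)*idealWork.
std::vector<Interval> splitWork(const std::vector<long>& workPerElement,
                                const int maxThreads) {
    std::vector<Interval> intervals(maxThreads);
    long totalWork = 0;
    for (const long w : workPerElement) totalWork += w;
    const long idealWork = totalWork / maxThreads;

    int idxThread = 0;
    intervals[idxThread].firstElement = 0;
    intervals[idxThread].nbElements   = 1;
    long assignedWork = workPerElement[0];

    for (int idxElement = 1; idxElement < int(workPerElement.size()); ++idxElement) {
        // If the current thread is already closer to its cumulative target than
        // it would be after taking this element, open the next thread instead.
        if (std::abs((idxThread + 1) * idealWork - assignedWork) <
                std::abs((idxThread + 1) * idealWork - assignedWork - workPerElement[idxElement])
                && idxThread != maxThreads - 1) {
            idxThread += 1;
            intervals[idxThread].firstElement = idxElement;
        }
        intervals[idxThread].nbElements += 1;
        assignedWork += workPerElement[idxElement];
    }
    // Threads that received nothing get an empty interval just past the last
    // element, mirroring the tail-filling loop added in the second hunk.
    for (int idx = idxThread + 1; idx < maxThreads; ++idx) {
        intervals[idx].firstElement = int(workPerElement.size());
        intervals[idx].nbElements   = 0;
    }
    return intervals;
}

int main() {
    const std::vector<long> work = {5, 1, 1, 8, 2, 2, 4, 7};
    const std::vector<Interval> intervals = splitWork(work, 4);

    // Contiguity invariant checked by the new FAssertLF calls: each interval
    // ends exactly where the next non-empty interval starts.
    for (size_t idx = 0; idx + 1 < intervals.size(); ++idx) {
        assert(intervals[idx + 1].nbElements == 0
               || intervals[idx].firstElement + intervals[idx].nbElements
                  == intervals[idx + 1].firstElement);
    }
    for (size_t idx = 0; idx < intervals.size(); ++idx) {
        std::printf("thread %zu: first=%d nb=%d\n",
                    idx, intervals[idx].firstElement, intervals[idx].nbElements);
    }
    return 0;
}

With the sample workload above (total 30, ideal 7 per thread) the split comes out as 7/8/8/7 units of work across the four threads, and the assert passes because each thread stops exactly where the next one starts, which is the property the P2M/M2M/M2L/L2L/L2P assertions in the patch check against the next thread's iterator.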