diff --git a/Src/Components/FAbstractKernels.hpp b/Src/Components/FAbstractKernels.hpp
index edf8e7b0749ab216f5948013e80bb1e214421820..68b7a66e1d80808d9cf89d43ee5cd3427aacf7b8 100644
--- a/Src/Components/FAbstractKernels.hpp
+++ b/Src/Components/FAbstractKernels.hpp
@@ -59,6 +59,14 @@ public:
     virtual void M2L(CellClass* const FRestrict local, const CellClass* distantNeighbors[343],
                      const int size, const int inLevel) = 0;
 
+
+    /** This method can be optionnaly inherited
+      * It is called at the end of each computation level during the M2L pass
+      * @param level the ending level
+      */
+    void finishedLevelM2L(const int /*level*/){
+    }
+
     /**
         * L2L
         * Local to local
diff --git a/Src/Kernels/FSphericalBlockBlasKernel.hpp b/Src/Kernels/FSphericalBlockBlasKernel.hpp
index 58e1707882dd4a2de29f24283a316586ed0e70c7..533368c4f62c44868fc7be1fef1883cf7c348b7f 100644
--- a/Src/Kernels/FSphericalBlockBlasKernel.hpp
+++ b/Src/Kernels/FSphericalBlockBlasKernel.hpp
@@ -15,6 +15,7 @@
 
 #include "../Utils/FMemUtils.hpp"
 #include "../Utils/FBlas.hpp"
+#include "../Containers/FVector.hpp"
 
 /**
 * @author Berenger Bramas (berenger.bramas@inria.fr)
@@ -25,13 +26,27 @@ class FSphericalBlockBlasKernel : public FAbstractSphericalKernel<ParticleClass,
 protected:
     typedef FAbstractSphericalKernel<ParticleClass,CellClass,ContainerClass> Parent;
 
+    /** A interaction properties */
+    struct ComputationPair {
+        FComplexe* FRestrict pole;
+        FComplexe* FRestrict local;
+        explicit ComputationPair(FComplexe* inPole = 0, FComplexe* inLocal = 0)
+            : pole(inPole), local(inLocal) {}
+    };
+
     const int FF_MATRIX_ROW_DIM;     //< The blas matrix number of rows
     const int FF_MATRIX_COLUMN_DIM;  //< The blas matrix number of columns
     const int FF_MATRIX_SIZE;        //< The blas matrix size
 
-    FComplexe* temporaryMultiSource;                //< To perform the M2L without allocating at each call
+    const int BlockSize;             //< The size of a block
+
+    FComplexe*const multipoleMatrix;                //< To copy all the multipole vectors
+    FComplexe*const localMatrix;                    //< To save all the local vectors result
     FSmartPointer<FComplexe*> preM2LTransitions;    //< The pre-computation for the M2L based on the level and the 189 possibilities
 
+    FVector<ComputationPair> interactions[343];     //< All the current interaction
+
+
     /** To access te precomputed M2L transfer matrixes */
     int indexM2LTransition(const int idxX,const int idxY,const int idxZ) const {
         return (((((idxX+3) * 7) + (idxY+3)) * 7 ) + (idxZ+3)) * FF_MATRIX_SIZE;
@@ -39,7 +54,6 @@ protected:
 
     /** Alloc and init pre-vectors*/
     void allocAndInit(){
-
         FHarmonic blasHarmonic(Parent::devP * 2);
 
         // M2L transfer, there is a maximum of 3 neighbors in each direction,
@@ -94,11 +108,13 @@ public:
       * @param inBoxWidth the size of the simulation box
       * @param inPeriodicLevel the number of level upper to 0 that will be requiried
       */
-    FSphericalBlockBlasKernel(const int inDevP, const int inTreeHeight, const FReal inBoxWidth, const F3DPosition& inBoxCenter, const int inPeriodicLevel = 0)
+    FSphericalBlockBlasKernel(const int inDevP, const int inTreeHeight, const FReal inBoxWidth, const F3DPosition& inBoxCenter, const int inBlockSize = 100, const int inPeriodicLevel = 0)
         : Parent(inDevP, inTreeHeight, inBoxWidth, inBoxCenter, inPeriodicLevel),
           FF_MATRIX_ROW_DIM(Parent::harmonic.getExpSize()), FF_MATRIX_COLUMN_DIM(Parent::harmonic.getNExpSize()),
           FF_MATRIX_SIZE(FF_MATRIX_ROW_DIM * FF_MATRIX_COLUMN_DIM),
-          temporaryMultiSource(new FComplexe[FF_MATRIX_COLUMN_DIM]),
+          BlockSize(inBlockSize),
+          multipoleMatrix(new FComplexe[inBlockSize * FF_MATRIX_COLUMN_DIM]),
+          localMatrix(new FComplexe[inBlockSize * FF_MATRIX_ROW_DIM]),
           preM2LTransitions(0){
         allocAndInit();
     }
@@ -108,14 +124,17 @@ public:
         : Parent(other),
           FF_MATRIX_ROW_DIM(other.FF_MATRIX_ROW_DIM), FF_MATRIX_COLUMN_DIM(other.FF_MATRIX_COLUMN_DIM),
           FF_MATRIX_SIZE(other.FF_MATRIX_SIZE),
-          temporaryMultiSource(new FComplexe[FF_MATRIX_COLUMN_DIM]),
+          BlockSize(other.BlockSize),
+          multipoleMatrix(new FComplexe[other.BlockSize * FF_MATRIX_COLUMN_DIM]),
+          localMatrix(new FComplexe[other.BlockSize * FF_MATRIX_ROW_DIM]),
           preM2LTransitions(other.preM2LTransitions) {
 
     }
 
     /** Destructor */
     ~FSphericalBlockBlasKernel(){
-        delete[] temporaryMultiSource;
+        delete[] multipoleMatrix;
+        delete[] localMatrix;
         if(preM2LTransitions.isLast()){
             FMemUtils::DeleteAll(preM2LTransitions.getPtr(), Parent::treeHeight + Parent::periodicLevels);
         }
@@ -127,12 +146,23 @@ public:
         // For all neighbors compute M2L
         for(int idxNeigh = 0 ; idxNeigh < 343 ; ++idxNeigh){
             if( distantNeighbors[idxNeigh] ){
-                const FComplexe* const transitionVector = &preM2LTransitions[inLevel + Parent::periodicLevels][idxNeigh * FF_MATRIX_SIZE];
-                multipoleToLocal(pole->getLocal(), distantNeighbors[idxNeigh]->getMultipole(), transitionVector);
+                interactions[idxNeigh].push(ComputationPair(distantNeighbors[idxNeigh]->getMultipole(), pole->getLocal()));
+
+                if( interactions[idxNeigh].getSize() == BlockSize){
+                    multipoleToLocal( idxNeigh, inLevel);
+                }
             }
         }
     }
 
+    /** Do we have some computation to do in the buffers */
+    void finishedLevelM2L(const int inLevel){
+        for(int idxNeigh = 0 ; idxNeigh < 343 ; ++idxNeigh){
+            if( interactions[idxNeigh].getSize() ){
+                multipoleToLocal( idxNeigh, inLevel);
+            }
+        }
+    }
 
     /** preExpNExp
       * @param exp an exponent vector to create an computable vector
@@ -177,22 +207,29 @@ public:
     *Remark: here we have always j+n >= |-k-l|
     *
     */
-    void multipoleToLocal(FComplexe*const FRestrict local_exp, const FComplexe* const FRestrict multipole_exp_src,
-                          const FComplexe* const FRestrict M2L_Outer_transfer){
-        // Copy original vector and compute exp2nexp
-        FMemUtils::copyall<FComplexe>(temporaryMultiSource, multipole_exp_src, CellClass::GetPoleSize());
-        // Get a computable vector
-        preExpNExp(temporaryMultiSource);
-
-        //FReal alpha_and_beta[2] = {1.0, 0.0};
+    void multipoleToLocal(const int interactionIndex, const int inLevel){
+        for(int idxInter = 0 ; idxInter < interactions[interactionIndex].getSize() ; ++idxInter){
+            // Copy original vector and compute exp2nexp
+            FMemUtils::copyall<FComplexe>(&multipoleMatrix[idxInter * FF_MATRIX_COLUMN_DIM],
+                                          interactions[interactionIndex][idxInter].pole, CellClass::GetPoleSize());
+            // Get a computable vector
+            preExpNExp(&multipoleMatrix[idxInter * FF_MATRIX_COLUMN_DIM]);
+        }
 
-        FBlas::c_gemtva(
+        FBlas::c_gemmtva(
                     static_cast<unsigned int>(FF_MATRIX_COLUMN_DIM),
                     static_cast<unsigned int>(FF_MATRIX_ROW_DIM),
+                    static_cast<unsigned int>(interactions[interactionIndex].getSize()),
                     FReal(1.0),
-                    (FReal*)M2L_Outer_transfer,
-                    (FReal*)temporaryMultiSource,
-                    (FReal*)local_exp);
+                    (FReal*)&preM2LTransitions[inLevel + Parent::periodicLevels][interactionIndex * FF_MATRIX_SIZE],
+                    (FReal*)multipoleMatrix,
+                    (FReal*)localMatrix);
+
+        for(int idxInter = 0 ; idxInter < interactions[interactionIndex].getSize() ; ++idxInter){
+            FMemUtils::addall<FComplexe>(interactions[interactionIndex][idxInter].local, &localMatrix[idxInter * FF_MATRIX_ROW_DIM], FF_MATRIX_ROW_DIM);
+        }
+
+        interactions[interactionIndex].clear();
     }
 };
 
diff --git a/Src/Utils/FMemUtils.hpp b/Src/Utils/FMemUtils.hpp
index 42dd1da2a7681bbb0249cc0d3e0cb03c0f2afcb4..9d5aa4a0f6f9f1185417cab3dda17c6720bd504a 100644
--- a/Src/Utils/FMemUtils.hpp
+++ b/Src/Utils/FMemUtils.hpp
@@ -70,6 +70,14 @@ namespace FMemUtils {
         }
     }
 
+    /** copy all value from one vector to the other */
+    template <class TypeClass>
+    void addall(TypeClass*const dest, const TypeClass*const source, const int nbElements){
+        for(int idx = 0 ; idx < nbElements ; ++idx){
+            dest[idx] += source[idx];
+        }
+    }
+
     /** Delete all */
     template <class TypeClass>
     void DeleteAll(TypeClass*const array[], const int size){