diff --git a/Src/Kernels/Chebyshev/FChebInterpolator.hpp b/Src/Kernels/Chebyshev/FChebInterpolator.hpp
index 24b37295c6a572f9332ff8ccdd3103b5c5250b85..a5d38fe11e8a692e6edd211913f494bdc3c28856 100755
--- a/Src/Kernels/Chebyshev/FChebInterpolator.hpp
+++ b/Src/Kernels/Chebyshev/FChebInterpolator.hpp
@@ -47,7 +47,8 @@ class FChebInterpolator : FNoCopyable
   // compile time constants and types
   enum {nnodes = TensorTraits<ORDER>::nnodes,
         nRhs = MatrixKernelClass::NRHS,
-        nLhs = MatrixKernelClass::NLHS};
+        nLhs = MatrixKernelClass::NLHS,
+        nPV = MatrixKernelClass::NPV};
   typedef FChebRoots< ORDER>  BasisType;
   typedef FChebTensor<ORDER> TensorType;
 
@@ -861,8 +862,15 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& c
         }
       }
 
-      for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
-        FReal*const potentials = inParticles->getPotentials(idxLhs);
+      for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){      
+        // distribution over potential components:
+        // We sum the multidim contribution of PhysValue
+        // This was originally done at M2L step but moved here 
+        // because their storage is required by the force computation.
+        // In fact : f_{ik}(x)=w_j(x) \nabla_{x_i} K_{ij}(x,y)w_j(y))
+        const unsigned int idxPot = idxLhs / nPV; 
+
+        FReal*const potentials = inParticles->getPotentials(idxPot);
 
         // interpolate and increment target value
         FReal targetValue = potentials[idxPart];
@@ -1110,6 +1118,8 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F
         }
 
         for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
+          const unsigned int idxPot = idxLhs / nPV; 
+          const unsigned int idxPV  = idxLhs % nPV; 
 
           // scale forces
           forces[idxLhs][0] *= jacobian[0] / nnodes;
@@ -1117,10 +1127,10 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F
           forces[idxLhs][2] *= jacobian[2] / nnodes;
 
           // get pointers to PhysValues and force components
-          const FReal*const physicalValues = inParticles->getPhysicalValues(idxLhs);
-          FReal*const forcesX = inParticles->getForcesX(idxLhs);
-          FReal*const forcesY = inParticles->getForcesY(idxLhs);
-          FReal*const forcesZ = inParticles->getForcesZ(idxLhs);
+          const FReal*const physicalValues = inParticles->getPhysicalValues(idxPV);
+          FReal*const forcesX = inParticles->getForcesX(idxPot);
+          FReal*const forcesY = inParticles->getForcesY(idxPot);
+          FReal*const forcesZ = inParticles->getForcesZ(idxPot);
 
           // set computed forces
           forcesX[idxPart] += forces[idxLhs][0] * physicalValues[idxPart];
@@ -1342,12 +1352,15 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PTotal(const FPoi
             forces[idxLhs][2] = (     FReal(2.)*f2[3] + FReal(4.)*f4[3] + FReal(8.)*f8[3]) * jacobian[2] / nnodes; // 7 flops
         } // 28 + (ORDER-1) * ((ORDER-1) * (27 + (ORDER-1) * 16)) flops
 
+        const unsigned int idxPot = idxLhs / nPV; 
+        const unsigned int idxPV  = idxLhs % nPV; 
+
         // get potentials, physValues and forces components 
-        const FReal*const physicalValues = inParticles->getPhysicalValues(idxLhs);
-        FReal*const forcesX = inParticles->getForcesX(idxLhs);
-        FReal*const forcesY = inParticles->getForcesY(idxLhs);
-        FReal*const forcesZ = inParticles->getForcesZ(idxLhs);
-        FReal*const potentials = inParticles->getPotentials(idxLhs);
+        const FReal*const physicalValues = inParticles->getPhysicalValues(idxPV);
+        FReal*const forcesX = inParticles->getForcesX(idxPot);
+        FReal*const forcesY = inParticles->getForcesY(idxPot);
+        FReal*const forcesZ = inParticles->getForcesZ(idxPot);
+        FReal*const potentials = inParticles->getPotentials(idxPot);
 
         // set computed potential
         potentials[idxPart] += (potential[idxLhs]); // 1 flop
diff --git a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp
index e5d0d06532fd58c98e166ba07a19106b23e8f971..35c7833be2ba14ea86323330c6ce6aed74771505 100755
--- a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp
+++ b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp
@@ -46,7 +46,9 @@ class FChebTensorialKernel
     : public FAbstractChebKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
 {
   enum {nRhs = MatrixKernelClass::NRHS,
-        nLhs = MatrixKernelClass::NLHS};
+        nLhs = MatrixKernelClass::NLHS,
+        nPot = MatrixKernelClass::NPOT,
+        nPv = MatrixKernelClass::NPV};
 
 protected://PB: for OptiDis
 
@@ -137,19 +139,19 @@ public:
 
     for(int idxV = 0 ; idxV < NVALS ; ++idxV){
       for (int idxLhs=0; idxLhs < nLhs; ++idxLhs){
-        // update local index
-        int idxLoc = idxV*nLhs + idxLhs;
+          // update local index
+          const int idxLoc = idxV*nLhs + idxLhs;
 
-        FReal *const CompressedLocalExpansion = TargetCell->getLocal(idxLoc) + AbstractBaseClass::nnodes;
+          FReal *const CompressedLocalExpansion = TargetCell->getLocal(idxLoc) + AbstractBaseClass::nnodes;
       
-        for (int idxRhs=0; idxRhs < nRhs; ++idxRhs){
+          // update idxRhs
+          const int idxRhs = idxLhs % nPv; 
           // update multipole index
-          int idxMul = idxV*nRhs + idxRhs;
-          // update kernel index such that: x_i = K_{ij}y_j 
-          int idxK = idxLhs*nRhs + idxRhs;
+          const int idxMul = idxV*nRhs + idxRhs;
+
           // get index in matrix kernel
-          unsigned int d 
-            = AbstractBaseClass::MatrixKernel.getPtr()->getPosition(idxK);
+          const unsigned int d 
+            = AbstractBaseClass::MatrixKernel.getPtr()->getPosition(idxLhs);
 
           for (int idx=0; idx<343; ++idx){
             if (SourceCells[idx]){
@@ -158,8 +160,7 @@ public:
                                  CompressedLocalExpansion);
             }
           }
-        }// NRHS
-      }// NLHS
+      }// NLHS=NPOT*NPV
     }// NVALS
 
   }
@@ -223,14 +224,14 @@ public:
                      ContainerClass* const NeighborSourceParticles[27],
                      const int /* size */)
     {
-        DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles);
+        DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles,AbstractBaseClass::MatrixKernel.getPtr());
     }
 
 
     void P2PRemote(const FTreeCoordinate& /*inPosition*/,
                    ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
                    ContainerClass* const inNeighbors[27], const int /*inSize*/){
-        DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27);
+        DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27,AbstractBaseClass::MatrixKernel.getPtr());
     }
 
 };
diff --git a/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp b/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp
index 674b3cdc4c4b144b7ce3d9a500af0bcf3c3258b7..000b63b10bf7f689992c19a115b6ff3f19ccdd3c 100755
--- a/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp
+++ b/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp
@@ -86,8 +86,7 @@ class FChebTensorialM2LHandler<ORDER,MatrixKernelClass,HOMOGENEOUS> : FNoCopyabl
 	enum {order = ORDER,
 				nnodes = TensorTraits<ORDER>::nnodes,
 				ninteractions = 316,// 7^3 - 3^3 (max num cells in far-field)
-        dim = MatrixKernelClass::DIM,
-        nidx = MatrixKernelClass::NIDX};
+        dim = MatrixKernelClass::DIM};
 
 //	const MatrixKernelClass MatrixKernel;
 
@@ -205,8 +204,7 @@ class FChebTensorialM2LHandler<ORDER,MatrixKernelClass,NON_HOMOGENEOUS> : FNoCop
 	enum {order = ORDER,
 				nnodes = TensorTraits<ORDER>::nnodes,
 				ninteractions = 316,// 7^3 - 3^3 (max num cells in far-field)
-        dim = MatrixKernelClass::DIM,
-        nidx = MatrixKernelClass::NIDX};
+        dim = MatrixKernelClass::DIM};
 
   // Tensorial MatrixKernel and homogeneity specific
 	FReal **U, **B;
diff --git a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp
index b5cc77f07ae055240d60a48cecfc3e1514b0fc97..87fe875106a9ef8974921c42283c3a1a6cd096cc 100755
--- a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp
+++ b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp
@@ -71,7 +71,8 @@ struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel
   static const KERNEL_FUNCTION_TYPE Type = /*NON_*/HOMOGENEOUS;
   static const KERNEL_FUNCTION_IDENTIFIER Identifier = ONE_OVER_R;
   static const  unsigned int DIM = 1; //PB: dimension of kernel
-  static const unsigned int NIDX = 1; //PB: number of indices 
+  static const unsigned int NPV = 1;
+  static const unsigned int NPOT = 1;
   static const unsigned int NRHS = 1;
   static const unsigned int NLHS = 1;
 
@@ -127,7 +128,8 @@ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel
   static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS;
   static const KERNEL_FUNCTION_IDENTIFIER Identifier = ONE_OVER_R_SQUARED;
   static const  unsigned int DIM = 1; //PB: dimension of kernel
-  static const unsigned int NIDX = 1; //PB: number of indices 
+  static const unsigned int NPV = 1;
+  static const unsigned int NPOT = 1;
   static const unsigned int NRHS = 1;
   static const unsigned int NLHS = 1;
 
@@ -165,7 +167,8 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel
   static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS;
   static const KERNEL_FUNCTION_IDENTIFIER Identifier = LENNARD_JONES_POTENTIAL;
   static const  unsigned int DIM = 1; //PB: dimension of kernel
-  static const unsigned int NIDX = 1; //PB: number of indices 
+  static const unsigned int NPV = 1;
+  static const unsigned int NPOT = 1;
   static const unsigned int NRHS = 1;
   static const unsigned int NLHS = 1;
 
@@ -205,6 +208,26 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel
 //
 // Tensorial Matrix Kernels (DIM>1)
 //
+// The definition of the potential p and force f are extended to the case
+// of tensorial interaction kernels:
+// p_i(x) = K_{ip}(x,y)w_p(y), \forall i=1..NPOT, p=1..NPV
+// f_{ik}= w_p(x)K_{ip,k}(x,y)w_p(y) "
+//
+// Since the interpolation scheme is such that
+// p_i(x) \approx S^m(x) L^{m}_{ip}
+// f_{ik}= w_p(x) \nabla_k S^m(x) L^{m}_{ip}
+// with
+// L^{m}_{ip} = K^{mn}_{ip} S^n(y) w_p(y) (local expansion)
+// M^{m}_{p} = S^n(y) w_p(y) (multipole expansion)
+// then the multipole exp have NPV components and the local exp NPOT*NPV.
+//
+// NB1: Only the computation of forces requires that the sum over p is 
+// performed at L2P step. It could be done at M2L step for the potential.
+//
+// NB2: An efficient application of the matrix kernel is highly kernel 
+// dependent, we recommand overriding the P2M/M2L/L2P function of the kernel 
+// you are using in order to have optimal performances + set your own NRHS/NLHS.
+//
 ////////////////////////////////////////////////////////////////////////////////
 ////////////////////////////////////////////////////////////////////////////////
 
@@ -213,11 +236,15 @@ struct FInterpMatrixKernel_IOR : FInterpAbstractMatrixKernel
 {
   static const KERNEL_FUNCTION_TYPE Type = /*NON_*/HOMOGENEOUS;
   static const KERNEL_FUNCTION_IDENTIFIER Identifier = ID_OVER_R;
-  static const unsigned int DIM = 6; //PB: dimension of kernel
-  static const unsigned int NIDX = 2; //PB: number of indices 
-  static const unsigned int indexTab[/*DIM*NIDX=12*/];
-  static const unsigned int NRHS = 3;
-  static const unsigned int NLHS = 3;
+  static const unsigned int NK = 3*3; // dim of kernel
+  static const unsigned int DIM = 6; // reduced dim
+  static const unsigned int NPOT = 3; // dim of potentials
+  static const unsigned int NPV = 3; // dim of physical values
+  static const unsigned int NRHS = NPV; // dim of mult exp (here = dim physval)
+  static const unsigned int NLHS = NPOT*NRHS; // dim of loc exp (here = NPOT*NRHS)
+
+  // store indices (i,j) corresponding to sym idx
+  static const unsigned int indexTab[/*2*DIM=12*/];
 
   // store positions in sym tensor 
   static const unsigned int applyTab[/*9*/];
@@ -271,12 +298,6 @@ struct FInterpMatrixKernel_IOR : FInterpAbstractMatrixKernel
         return FReal(2.) / CellWidth;
   }
 
-//  FReal getScaleFactor(const FReal, const int) const
-//  {
-//    // return 1 because non homogeneous kernel functions cannot be scaled!!!
-//    return FReal(1.);
-//  }
-//
 //  FReal getScaleFactor(const FReal) const
 //  {
 //    // return 1 because non homogeneous kernel functions cannot be scaled!!!
@@ -290,31 +311,45 @@ struct FInterpMatrixKernel_IOR : FInterpAbstractMatrixKernel
 // required by ChebSym kernel => only suited for Unif interpolation
 struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel
 {
-  static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS;
+  static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS;
   static const KERNEL_FUNCTION_IDENTIFIER Identifier = R_IJ;
-  static const unsigned int DIM = 6; //PB: dimension of kernel
-  static const unsigned int NIDX = 2; //PB: number of indices 
-  static const unsigned int indexTab[/*DIM*NIDX=12*/];
-  static const unsigned int NRHS = 3;
-  static const unsigned int NLHS = 3;
+  static const unsigned int NK = 3*3; // dim of kernel
+  static const unsigned int DIM = 6; // reduced dim
+  static const unsigned int NPOT = 3; // dim of potentials
+  static const unsigned int NPV = 3; // dim of physical values
+  static const unsigned int NRHS = NPV; // dim of mult exp (here = dim physval)
+  static const unsigned int NLHS = NPOT*NRHS; // dim of loc exp (here = NPOT*NRHS)
+
+  // store indices (i,j) corresponding to sym idx
+  static const unsigned int indexTab[/*2*DIM=12*/];
 
   // store positions in sym tensor (when looping over NRHSxNLHS)
-  static const unsigned int applyTab[/*9*/];
+  static const unsigned int applyTab[/*NK=9*/];
 
+  // indices to be set at construction if component-wise evaluation is performed
   const unsigned int _i,_j;
 
-  FInterpMatrixKernel_R_IJ(const double = 0.0, const unsigned int d = 0) 
-    : _i(indexTab[d]), _j(indexTab[d+DIM])
+  // Material Parameters
+  const double _CoreWidth2; // if >0 then kernel is NON homogeneous
+
+  FInterpMatrixKernel_R_IJ(const double CoreWidth = 0.0, const unsigned int d = 0) 
+    : _i(indexTab[d]), _j(indexTab[d+DIM]), _CoreWidth2(CoreWidth*CoreWidth)
   {}
 
   // returns position in reduced storage from position in full 3x3 matrix
   int getPosition(const unsigned int n) const
   {return applyTab[n];} 
 
+  // returns Core Width squared
+  double getCoreWidth2() const
+  {return _CoreWidth2;}
+
   FReal evaluate(const FPoint& x, const FPoint& y) const
   {
     const FPoint xy(x-y);
-    const FReal one_over_r = FReal(1.)/xy.norm();
+    const FReal one_over_r = FReal(1.)/sqrt(xy.getX()*xy.getX() + 
+                                            xy.getY()*xy.getY() + 
+                                            xy.getZ()*xy.getZ() + _CoreWidth2);
     const FReal one_over_r3 = one_over_r*one_over_r*one_over_r;
     double ri,rj;
     
@@ -338,7 +373,9 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel
   void evaluateBlock(const FPoint& x, const FPoint& y, FReal* block) const
   {
     const FPoint xy(x-y);
-    const FReal one_over_r = FReal(1.)/xy.norm();
+    const FReal one_over_r = FReal(1.)/sqrt(xy.getX()*xy.getX() + 
+                                            xy.getY()*xy.getY() + 
+                                            xy.getZ()*xy.getZ() + _CoreWidth2);
     const FReal one_over_r3 = one_over_r*one_over_r*one_over_r;
     const double r[3] = {xy.getX(),xy.getY(),xy.getZ()};
 
@@ -366,36 +403,57 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel
         return FReal(2.) / CellWidth;
   }
 
+//  // R_{,ij} is set non-homogeneous
+//  FReal getScaleFactor(const FReal CellWidth) const
+//  {
+//    return FReal(1.);
+//  }
+
 };
 
 /// R_{,ijk}
 struct FInterpMatrixKernel_R_IJK : FInterpAbstractMatrixKernel
 {
-  static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS;
+  static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS;
   static const KERNEL_FUNCTION_IDENTIFIER Identifier = R_IJK;
-  static const  unsigned int DIM = 10; //PB: dimension of kernel
-  static const unsigned int NIDX = 3; //PB: number of indices 
-  static const unsigned int indexTab[/*DIM*NIDX=30*/];
-  static const unsigned int NRHS = 3;
-  static const unsigned int NLHS = 3;
+  static const unsigned int NK = 3*3*3; // dim of kernel
+  static const unsigned int DIM = 10; // reduced dim
+  static const unsigned int NPOT = 3; // dim of potentials
+  static const unsigned int NPV = 3*3; // dim of physical values
+  static const unsigned int NRHS = NPV; // dim of mult exp (here = dim physval)
+  static const unsigned int NLHS = NPOT*NRHS; // dim of loc exp (here = NPOT*NRHS)
 
-  // store positions in sym tensor (when looping over NRHSxNLHS)
-  static const unsigned int applyTab[/*27*/];
+  // store indices (i,j,k) corresponding to sym idx
+  static const unsigned int indexTab[/*3*DIM=30*/];
+
+  // store positions in sym tensor wr to full
+  static const unsigned int applyTab[/*NK=27*/];
 
+  // indices to be set at construction if component-wise evaluation is performed
   const unsigned int _i,_j,_k;
 
-  FInterpMatrixKernel_R_IJK(const double = 0.0, const unsigned int d = 0) 
-  : _i(indexTab[d]), _j(indexTab[d+DIM]), _k(indexTab[d+2*DIM])
+  // Material Parameters
+  const double _CoreWidth2; // if >0 then kernel is NON homogeneous
+
+  FInterpMatrixKernel_R_IJK(const double CoreWidth = 0.0, const unsigned int d = 0) 
+  : _i(indexTab[d]), _j(indexTab[d+DIM]), _k(indexTab[d+2*DIM]), _CoreWidth2(CoreWidth*CoreWidth)
   {}
 
   // returns position in reduced storage from position in full 3x3x3 matrix
   int getPosition(const unsigned int n) const
   {return applyTab[n];} 
 
+  // returns Core Width squared
+  double getCoreWidth2() const
+  {return _CoreWidth2;}
+
   FReal evaluate(const FPoint& x, const FPoint& y) const
   {
-    const FPoint xy(x-y);
-    const FReal one_over_r = FReal(1.)/xy.norm();
+//    const FPoint xy(x-y);
+    const FPoint xy(y-x);
+    const FReal one_over_r = FReal(1.)/sqrt(xy.getX()*xy.getX() + 
+                                            xy.getY()*xy.getY() + 
+                                            xy.getZ()*xy.getZ() + _CoreWidth2);
     const FReal one_over_r2 = one_over_r*one_over_r;
     const FReal one_over_r3 = one_over_r2*one_over_r;
     double ri,rj,rk;
@@ -434,6 +492,39 @@ struct FInterpMatrixKernel_R_IJK : FInterpAbstractMatrixKernel
 
   }
 
+  void evaluateBlock(const FPoint& x, const FPoint& y, FReal* block) const
+  {
+//    const FPoint xy(x-y);
+    const FPoint xy(y-x);
+    const FReal one_over_r = FReal(1.)/sqrt(xy.getX()*xy.getX() + 
+                                            xy.getY()*xy.getY() + 
+                                            xy.getZ()*xy.getZ() + _CoreWidth2);
+    const FReal one_over_r2 = one_over_r*one_over_r;
+    const FReal one_over_r3 = one_over_r2*one_over_r;
+
+    const double r[3] = {xy.getX(),xy.getY(),xy.getZ()};
+
+    for(unsigned int d=0;d<DIM;++d){
+      unsigned int i = indexTab[d];
+      unsigned int j = indexTab[d+DIM];
+      unsigned int k = indexTab[d+2*DIM];
+
+      if(i==j){
+        if(j==k) //i=j=k
+          block[d] = FReal(3.) * ( FReal(-1.) + r[i]*r[i] * one_over_r2 ) * r[i] * one_over_r3;
+        else //i=j!=k
+          block[d] =  ( FReal(-1.) + FReal(3.) * r[i]*r[i] * one_over_r2 ) * r[k] * one_over_r3;
+      }
+      else //(i!=j)
+        if(i==k) //i=k!=j
+          block[d] =  ( FReal(-1.) + FReal(3.) * r[i]*r[i] * one_over_r2 ) * r[j] * one_over_r3;
+        else if(j==k) //i!=k=j
+          block[d] =  ( FReal(-1.) + FReal(3.) * r[j]*r[j] * one_over_r2 ) * r[i] * one_over_r3;
+        else //i!=k!=j
+          block[d] =  FReal(3.) * r[i]*r[j]*r[k] * one_over_r2 * one_over_r3;
+    }      
+  }
+
   FReal getScaleFactor(const FReal RootCellWidth, const int TreeLevel) const
   {
     const FReal CellWidth(RootCellWidth / FReal(FMath::pow(2, TreeLevel)));
@@ -446,6 +537,13 @@ struct FInterpMatrixKernel_R_IJK : FInterpAbstractMatrixKernel
   {
     return FReal(4.) / (CellWidth*CellWidth);
   }
+
+//  // R_{,ijk} is set non-homogeneous
+//  FReal getScaleFactor(const FReal CellWidth) const
+//  {
+//    return FReal(1.);
+//  }
+
 };
 
 
diff --git a/Src/Kernels/Interpolation/FInterpP2PKernels.hpp b/Src/Kernels/Interpolation/FInterpP2PKernels.hpp
index 0dad8959c18a7b63b61add8bcf27ea64c2acb8e3..b6186fc51e6b1250ad926e5bb56a7ef1b93e4aec 100644
--- a/Src/Kernels/Interpolation/FInterpP2PKernels.hpp
+++ b/Src/Kernels/Interpolation/FInterpP2PKernels.hpp
@@ -70,17 +70,39 @@ struct DirectInteractionComputer<ID_OVER_R, 1>
 template <>
 struct DirectInteractionComputer<R_IJ, 1>
 {
-  template <typename ContainerClass>
+  template <typename ContainerClass, typename MatrixKernelClass>
   static void P2P( ContainerClass* const FRestrict TargetParticles,
-                   ContainerClass* const NeighborSourceParticles[27]){
-    FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14);
+                   ContainerClass* const NeighborSourceParticles[27],
+                   const MatrixKernelClass *const MatrixKernel){
+    FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14,MatrixKernel);
   }
 
-  template <typename ContainerClass>
+  template <typename ContainerClass, typename MatrixKernelClass>
   static void P2PRemote( ContainerClass* const FRestrict inTargets,
                          ContainerClass* const inNeighbors[27],
-                         const int inSize){
-    FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize);
+                         const int inSize,
+                         const MatrixKernelClass *const MatrixKernel){
+    FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize,MatrixKernel);
+  }
+};
+
+/*! Specialization for GradGradGradR potential */
+template <>
+struct DirectInteractionComputer<R_IJK, 1>
+{
+  template <typename ContainerClass, typename MatrixKernelClass>
+  static void P2P( ContainerClass* const FRestrict TargetParticles,
+                   ContainerClass* const NeighborSourceParticles[27],
+                   const MatrixKernelClass *const MatrixKernel){
+    FP2P::FullMutualRIJK(TargetParticles,NeighborSourceParticles,14,MatrixKernel);
+  }
+
+  template <typename ContainerClass, typename MatrixKernelClass>
+  static void P2PRemote( ContainerClass* const FRestrict inTargets,
+                         ContainerClass* const inNeighbors[27],
+                         const int inSize,
+                         const MatrixKernelClass *const MatrixKernel){
+    FP2P::FullRemoteRIJK(inTargets,inNeighbors,inSize,MatrixKernel);
   }
 };
 
@@ -160,20 +182,46 @@ struct DirectInteractionComputer<ID_OVER_R, NVALS>
 template <int NVALS>
 struct DirectInteractionComputer<R_IJ, NVALS>
 {
-  template <typename ContainerClass>
+  template <typename ContainerClass, typename MatrixKernelClass>
   static void P2P( ContainerClass* const FRestrict TargetParticles,
-                   ContainerClass* const NeighborSourceParticles[27]){
+                   ContainerClass* const NeighborSourceParticles[27],
+                   const MatrixKernelClass *const MatrixKernel){
     for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
-    FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14);
+    FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14,MatrixKernel);
     }
   }
 
-  template <typename ContainerClass>
+  template <typename ContainerClass, typename MatrixKernelClass>
   static void P2PRemote( ContainerClass* const FRestrict inTargets,
                          ContainerClass* const inNeighbors[27],
-                         const int inSize){
+                         const int inSize,
+                         const MatrixKernelClass *const MatrixKernel){
+    for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
+    FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize,MatrixKernel);
+    }
+  }
+};
+
+/*! Specialization for GradGradGradR potential */
+template <int NVALS>
+struct DirectInteractionComputer<R_IJK, NVALS>
+{
+  template <typename ContainerClass, typename MatrixKernelClass>
+  static void P2P( ContainerClass* const FRestrict TargetParticles,
+                   ContainerClass* const NeighborSourceParticles[27],
+                   const MatrixKernelClass *const MatrixKernel){
+    for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
+    FP2P::FullMutualRIJK(TargetParticles,NeighborSourceParticles,14,MatrixKernel);
+    }
+  }
+
+  template <typename ContainerClass, typename MatrixKernelClass>
+  static void P2PRemote( ContainerClass* const FRestrict inTargets,
+                         ContainerClass* const inNeighbors[27],
+                         const int inSize,
+                         const MatrixKernelClass *const MatrixKernel){
     for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
-    FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize);
+    FP2P::FullRemoteRIJK(inTargets,inNeighbors,inSize,MatrixKernel);
     }
   }
 };
diff --git a/Src/Kernels/P2P/FP2P.hpp b/Src/Kernels/P2P/FP2P.hpp
index 863180432935cd9533ac44a1c0d68d048b7a2568..7663a1f51d83ce35f75e9dd571da5e71dac772b8 100644
--- a/Src/Kernels/P2P/FP2P.hpp
+++ b/Src/Kernels/P2P/FP2P.hpp
@@ -978,6 +978,8 @@ public:
   //////////////////////////////////////////////////////////////////////////////////////////////////////////
   //////////////////////////////////////////////////////////////////////////////////////////////////////////
   // R_IJ
+  // PB: The following functions take the MatrixKernel as input arguments for more generic implementation 
+  // Besides this MatrixKernel is already updated with any extra parameter (e.g. core width).
   //////////////////////////////////////////////////////////////////////////////////////////////////////////
   //////////////////////////////////////////////////////////////////////////////////////////////////////////
   //////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -986,9 +988,11 @@ public:
     /**
      * @brief FullMutualRIJ
      */
-    template <class ContainerClass>
+    template <class ContainerClass, typename MatrixKernelClass>
     static void FullMutualRIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
-                             const int limiteNeighbors){
+                              const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
+
+      const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock
 
         const int nbParticlesTargets = inTargets->getNbParticles();
         const FReal*const targetsX = inTargets->getPositions()[0];
@@ -1008,7 +1012,7 @@ public:
                         FReal dy = sourcesY[idxSource] - targetsY[idxTarget];
                         FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget];
 
-                        FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz);
+                        FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2);
                         FReal inv_distance = FMath::Sqrt(inv_distance_pow2);
                         FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance;
 
@@ -1086,7 +1090,7 @@ public:
                 FReal dy = targetsY[idxSource] - targetsY[idxTarget];
                 FReal dz = targetsZ[idxSource] - targetsZ[idxTarget];
 
-                FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz);
+                FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2);
                 FReal inv_distance = FMath::Sqrt(inv_distance_pow2);
                 FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance;
 
@@ -1144,7 +1148,6 @@ public:
                     targetsForcesY[idxSource] -= coef[1];
                     targetsForcesZ[idxSource] -= coef[2];
                     targetsPotentials[idxSource] += potentialCoef * targetsPhysicalValues[idxTarget];
-                             
                   }// j
                 }// i
 
@@ -1155,9 +1158,11 @@ public:
     /**
      * @brief FullRemoteRIJ
      */
-    template <class ContainerClass>
+    template <class ContainerClass, typename MatrixKernelClass>
     static void FullRemoteRIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
-                             const int limiteNeighbors){
+                             const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
+
+      const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock
 
         const int nbParticlesTargets = inTargets->getNbParticles();
         const FReal*const targetsX = inTargets->getPositions()[0];
@@ -1178,7 +1183,7 @@ public:
                         FReal dy = sourcesY[idxSource] - targetsY[idxTarget];
                         FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget];
 
-                        FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz);
+                        FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2);
                         FReal inv_distance = FMath::Sqrt(inv_distance_pow2);
                         FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance;
 
@@ -1261,17 +1266,21 @@ public:
      * @param targetForceZ
      * @param targetPotential
      */
+  template<typename MatrixKernelClass>
     static void MutualParticlesRIJ(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal* sourcePhysicalValue,
                                   FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential,
                                   const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue,
-                                  FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential
-                                  ){
+                                  FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential,
+                                  const MatrixKernelClass *const MatrixKernel){
+
+      const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock
+
         // GradGradR potential
         FReal dx = sourceX - targetX;
         FReal dy = sourceY - targetY;
         FReal dz = sourceZ - targetZ;
 
-        FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz);
+        FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2);
         FReal inv_distance = FMath::Sqrt(inv_distance_pow2);
         FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance;
 
@@ -1328,6 +1337,364 @@ public:
 
     }
 
+
+  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+  // R_IJK
+  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+  //////////////////////////////////////////////////////////////////////////////////////////////////////////
+
+  /**
+   * @brief FullMutualRIJK
+   */
+  template <class ContainerClass, typename MatrixKernelClass>
+  static void FullMutualRIJK(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+                            const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
+
+    const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock
+
+    const int nbParticlesTargets = inTargets->getNbParticles();
+    const FReal*const targetsX = inTargets->getPositions()[0];
+    const FReal*const targetsY = inTargets->getPositions()[1];
+    const FReal*const targetsZ = inTargets->getPositions()[2];
+
+    for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){
+      for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){
+        if( inNeighbors[idxNeighbors] ){
+          const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles();
+          const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0];
+          const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1];
+          const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2];
+
+          for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){
+            FReal dx = sourcesX[idxSource] - targetsX[idxTarget];
+            FReal dy = sourcesY[idxSource] - targetsY[idxTarget];
+            FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget];
+
+            FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2);
+            FReal inv_distance = FMath::Sqrt(inv_distance_pow2);
+            FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance;
+
+            FReal r[3]={dx,dy,dz};
+
+            for(unsigned int i = 0 ; i < 3 ; ++i){
+              FReal*const targetsPotentials = inTargets->getPotentials(i);
+              FReal*const targetsForcesX = inTargets->getForcesX(i);
+              FReal*const targetsForcesY = inTargets->getForcesY(i);
+              FReal*const targetsForcesZ = inTargets->getForcesZ(i);
+              FReal*const sourcesPotentials = inNeighbors[idxNeighbors]->getPotentials(i);
+              FReal*const sourcesForcesX = inNeighbors[idxNeighbors]->getForcesX(i);
+              FReal*const sourcesForcesY = inNeighbors[idxNeighbors]->getForcesY(i);
+              FReal*const sourcesForcesZ = inNeighbors[idxNeighbors]->getForcesZ(i);
+
+              const FReal ri2=r[i]*r[i];
+
+              for(unsigned int j = 0 ; j < 3 ; ++j){
+                FReal rj2=r[j]*r[j];
+
+                for(unsigned int k = 0 ; k < 3 ; ++k){
+                  const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k);
+                  const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j*3+k);
+
+                  // potentials
+                  FReal potentialCoef;
+                  if(i==j){
+                    if(j==k) //i=j=k
+                      potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3;
+                    else //i=j!=k
+                      potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3;
+                  }
+                  else{ //(i!=j)
+                    if(i==k) //i=k!=j
+                      potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3;
+                    else if(j==k) //i!=k=j
+                      potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3;
+                    else //i!=k!=j
+                      potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3;
+                  }
+
+                  // forces
+                  FReal coef[3]; 
+                  for(unsigned int l = 0 ; l < 3 ; ++l){
+                    coef[l]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]);
+
+//                    // Grad of RIJK kernel is RIJKL kernel => TODO Implement
+//                    coef[l] *= ;
+
+                  }// l
+
+                  targetsForcesX[idxTarget] += coef[0];
+                  targetsForcesY[idxTarget] += coef[1];
+                  targetsForcesZ[idxTarget] += coef[2];
+                  targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] );
+
+                  sourcesForcesX[idxSource] -= coef[0];
+                  sourcesForcesY[idxSource] -= coef[1];
+                  sourcesForcesZ[idxSource] -= coef[2];
+                  sourcesPotentials[idxSource] += -potentialCoef * targetsPhysicalValues[idxTarget];
+
+                }// k
+              }// j
+            }// i
+          }
+        }
+      }
+    }
+
+    for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){
+      for(int idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){
+        FReal dx = targetsX[idxSource] - targetsX[idxTarget];
+        FReal dy = targetsY[idxSource] - targetsY[idxTarget];
+        FReal dz = targetsZ[idxSource] - targetsZ[idxTarget];
+
+        FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2);
+        FReal inv_distance = FMath::Sqrt(inv_distance_pow2);
+        FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance;
+
+        FReal r[3]={dx,dy,dz};
+
+        for(unsigned int i = 0 ; i < 3 ; ++i){
+          FReal*const targetsPotentials = inTargets->getPotentials(i);
+          FReal*const targetsForcesX = inTargets->getForcesX(i);
+          FReal*const targetsForcesY = inTargets->getForcesY(i);
+          FReal*const targetsForcesZ = inTargets->getForcesZ(i);
+          FReal ri2=r[i]*r[i];
+
+          for(unsigned int j = 0 ; j < 3 ; ++j){
+            FReal rj2=r[j]*r[j];
+
+            for(unsigned int k = 0 ; k < 3 ; ++k){
+              const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k);
+
+              // potentials
+              FReal potentialCoef;
+              if(i==j){
+                if(j==k) //i=j=k
+                  potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3;
+                else //i=j!=k
+                  potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3;
+              }
+              else{ //(i!=j)
+                if(i==k) //i=k!=j
+                  potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3;
+                else if(j==k) //i!=k=j
+                  potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3;
+                else //i!=k!=j
+                  potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3;
+              }
+
+              // forces
+              FReal coef[3]; 
+              for(unsigned int l = 0 ; l < 3 ; ++l){
+                coef[l]= -(targetsPhysicalValues[idxTarget] * targetsPhysicalValues[idxSource]);
+
+//                    // Grad of RIJK kernel is RIJKL kernel => TODO Implement
+//                    coef[l] *= ;
+
+              }// l
+
+              targetsForcesX[idxTarget] += coef[0];
+              targetsForcesY[idxTarget] += coef[1];
+              targetsForcesZ[idxTarget] += coef[2];
+              targetsPotentials[idxTarget] += ( potentialCoef * targetsPhysicalValues[idxSource] );
+
+              targetsForcesX[idxSource] -= coef[0];
+              targetsForcesY[idxSource] -= coef[1];
+              targetsForcesZ[idxSource] -= coef[2];
+              targetsPotentials[idxSource] += -potentialCoef * targetsPhysicalValues[idxTarget];
+            }// k
+          }// j
+        }// i
+
+      }
+    }
+  }
+
+  /**
+   * @brief FullRemoteRIJK
+   */
+  template <class ContainerClass, typename MatrixKernelClass>
+  static void FullRemoteRIJK(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+                            const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
+
+    const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock
+
+    const int nbParticlesTargets = inTargets->getNbParticles();
+    const FReal*const targetsX = inTargets->getPositions()[0];
+    const FReal*const targetsY = inTargets->getPositions()[1];
+    const FReal*const targetsZ = inTargets->getPositions()[2];
+
+    for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){
+      for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){
+        if( inNeighbors[idxNeighbors] ){
+          const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles();
+          const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0];
+          const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1];
+          const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2];
+
+          for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){
+            // potential
+            FReal dx = sourcesX[idxSource] - targetsX[idxTarget];
+            FReal dy = sourcesY[idxSource] - targetsY[idxTarget];
+            FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget];
+
+            FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2);
+            FReal inv_distance = FMath::Sqrt(inv_distance_pow2);
+            FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance;
+
+            FReal r[3]={dx,dy,dz};
+
+            for(unsigned int i = 0 ; i < 3 ; ++i){
+              FReal*const targetsPotentials = inTargets->getPotentials(i);
+              FReal*const targetsForcesX = inTargets->getForcesX(i);
+              FReal*const targetsForcesY = inTargets->getForcesY(i);
+              FReal*const targetsForcesZ = inTargets->getForcesZ(i);
+              FReal ri2=r[i]*r[i];
+
+              for(unsigned int j = 0 ; j < 3 ; ++j){
+                FReal rj2=r[j]*r[j];
+
+                for(unsigned int k = 0 ; k < 3 ; ++k){
+
+                  const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k);
+                  const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j*3+k);
+
+                  // potentials
+                  FReal potentialCoef;
+                  if(i==j){
+                    if(j==k) //i=j=k
+                      potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3;
+                    else //i=j!=k
+                      potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3;
+                  }
+                  else{ //(i!=j)
+                    if(i==k) //i=k!=j
+                      potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3;
+                    else if(j==k) //i!=k=j
+                      potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3;
+                    else //i!=k!=j
+                      potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3;
+                  }
+
+                  // forces
+                  FReal coef[3]; 
+                  for(unsigned int l = 0 ; l < 3 ; ++l){
+                    coef[l]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]);
+
+                    //                    // Grad of RIJK kernel is RIJKL kernel => TODO Implement
+                    //                    coef[l] *= ;
+
+                  }// l
+
+                  targetsForcesX[idxTarget] += coef[0];
+                  targetsForcesY[idxTarget] += coef[1];
+                  targetsForcesZ[idxTarget] += coef[2];
+                  targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] );
+
+                }// k
+              }// j
+            }// i
+
+          }
+        }
+      }
+    }
+  }
+
+
+  /**
+   * @brief MutualParticlesRIJK
+   * @param sourceX
+   * @param sourceY
+   * @param sourceZ
+   * @param sourcePhysicalValue
+   * @param sourceForceX
+   * @param sourceForceY
+   * @param sourceForceZ
+   * @param sourcePotential
+   * @param targetX
+   * @param targetY
+   * @param targetZ
+   * @param targetPhysicalValue
+   * @param targetForceX
+   * @param targetForceY
+   * @param targetForceZ
+   * @param targetPotential
+   */
+  template<typename MatrixKernelClass>
+  static void MutualParticlesRIJK(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal* sourcePhysicalValue,
+                                  FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential,
+                                  const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue,
+                                  FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential,
+                                  const MatrixKernelClass *const MatrixKernel){
+
+    const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock
+
+    // GradGradR potential
+    FReal dx = sourceX - targetX;
+    FReal dy = sourceY - targetY;
+    FReal dz = sourceZ - targetZ;
+
+    FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2);
+    FReal inv_distance = FMath::Sqrt(inv_distance_pow2);
+    FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance;
+
+    FReal r[3]={dx,dy,dz};
+
+    for(unsigned int i = 0 ; i < 3 ; ++i){
+      FReal ri2=r[i]*r[i];
+      for(unsigned int j = 0 ; j < 3 ; ++j){
+        FReal rj2=r[j]*r[j];
+        for(unsigned int k = 0 ; k < 3 ; ++k){
+
+          // potentials
+          FReal potentialCoef;
+          if(i==j){
+            if(j==k) //i=j=k
+              potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3;
+            else //i=j!=k
+              potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3;
+          }
+          else{ //(i!=j)
+            if(i==k) //i=k!=j
+              potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3;
+            else if(j==k) //i!=k=j
+              potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3;
+            else //i!=k!=j
+              potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3;
+          }
+
+          // forces
+          FReal coef[3]; 
+          for(unsigned int l = 0 ; l < 3 ; ++l){
+            coef[l]= -(targetPhysicalValue[j*3+k] * sourcePhysicalValue[j*3+k]);
+
+//                    // Grad of RIJK kernel is RIJKL kernel => TODO Implement
+//                    coef[l] *= ;
+
+          }// l
+
+          targetForceX[i] += coef[0];
+          targetForceY[i] += coef[1];
+          targetForceZ[i] += coef[2];
+          targetPotential[i] += ( potentialCoef * sourcePhysicalValue[j*3+k] );
+
+          sourceForceX[i] -= coef[0];
+          sourceForceY[i] -= coef[1];
+          sourceForceZ[i] -= coef[2];
+          sourcePotential[i] += ( -potentialCoef * targetPhysicalValue[j*3+k] );
+
+        }// k
+      }// j
+    }// i
+
+  }
+
+
   //////////////////////////////////////////////////////////////////////////////////////////////////////////
   //////////////////////////////////////////////////////////////////////////////////////////////////////////
   //////////////////////////////////////////////////////////////////////////////////////////////////////////
@@ -1336,7 +1703,6 @@ public:
   // i.e. [[1 1 1]
   //       [1 1 1] * 1/R
   //       [1 1 1]]
-  // Only use scalar phys val, potential and forces for now. TODO use vectorial ones.
   //////////////////////////////////////////////////////////////////////////////////////////////////////////
   //////////////////////////////////////////////////////////////////////////////////////////////////////////
   //////////////////////////////////////////////////////////////////////////////////////////////////////////
diff --git a/Src/Kernels/Uniform/FUnifInterpolator.hpp b/Src/Kernels/Uniform/FUnifInterpolator.hpp
index ada5007d582ff83fdf39d53e84068d34aae672d9..48304884035c5fd6e225d81686f446f53287dcbf 100755
--- a/Src/Kernels/Uniform/FUnifInterpolator.hpp
+++ b/Src/Kernels/Uniform/FUnifInterpolator.hpp
@@ -43,7 +43,8 @@ class FUnifInterpolator : FNoCopyable
   // compile time constants and types
   enum {nnodes = TensorTraits<ORDER>::nnodes,
         nRhs = MatrixKernelClass::NRHS,
-        nLhs = MatrixKernelClass::NLHS};
+        nLhs = MatrixKernelClass::NLHS,
+        nPV = MatrixKernelClass::NPV};
   typedef FUnifRoots< ORDER>  BasisType;
   typedef FUnifTensor<ORDER> TensorType;
 
@@ -441,7 +442,14 @@ inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& c
       L_of_x[o][2] = BasisType::L(o, localPosition.getZ()); // 3 * ORDER*(ORDER-1) flops
     }
 
+    // loop over dim of local expansions
     for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
+      // distribution over potential components:
+      // We sum the multidim contribution of PhysValue
+      // This was originally done at M2L step but moved here 
+      // because their storage is required by the force computation.
+      // In fact : f_{ik}(x)=w_j(x) \nabla_{x_i} K_{ij}(x,y)w_j(y))
+      const unsigned int idxPot = idxLhs / nPV; 
 
       // interpolate and increment target value
       FReal targetValue=0.;
@@ -458,7 +466,7 @@ inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& c
       }
 
       // get potential
-      FReal*const potentials = inParticles->getPotentials(idxLhs);
+      FReal*const potentials = inParticles->getPotentials(idxPot);
       // add contribution to potential
       potentials[idxPart] += (targetValue);
 
@@ -514,6 +522,8 @@ inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F
     }
 
     for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
+      const unsigned int idxPot = idxLhs / nPV; 
+      const unsigned int idxPV  = idxLhs % nPV; 
 
       // interpolate and increment forces value
       FReal forces[3] = {FReal(0.), FReal(0.), FReal(0.)};
@@ -533,17 +543,16 @@ inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F
           } // ORDER * ORDER * 4 flops
         } // ORDER * ORDER * ORDER * 4 flops
 
-
         // scale forces
         forces[0] *= jacobian[0];
         forces[1] *= jacobian[1];
         forces[2] *= jacobian[2];
       }
 
-      const FReal*const physicalValues = inParticles->getPhysicalValues(idxLhs);
-      FReal*const forcesX = inParticles->getForcesX(idxLhs);
-      FReal*const forcesY = inParticles->getForcesY(idxLhs);
-      FReal*const forcesZ = inParticles->getForcesZ(idxLhs);
+      const FReal*const physicalValues = inParticles->getPhysicalValues(idxPV);
+      FReal*const forcesX = inParticles->getForcesX(idxPot);
+      FReal*const forcesY = inParticles->getForcesY(idxPot);
+      FReal*const forcesZ = inParticles->getForcesZ(idxPot);
 
       // set computed forces
       forcesX[idxPart] += forces[0] * physicalValues[idxPart];
diff --git a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp
index 74f1cc760cb78ea1e3ec7c2b3fd58b5d95fea000..d12569d46b524fa4b43529dfdb9f54fe895e65d8 100644
--- a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp
+++ b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp
@@ -62,7 +62,9 @@ class FUnifTensorialKernel
   : public FAbstractUnifKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
 {
   enum {nRhs = MatrixKernelClass::NRHS,
-        nLhs = MatrixKernelClass::NLHS};
+        nLhs = MatrixKernelClass::NLHS,
+        nPot = MatrixKernelClass::NPOT,
+        nPV = MatrixKernelClass::NPV};
 
 protected://PB: for OptiDis
 
@@ -152,19 +154,24 @@ public:
 
     for(int idxV = 0 ; idxV < NVALS ; ++idxV){
       for (int idxLhs=0; idxLhs < nLhs; ++idxLhs){
-        // update local index
-        int idxLoc = idxV*nLhs + idxLhs;
-        // load transformed local expansion
-        FComplexe *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxLoc);
+          // update local index
+          const int idxLoc = idxV*nLhs + idxLhs;
+
+          // load transformed local expansion
+          FComplexe *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxLoc);
 
-        for (int idxRhs=0; idxRhs < nRhs; ++idxRhs){
+          // update idxRhs
+          const int idxRhs = idxLhs % nPV; 
           // update multipole index
-          int idxMul = idxV*nRhs + idxRhs;
-          // update kernel index such that: x_i = K_{ij}y_j 
-          int idxK = idxLhs*nRhs + idxRhs;
+          const int idxMul = idxV*nRhs + idxRhs;
+
           // get index in matrix kernel
-          unsigned int d 
-            = AbstractBaseClass::MatrixKernel.getPtr()->getPosition(idxK);
+          const unsigned int d 
+            = AbstractBaseClass::MatrixKernel.getPtr()->getPosition(idxLhs);
+
+//          std::cout<<"idxLhs="<< idxLhs 
+//                   <<" - d="<< d
+//                   <<" - idxRhs="<< idxRhs <<std::endl;
 
           for (int idx=0; idx<343; ++idx){
             if (SourceCells[idx]){
@@ -172,10 +179,9 @@ public:
                                   SourceCells[idx]->getTransformedMultipole(idxMul),
                                   TransformedLocalExpansion);
 
-            }
           }
-        }// NRHS
-      }// NLHS
+        }
+      }// NLHS=NPOT*NPV
     }// NVALS
   }
 
@@ -231,14 +237,14 @@ public:
            ContainerClass* const NeighborSourceParticles[27],
            const int /* size */)
   {
-    DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles);
+    DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles,AbstractBaseClass::MatrixKernel.getPtr());
   }
 
 
   void P2PRemote(const FTreeCoordinate& /*inPosition*/,
                  ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
                  ContainerClass* const inNeighbors[27], const int /*inSize*/){
-    DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27);
+    DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27,AbstractBaseClass::MatrixKernel.getPtr());
   }
 
 };
diff --git a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp
index a4c1d62db8ca62ab5873fc2de13ba452ef99fe2b..ef75a1068e51b9caea266c0c225ba0aa21a83511 100644
--- a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp
+++ b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp
@@ -112,7 +112,6 @@ static void Compute(const MatrixKernelClass *const MatrixKernel,
                 for (unsigned int d=0; d<dim; ++d) 
                   _C[d][perm[ido]] = block[d];
 
-
                 ido++;
               }
 
@@ -192,8 +191,7 @@ class FUnifTensorialM2LHandler<ORDER,MatrixKernelClass,HOMOGENEOUS> : FNoCopyabl
 				nnodes = TensorTraits<ORDER>::nnodes,
 				ninteractions = 316, // 7^3 - 3^3 (max num cells in far-field)
         rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1),
-        dim = MatrixKernelClass::DIM,
-        nidx = MatrixKernelClass::NIDX};
+        dim = MatrixKernelClass::DIM};
 
   // Tensorial MatrixKernel specific
 	FComplexe** FC;
@@ -345,7 +343,6 @@ public:
 
     // Apply forward Discrete Fourier Transform
     Dft.applyDFT(Py,FY);
-
   }
 
 
@@ -360,8 +357,7 @@ class FUnifTensorialM2LHandler<ORDER,MatrixKernelClass,NON_HOMOGENEOUS> : FNoCop
 				nnodes = TensorTraits<ORDER>::nnodes,
 				ninteractions = 316, // 7^3 - 3^3 (max num cells in far-field)
         rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1),
-        dim = MatrixKernelClass::DIM,
-        nidx = MatrixKernelClass::NIDX};
+        dim = MatrixKernelClass::DIM};
 
   // Tensorial MatrixKernel and homogeneity specific
 	FComplexe*** FC;
@@ -525,7 +521,6 @@ public:
 
     // Apply forward Discrete Fourier Transform
     Dft.applyDFT(Py,FY);
-
   }
 
 
diff --git a/Tests/Kernels/testChebTensorialAlgorithm.cpp b/Tests/Kernels/testChebTensorialAlgorithm.cpp
index 2d522bbac4608331d40121344884815003b81462..6d8f11ef8ed65e900eeb4de751c2e2c5c63d2d40 100644
--- a/Tests/Kernels/testChebTensorialAlgorithm.cpp
+++ b/Tests/Kernels/testChebTensorialAlgorithm.cpp
@@ -15,7 +15,7 @@
 // ===================================================================================
 
 // ==== CMAKE =====
-// @FUSE_FFT
+// @FUSE_BLAS
 // ================
 
 #include <iostream>
@@ -69,21 +69,30 @@ int main(int argc, char* argv[])
 
   // typedefs
 //  typedef FInterpMatrixKernel_R_IJ MatrixKernelClass;
-  typedef FInterpMatrixKernel_IOR MatrixKernelClass;
+//  typedef FInterpMatrixKernel_IOR MatrixKernelClass;
 //  typedef FInterpMatrixKernelR MatrixKernelClass;
+  typedef FInterpMatrixKernel_R_IJK MatrixKernelClass;
 
   const KERNEL_FUNCTION_IDENTIFIER MK_ID = MatrixKernelClass::Identifier;
+  const unsigned int NPV  = MatrixKernelClass::NPV;
+  const unsigned int NPOT = MatrixKernelClass::NPOT;
   const unsigned int NRHS = MatrixKernelClass::NRHS;
   const unsigned int NLHS = MatrixKernelClass::NLHS;
 
+  const double CoreWidth = 0.1;
+  const MatrixKernelClass DirectMatrixKernel(CoreWidth);
+  std::cout<< "CoreWidth2 = "<< DirectMatrixKernel.getCoreWidth2()<<std::endl;
+
   // init particles position and physical value
   struct TestParticle{
     FPoint position;
-    FReal forces[3][NLHS];
-    FReal physicalValue[NRHS];
-    FReal potential[NLHS];
+    FReal forces[3][NPOT];
+    FReal physicalValue[NPV];
+    FReal potential[NPOT];
   };
 
+  const FReal FRandMax = FReal(RAND_MAX);
+
   // open particle file
   FFmaScanfLoader loader(filename);
   if(!loader.isOpen()) throw std::runtime_error("Particle file couldn't be opened!");
@@ -95,13 +104,19 @@ int main(int argc, char* argv[])
     loader.fillParticle(&position,&physicalValue);
     // get copy
     particles[idxPart].position       = position;
-    for(unsigned idxRhs = 0; idxRhs<NRHS;++idxRhs)
-      particles[idxPart].physicalValue[idxRhs]  = physicalValue; // copy same physical value in each component
-    for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs){
-      particles[idxPart].potential[idxLhs]      = 0.0;
-      particles[idxPart].forces[0][idxLhs]      = 0.0;
-      particles[idxPart].forces[1][idxLhs]      = 0.0;
-      particles[idxPart].forces[2][idxLhs]      = 0.0;
+    // Set physical values
+    for(unsigned idxPV = 0; idxPV<NPV;++idxPV){
+    //   Either copy same physical value in each component
+      particles[idxPart].physicalValue[idxPV]  = physicalValue; 
+    // ... or set random value
+//      particles[idxPart].physicalValue[idxPV]  = physicalValue*FReal(rand())/FRandMax;
+    }
+
+    for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){
+      particles[idxPart].potential[idxPot]      = 0.0;
+      particles[idxPart].forces[0][idxPot]      = 0.0;
+      particles[idxPart].forces[1][idxPot]      = 0.0;
+      particles[idxPart].forces[2][idxPot]      = 0.0;
     }
   }
 
@@ -121,7 +136,8 @@ int main(int argc, char* argv[])
                                      particles[idxOther].position.getX(), particles[idxOther].position.getY(),
                                      particles[idxOther].position.getZ(), particles[idxOther].physicalValue,
                                      particles[idxOther].forces[0], particles[idxOther].forces[1],
-                                     particles[idxOther].forces[2], particles[idxOther].potential);
+                                     particles[idxOther].forces[2], particles[idxOther].potential,
+                                     &DirectMatrixKernel);
           else if(MK_ID == ID_OVER_R)
             FP2P::MutualParticlesIOR(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(),
                                      particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue,
@@ -140,6 +156,16 @@ int main(int argc, char* argv[])
                                   particles[idxOther].position.getZ(), particles[idxOther].physicalValue[0],
                                   particles[idxOther].forces[0], particles[idxOther].forces[1],
                                   particles[idxOther].forces[2], particles[idxOther].potential);
+          else if(MK_ID == R_IJK)
+            FP2P::MutualParticlesRIJK(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(),
+                                      particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue,
+                                      particles[idxTarget].forces[0], particles[idxTarget].forces[1],
+                                      particles[idxTarget].forces[2], particles[idxTarget].potential,
+                                      particles[idxOther].position.getX(), particles[idxOther].position.getY(),
+                                      particles[idxOther].position.getZ(), particles[idxOther].physicalValue,
+                                      particles[idxOther].forces[0], particles[idxOther].forces[1],
+                                      particles[idxOther].forces[2], particles[idxOther].potential,
+                                      &DirectMatrixKernel);
           else 
             std::runtime_error("Provide a valid matrix kernel!");
         }
@@ -153,10 +179,10 @@ int main(int argc, char* argv[])
 
   ////////////////////////////////////////////////////////////////////
 
-  {	// begin Lagrange kernel
+  {	// begin Chebyshev kernel
 
     // accuracy
-    const unsigned int ORDER = 5; //PB: Beware order=9 exceed memory (even 8 since compute _C then store in C) 
+const unsigned int ORDER = 7 ;
     const FReal epsilon = FReal(1e-7);
 
     typedef FP2PParticleContainerIndexed<NRHS,NLHS> ContainerClass;
@@ -180,11 +206,16 @@ int main(int argc, char* argv[])
 
       for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
         // put in tree
-        // PB: here we have to know the number of NRHS...
-        if(NRHS==1)
+        // PB: here we have to know NPV...
+        if(NPV==1)
           tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue[0]);
-        else if(NRHS==3)
+        else if(NPV==3)
           tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2]);
+        else if(NPV==9) // R_IJK
+          tree.insert(particles[idxPart].position, idxPart, 
+                      particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2],
+                      particles[idxPart].physicalValue[3], particles[idxPart].physicalValue[4], particles[idxPart].physicalValue[5],
+                      particles[idxPart].physicalValue[6], particles[idxPart].physicalValue[7], particles[idxPart].physicalValue[8]);
         else 
           std::runtime_error("Insert correct number of physical values!");
 
@@ -198,7 +229,7 @@ int main(int argc, char* argv[])
     { // -----------------------------------------------------
       std::cout << "\nChebyshev FMM (ORDER="<< ORDER << ") ... " << std::endl;
       time.tic();
-      KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), epsilon);
+      KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), epsilon,CoreWidth);
       FmmClass algorithm(&tree, &kernels);
       algorithm.execute();
       time.tac();
@@ -208,23 +239,21 @@ int main(int argc, char* argv[])
 
     { // -----------------------------------------------------
       std::cout << "\nError computation ... " << std::endl;
-      FMath::FAccurater potentialDiff[NLHS];
-      FMath::FAccurater fx[NLHS], fy[NLHS], fz[NLHS];
+      FMath::FAccurater potentialDiff[NPOT];
+      FMath::FAccurater fx[NPOT], fy[NPOT], fz[NPOT];
 
-      FReal checkPhysVal[20000][NRHS];
-      FReal checkPotential[20000][NLHS];
-      FReal checkfx[20000][NLHS];
+      FReal checkPotential[20000][NPOT];
+      FReal checkfx[20000][NPOT];
 
       { // Check that each particle has been summed with all other
 
         tree.forEachLeaf([&](LeafClass* leaf){
-            for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs){
+            for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){
 
-              const FReal*const physVals = leaf->getTargets()->getPhysicalValues(idxLhs);
-              const FReal*const potentials = leaf->getTargets()->getPotentials(idxLhs);
-              const FReal*const forcesX = leaf->getTargets()->getForcesX(idxLhs);
-              const FReal*const forcesY = leaf->getTargets()->getForcesY(idxLhs);
-              const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxLhs);
+              const FReal*const potentials = leaf->getTargets()->getPotentials(idxPot);
+              const FReal*const forcesX = leaf->getTargets()->getForcesX(idxPot);
+              const FReal*const forcesY = leaf->getTargets()->getForcesY(idxPot);
+              const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxPot);
               const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
               const FVector<int>& indexes = leaf->getTargets()->getIndexes();
 
@@ -232,46 +261,106 @@ int main(int argc, char* argv[])
                 const int indexPartOrig = indexes[idxPart];
 
                 //PB: store potential in array[nbParticles]
-                checkPhysVal[indexPartOrig][idxLhs]=physVals[idxPart];              
-                checkPotential[indexPartOrig][idxLhs]=potentials[idxPart];              
-                checkfx[indexPartOrig][idxLhs]=forcesX[idxPart];              
-
-                potentialDiff[idxLhs].add(particles[indexPartOrig].potential[idxLhs],potentials[idxPart]);
-                fx[idxLhs].add(particles[indexPartOrig].forces[0][idxLhs],forcesX[idxPart]);
-                fy[idxLhs].add(particles[indexPartOrig].forces[1][idxLhs],forcesY[idxPart]);
-                fz[idxLhs].add(particles[indexPartOrig].forces[2][idxLhs],forcesZ[idxPart]);
+                checkPotential[indexPartOrig][idxPot]=potentials[idxPart];
+                checkfx[indexPartOrig][idxPot]=forcesX[idxPart];
+
+                // update accuracy
+                potentialDiff[idxPot].add(particles[indexPartOrig].potential[idxPot],potentials[idxPart]);
+                fx[idxPot].add(particles[indexPartOrig].forces[0][idxPot],forcesX[idxPart]);
+                fy[idxPot].add(particles[indexPartOrig].forces[1][idxPot],forcesY[idxPart]);
+                fz[idxPot].add(particles[indexPartOrig].forces[2][idxPot],forcesZ[idxPart]);
               }
-            }// NLHS
+            }// NPOT
           });
       }
 
 //      std::cout << "Check Potential, forceX " << std::endl;
 //      for(int idxPart = 0 ; idxPart < 20 ; ++idxPart)
-//        for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs){
-//          std::cout << checkPhysVal[idxPart][idxLhs] << ", "<< particles[idxPart].physicalValue[idxLhs]<< "|| ";
-//          std::cout << checkPotential[idxPart][idxLhs] << ", "<< particles[idxPart].potential[idxLhs]<< "|| ";
-//          std::cout << checkfx[idxPart][idxLhs] << ", "<< particles[idxPart].forces[0][idxLhs] << std::endl;
+//        for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){
+//          std::cout << checkPotential[idxPart][idxPot] << ", "<< particles[idxPart].potential[idxPot]<< "|| ";
+//          std::cout << checkfx[idxPart][idxPot] << ", "<< particles[idxPart].forces[0][idxPot] << std::endl;
 //        }
 //      std::cout << std::endl;
 
       // Print for information
-      std::cout << "\nAbsolute errors: " << std::endl;
-      std::cout << "Potential: ";
-      for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << potentialDiff[idxLhs] << ", " ;
+      std::cout << "\nRelative Inf/L2 errors: " << std::endl;
+      std::cout << "  Potential: " << std::endl;
+      for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) {
+        std::cout << "    " << idxPot << ": "
+                  << potentialDiff[idxPot].getRelativeInfNorm() << ", " 
+                  << potentialDiff[idxPot].getRelativeL2Norm() 
+                  << std::endl;
+      }
       std::cout << std::endl;
-      std::cout << "Fx: "; 
-      for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << fx[idxLhs] << ", " ;
+      std::cout << "  Fx: " << std::endl; 
+      for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) {
+        std::cout << "    " << idxPot << ": "
+                  << fx[idxPot].getRelativeInfNorm() << ", " 
+                  << fx[idxPot].getRelativeL2Norm()
+                  << std::endl;
+      }
       std::cout  << std::endl;
-      std::cout << "Fy: "; 
-      for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << fy[idxLhs] << ", " ;
+      std::cout << "  Fy: " << std::endl; 
+      for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) {
+        std::cout << "    " << idxPot << ": "
+                  << fy[idxPot].getRelativeInfNorm() << ", " 
+                  << fy[idxPot].getRelativeL2Norm()
+                  << std::endl;
+      }
       std::cout  << std::endl;
-      std::cout << "Fz: "; 
-      for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << fz[idxLhs] << ", " ;
+      std::cout << "  Fz: " << std::endl; 
+      for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) {
+        std::cout << "    " << idxPot << ": "
+                  << fz[idxPot].getRelativeInfNorm() << ", " 
+                  << fz[idxPot].getRelativeL2Norm()
+                  << std::endl;
+      }
       std::cout << std::endl;
 
+      { // -----------------------------------------------------
+
+
+        std::cout << "\nStore results in file ... "<<std::endl;
+        std::ostringstream sstream;
+        if(MK_ID == R_IJ)
+          sstream << "testChebRij_h"<< TreeHeight << "_a" << 1000*CoreWidth;
+        else if(MK_ID == R_IJK)
+          sstream << "testChebRijk_h"<< TreeHeight << "_a" << 1000*CoreWidth;
+
+        if(TreeHeight>2) sstream <<"_o"<<ORDER;
+        const std::string para_ext = sstream.str();
+        std::string outname = "../tmp/"+para_ext+".dat";
+        std::ofstream fout(outname.c_str());
+        fout.precision(15);
+        
+        for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) 
+          fout << potentialDiff[idxLhs].getL2Norm() << " " 
+               << potentialDiff[idxLhs].getInfNorm() << " "
+               << potentialDiff[idxLhs].getRelativeL2Norm() << " " 
+               << potentialDiff[idxLhs].getRelativeInfNorm() << std::endl;
+        for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) 
+          fout << fx[idxLhs].getL2Norm() << " " 
+               << fx[idxLhs].getInfNorm() << " "
+               << fx[idxLhs].getRelativeL2Norm() << " " 
+               << fx[idxLhs].getRelativeInfNorm() << std::endl;
+        for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) 
+          fout << fy[idxLhs].getL2Norm() << " " 
+               << fy[idxLhs].getInfNorm() << " "
+               << fy[idxLhs].getRelativeL2Norm() << " " 
+               << fy[idxLhs].getRelativeInfNorm() << std::endl;
+        for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) 
+          fout << fz[idxLhs].getL2Norm() << " " 
+               << fz[idxLhs].getInfNorm() << " "
+               << fz[idxLhs].getRelativeL2Norm() << " " 
+               << fz[idxLhs].getRelativeInfNorm() << std::endl;
+        fout << std::flush;
+        fout.close();
+
+      } // -----------------------------------------------------
+
     } // -----------------------------------------------------
 
-  } // end Lagrange kernel
+  } // end Chebyshev kernel
 
   return 0;
 }
diff --git a/Tests/Kernels/testUnifTensorialAlgorithm.cpp b/Tests/Kernels/testUnifTensorialAlgorithm.cpp
index 5b30af3ee98e060dcb55d93bb67a6ab7194da967..dfdf2d5bdead5a9d8c788b62a4523e000434528c 100644
--- a/Tests/Kernels/testUnifTensorialAlgorithm.cpp
+++ b/Tests/Kernels/testUnifTensorialAlgorithm.cpp
@@ -69,20 +69,31 @@ int main(int argc, char* argv[])
 
   // typedefs
 //  typedef FInterpMatrixKernel_R_IJ MatrixKernelClass;
-  typedef FInterpMatrixKernel_IOR MatrixKernelClass;
+//  typedef FInterpMatrixKernel_IOR MatrixKernelClass;
+  typedef FInterpMatrixKernel_R_IJK MatrixKernelClass;
 
+  // usefull features of matrix kernel
   const KERNEL_FUNCTION_IDENTIFIER MK_ID = MatrixKernelClass::Identifier;
+  const unsigned int NPV  = MatrixKernelClass::NPV;
+  const unsigned int NPOT = MatrixKernelClass::NPOT;
   const unsigned int NRHS = MatrixKernelClass::NRHS;
   const unsigned int NLHS = MatrixKernelClass::NLHS;
 
+  const double CoreWidth = 10;
+  const MatrixKernelClass DirectMatrixKernel(CoreWidth);
+
+  std::cout<< "CoreWidth2 = "<< DirectMatrixKernel.getCoreWidth2()<<std::endl;
+
   // init particles position and physical value
   struct TestParticle{
     FPoint position;
-    FReal forces[3][NLHS];
-    FReal physicalValue[NRHS];
-    FReal potential[NLHS];
+    FReal forces[3][NPOT];
+    FReal physicalValue[NPV];
+    FReal potential[NPOT];
   };
 
+  const FReal FRandMax = FReal(RAND_MAX);
+
   // open particle file
   FFmaScanfLoader loader(filename);
   if(!loader.isOpen()) throw std::runtime_error("Particle file couldn't be opened!");
@@ -92,15 +103,21 @@ int main(int argc, char* argv[])
     FPoint position;
     FReal physicalValue = 0.0;
     loader.fillParticle(&position,&physicalValue);
+
     // get copy
     particles[idxPart].position       = position;
-    for(unsigned idxRhs = 0; idxRhs<NRHS;++idxRhs)
-      particles[idxPart].physicalValue[idxRhs]  = physicalValue; // copy same physical value in each component
-    for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs){
-      particles[idxPart].potential[idxLhs]      = 0.0;
-      particles[idxPart].forces[0][idxLhs]      = 0.0;
-      particles[idxPart].forces[1][idxLhs]      = 0.0;
-      particles[idxPart].forces[2][idxLhs]      = 0.0;
+    // Set physical values
+    for(unsigned idxPV = 0; idxPV<NPV;++idxPV){
+//    //   Either copy same physical value in each component
+        particles[idxPart].physicalValue[idxPV]  = physicalValue;
+    // ... or set random value
+//      particles[idxPart].physicalValue[idxPV]  = physicalValue*FReal(rand())/FRandMax;
+    }
+    for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){
+      particles[idxPart].potential[idxPot]      = 0.0;
+      particles[idxPart].forces[0][idxPot]      = 0.0;
+      particles[idxPart].forces[1][idxPot]      = 0.0;
+      particles[idxPart].forces[2][idxPot]      = 0.0;
     }
   }
 
@@ -120,7 +137,8 @@ int main(int argc, char* argv[])
                                      particles[idxOther].position.getX(), particles[idxOther].position.getY(),
                                      particles[idxOther].position.getZ(), particles[idxOther].physicalValue,
                                      particles[idxOther].forces[0], particles[idxOther].forces[1],
-                                     particles[idxOther].forces[2], particles[idxOther].potential);
+                                     particles[idxOther].forces[2], particles[idxOther].potential,
+                                     &DirectMatrixKernel);
           else if(MK_ID == ID_OVER_R)
             FP2P::MutualParticlesIOR(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(),
                                      particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue,
@@ -130,6 +148,16 @@ int main(int argc, char* argv[])
                                      particles[idxOther].position.getZ(), particles[idxOther].physicalValue,
                                      particles[idxOther].forces[0], particles[idxOther].forces[1],
                                      particles[idxOther].forces[2], particles[idxOther].potential);
+          else if(MK_ID == R_IJK)
+            FP2P::MutualParticlesRIJK(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(),
+                                      particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue,
+                                      particles[idxTarget].forces[0], particles[idxTarget].forces[1],
+                                      particles[idxTarget].forces[2], particles[idxTarget].potential,
+                                      particles[idxOther].position.getX(), particles[idxOther].position.getY(),
+                                      particles[idxOther].position.getZ(), particles[idxOther].physicalValue,
+                                      particles[idxOther].forces[0], particles[idxOther].forces[1],
+                                      particles[idxOther].forces[2], particles[idxOther].potential,
+                                      &DirectMatrixKernel);
           else 
             std::runtime_error("Provide a valid matrix kernel!");
         }
@@ -146,7 +174,7 @@ int main(int argc, char* argv[])
   {	// begin Lagrange kernel
 
     // accuracy
-    const unsigned int ORDER = 7;
+const unsigned int ORDER = 12 ;
 
     typedef FP2PParticleContainerIndexed<NRHS,NLHS> ContainerClass;
 
@@ -163,14 +191,26 @@ int main(int argc, char* argv[])
 
     { // -----------------------------------------------------
       std::cout << "Creating & Inserting " << loader.getNumberOfParticles()
-                << " particles ..." << std::endl;
+                << " particles ..." << std::endl; 
+
+
+
+
       std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl;
       time.tic();
 
       for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
         // put in tree
-        // PB: here we have to know the number of NRHS...
-        tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2]);
+        if(NPV==3) // R_IJ or IOR
+          tree.insert(particles[idxPart].position, idxPart, 
+                      particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2]);
+        else if(NPV==9) // R_IJK
+          tree.insert(particles[idxPart].position, idxPart, 
+                      particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2],
+                      particles[idxPart].physicalValue[3], particles[idxPart].physicalValue[4], particles[idxPart].physicalValue[5],
+                      particles[idxPart].physicalValue[6], particles[idxPart].physicalValue[7], particles[idxPart].physicalValue[8]);
+        else
+          std::runtime_error("NPV not yet supported in test! Add new case.");
       }
 
       time.tac();
@@ -181,7 +221,7 @@ int main(int argc, char* argv[])
     { // -----------------------------------------------------
       std::cout << "\nLagrange/Uniform grid FMM (ORDER="<< ORDER << ") ... " << std::endl;
       time.tic();
-      KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox());
+      KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(),CoreWidth);
       FmmClass algorithm(&tree, &kernels);
       algorithm.execute();
       time.tac();
@@ -191,23 +231,21 @@ int main(int argc, char* argv[])
 
     { // -----------------------------------------------------
       std::cout << "\nError computation ... " << std::endl;
-      FMath::FAccurater potentialDiff[NLHS];
-      FMath::FAccurater fx[NLHS], fy[NLHS], fz[NLHS];
+      FMath::FAccurater potentialDiff[NPOT];
+      FMath::FAccurater fx[NPOT], fy[NPOT], fz[NPOT];
 
-      FReal checkPhysVal[20000][NRHS];
-      FReal checkPotential[20000][NLHS];
-      FReal checkfx[20000][NLHS];
+      FReal checkPotential[20000][NPOT];
+      FReal checkfx[20000][NPOT];
 
       { // Check that each particle has been summed with all other
 
         tree.forEachLeaf([&](LeafClass* leaf){
-            for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs){
+            for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){
 
-              const FReal*const physVals = leaf->getTargets()->getPhysicalValues(idxLhs);
-              const FReal*const potentials = leaf->getTargets()->getPotentials(idxLhs);
-              const FReal*const forcesX = leaf->getTargets()->getForcesX(idxLhs);
-              const FReal*const forcesY = leaf->getTargets()->getForcesY(idxLhs);
-              const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxLhs);
+              const FReal*const potentials = leaf->getTargets()->getPotentials(idxPot);
+              const FReal*const forcesX = leaf->getTargets()->getForcesX(idxPot);
+              const FReal*const forcesY = leaf->getTargets()->getForcesY(idxPot);
+              const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxPot);
               const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles();
               const FVector<int>& indexes = leaf->getTargets()->getIndexes();
 
@@ -215,43 +253,102 @@ int main(int argc, char* argv[])
                 const int indexPartOrig = indexes[idxPart];
 
                 //PB: store potential in array[nbParticles]
-                checkPhysVal[indexPartOrig][idxLhs]=physVals[idxPart];              
-                checkPotential[indexPartOrig][idxLhs]=potentials[idxPart];              
-                checkfx[indexPartOrig][idxLhs]=forcesX[idxPart];              
-
-                potentialDiff[idxLhs].add(particles[indexPartOrig].potential[idxLhs],potentials[idxPart]);
-                fx[idxLhs].add(particles[indexPartOrig].forces[0][idxLhs],forcesX[idxPart]);
-                fy[idxLhs].add(particles[indexPartOrig].forces[1][idxLhs],forcesY[idxPart]);
-                fz[idxLhs].add(particles[indexPartOrig].forces[2][idxLhs],forcesZ[idxPart]);
+                checkPotential[indexPartOrig][idxPot]=potentials[idxPart];              
+                checkfx[indexPartOrig][idxPot]=forcesX[idxPart];              
+
+                potentialDiff[idxPot].add(particles[indexPartOrig].potential[idxPot],potentials[idxPart]);
+                fx[idxPot].add(particles[indexPartOrig].forces[0][idxPot],forcesX[idxPart]);
+                fy[idxPot].add(particles[indexPartOrig].forces[1][idxPot],forcesY[idxPart]);
+                fz[idxPot].add(particles[indexPartOrig].forces[2][idxPot],forcesZ[idxPart]);
               }
-            }// NLHS
+            }// NPOT
           });
       }
 
-//      std::cout << "Check Potential, forceX " << std::endl;
-//      for(int idxPart = 0 ; idxPart < 20 ; ++idxPart)
-//        for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs){
-//          std::cout << checkPhysVal[idxPart][idxLhs] << ", "<< particles[idxPart].physicalValue[idxLhs]<< "|| ";
-//          std::cout << checkPotential[idxPart][idxLhs] << ", "<< particles[idxPart].potential[idxLhs]<< "|| ";
-//          std::cout << checkfx[idxPart][idxLhs] << ", "<< particles[idxPart].forces[0][idxLhs] << std::endl;
-//        }
-//      std::cout << std::endl;
+      std::cout << "Check Potential, forceX, ... " << std::endl;
+      for(int idxPart = 0 ; idxPart < 20 ; ++idxPart)
+        for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){
+          std::cout << checkPotential[idxPart][idxPot] << ", "<< particles[idxPart].potential[idxPot]<< "|| ";
+          std::cout << checkfx[idxPart][idxPot] << ", "<< particles[idxPart].forces[0][idxPot] << std::endl;
+        }
+      std::cout << std::endl;
 
       // Print for information
-      std::cout << "\nAbsolute errors: " << std::endl;
-      std::cout << "Potential: ";
-      for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << potentialDiff[idxLhs] << ", " ;
+      std::cout << "\nRelative Inf/L2 errors: " << std::endl;
+      std::cout << "  Potential: " << std::endl;
+      for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) {
+        std::cout << "    " << idxPot << ": "
+                  << potentialDiff[idxPot].getRelativeInfNorm() << ", " 
+                  << potentialDiff[idxPot].getRelativeL2Norm() 
+                  << std::endl;
+      }
       std::cout << std::endl;
-      std::cout << "Fx: "; 
-      for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << fx[idxLhs] << ", " ;
+      std::cout << "  Fx: " << std::endl; 
+      for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) {
+        std::cout << "    " << idxPot << ": "
+                  << fx[idxPot].getRelativeInfNorm() << ", " 
+                  << fx[idxPot].getRelativeL2Norm()
+                  << std::endl;
+      }
       std::cout  << std::endl;
-      std::cout << "Fy: "; 
-      for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << fy[idxLhs] << ", " ;
+      std::cout << "  Fy: " << std::endl; 
+      for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) {
+        std::cout << "    " << idxPot << ": "
+                  << fy[idxPot].getRelativeInfNorm() << ", " 
+                  << fy[idxPot].getRelativeL2Norm()
+                  << std::endl;
+      }
       std::cout  << std::endl;
-      std::cout << "Fz: "; 
-      for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << fz[idxLhs] << ", " ;
+      std::cout << "  Fz: " << std::endl; 
+      for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) {
+        std::cout << "    " << idxPot << ": "
+                  << fz[idxPot].getRelativeInfNorm() << ", " 
+                  << fz[idxPot].getRelativeL2Norm()
+                  << std::endl;
+      }
       std::cout << std::endl;
 
+      { // -----------------------------------------------------
+
+
+        std::cout << "\nStore results in file ... "<<std::endl;
+        std::ostringstream sstream;
+        if(MK_ID == R_IJ)
+          sstream << "testUnifRij_h"<< TreeHeight << "_a" << 1000*CoreWidth;
+        else if(MK_ID == R_IJK)
+          sstream << "testUnifRijk_h"<< TreeHeight << "_a" << 1000*CoreWidth;
+
+        if(TreeHeight>2) sstream <<"_o"<<ORDER;
+        const std::string para_ext = sstream.str();
+        std::string outname = "../tmp/"+para_ext+".dat";
+        std::ofstream fout(outname.c_str());
+        fout.precision(15);
+        
+        for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) 
+          fout << potentialDiff[idxLhs].getL2Norm() << " " 
+               << potentialDiff[idxLhs].getInfNorm() << " "
+               << potentialDiff[idxLhs].getRelativeL2Norm() << " " 
+               << potentialDiff[idxLhs].getRelativeInfNorm() << std::endl;
+        for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) 
+          fout << fx[idxLhs].getL2Norm() << " " 
+               << fx[idxLhs].getInfNorm() << " "
+               << fx[idxLhs].getRelativeL2Norm() << " " 
+               << fx[idxLhs].getRelativeInfNorm() << std::endl;
+        for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) 
+          fout << fy[idxLhs].getL2Norm() << " " 
+               << fy[idxLhs].getInfNorm() << " "
+               << fy[idxLhs].getRelativeL2Norm() << " " 
+               << fy[idxLhs].getRelativeInfNorm() << std::endl;
+        for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) 
+          fout << fz[idxLhs].getL2Norm() << " " 
+               << fz[idxLhs].getInfNorm() << " "
+               << fz[idxLhs].getRelativeL2Norm() << " " 
+               << fz[idxLhs].getRelativeInfNorm() << std::endl;
+        fout << std::flush;
+        fout.close();
+
+      } // -----------------------------------------------------
+
     } // -----------------------------------------------------
 
   } // end Lagrange kernel
diff --git a/Tests/Utils/testUnifInterpolator.cpp b/Tests/Utils/testUnifInterpolator.cpp
index ac1a22838bf63b39f5592931f1d17f9d5bfeaff8..a964d1362c7a8c33b5b20991eba809f837c84290 100755
--- a/Tests/Utils/testUnifInterpolator.cpp
+++ b/Tests/Utils/testUnifInterpolator.cpp
@@ -111,7 +111,7 @@ int main(int, char **){
 
   ////////////////////////////////////////////////////////////////////
   // approximative computation
-  const unsigned int ORDER = 2;
+  const unsigned int ORDER = 4;
   const unsigned int nnodes = TensorTraits<ORDER>::nnodes;
   typedef FUnifInterpolator<ORDER,MatrixKernelClass> InterpolatorClass;
   InterpolatorClass S;
@@ -220,6 +220,8 @@ int main(int, char **){
 //  }
 //  std::cout<<std::endl;
 
+  if(ORDER<4){// display some extra results for low orders
+
   // Check multi-index
   std::cout<< "node_ids=" <<std::endl;
   for (unsigned int i=0; i<nnodes; ++i)
@@ -228,7 +230,6 @@ int main(int, char **){
              << node_ids[i][2] <<", "<<std::endl;
   std::cout<<std::endl;
 
-
   // Check multi-index diff
   std::cout<< "node_ids=" <<std::endl;
   for (unsigned int i=0; i<nnodes; ++i){
@@ -266,6 +267,8 @@ int main(int, char **){
 //    } std::cout<<std::endl;
 //  } std::cout<<std::endl;
 
+  }// display some extra results for low orders
+
   // In 1D the Zero Padding consists in
   // inserting ORDER-1 zeros in the multipole exp
   // in order to apply the (ORDER+ORDER-1)x(ORDER+ORDER-1)
@@ -460,6 +463,13 @@ int main(int, char **){
   FReal*        p = new FReal[M];
   FBlas::setzero(M, p);
 
+  // null vectors for easy calculation of relative errors
+  FReal*        null_p = new FReal[M];
+  FBlas::setzero(M, null_p);
+  FReal*        null_f = new FReal [M * 3];
+  FBlas::setzero(M*3, null_f);
+
+
   { // start direct computation
     unsigned int counter = 0;
 
@@ -520,10 +530,12 @@ int main(int, char **){
 //  std::cout << std::endl;
 
   std::cout << "\nPotential error:" << std::endl;
-  std::cout << "Absolute error   = " << FMath::FAccurater( p, approx_p, M) << std::endl;
+  std::cout << "Relative Inf error   = " << FMath::FAccurater( p, approx_p, M).getRelativeInfNorm() << std::endl;
+  std::cout << "Relative L2 error   = " << FMath::FAccurater( p, approx_p, M).getRelativeL2Norm() << std::endl;
 
   std::cout << "\nForce error:" << std::endl;
-  std::cout << "Absolute L2 error   = " << FMath::FAccurater( f, approx_f, M*3) << std::endl;
+  std::cout << "Relative Inf error   = " << FMath::FAccurater( f, approx_f, M*3).getRelativeInfNorm() << std::endl;
+  std::cout << "Relative L2 error   = " << FMath::FAccurater( f, approx_f, M*3).getRelativeL2Norm() << std::endl;
   std::cout << std::endl;
 
   // free memory
diff --git a/Tests/Utils/testUnifTensorialInterpolator.cpp b/Tests/Utils/testUnifTensorialInterpolator.cpp
index 1602b0052ae7423f18d272aa2fbcb06108f40067..f04164986a20e38cbdbba513842e6f0c9f7079e3 100755
--- a/Tests/Utils/testUnifTensorialInterpolator.cpp
+++ b/Tests/Utils/testUnifTensorialInterpolator.cpp
@@ -59,9 +59,13 @@ int main(int, char **){
 //  typedef FInterpMatrixKernelR MatrixKernelClass;
   typedef FInterpMatrixKernel_R_IJ MatrixKernelClass;
   typedef FInterpMatrixKernel_R_IJK RIJKMatrixKernelClass; // PB: force computation
+  const double a = 0.0; // core width (Beware! if diff from 0. then Kernel should be NON HOMOGENEOUS !!!)
+
   const unsigned int dim = MatrixKernelClass::DIM;
   const unsigned int nrhs = MatrixKernelClass::NRHS;
   const unsigned int nlhs = MatrixKernelClass::NLHS;
+  const unsigned int npot = MatrixKernelClass::NPOT;
+
   typedef FP2PParticleContainer<nrhs,nlhs> ContainerClass;
   typedef FSimpleLeaf<ContainerClass> LeafClass;
 
@@ -90,7 +94,7 @@ int main(int, char **){
       FReal x = (FReal(rand())/FRandMax - FReal(.5)) * width + cx.getX();
       FReal y = (FReal(rand())/FRandMax - FReal(.5)) * width + cx.getY();
       FReal z = (FReal(rand())/FRandMax - FReal(.5)) * width + cx.getZ();
-      // PB: need to know the actual value of NLHS
+      // PB: need to know the actual value of NRHS
       X.push(FPoint(x, y, z), FReal(rand())/FRandMax, FReal(rand())/FRandMax, FReal(rand())/FRandMax);
     }
   }
@@ -98,8 +102,8 @@ int main(int, char **){
 
   ////////////////////////////////////////////////////////////////////
   LeafClass Y;
-  //  FPoint cy(FReal(2.)*width, 0., 0.);
-  FPoint cy(FReal(2.)*width, FReal(2.)*width, 0.);
+    FPoint cy(FReal(2.)*width, 0., 0.);
+  //FPoint cy(FReal(2.)*width, FReal(2.)*width, 0.);
 
   const unsigned long N = 5000;
   std::cout << "Fill the leaf Y of width " << width
@@ -118,7 +122,7 @@ int main(int, char **){
 
   ////////////////////////////////////////////////////////////////////
   // approximative computation
-  const unsigned int ORDER = 5;
+  const unsigned int ORDER = 9;
   const unsigned int nnodes = TensorTraits<ORDER>::nnodes;
   typedef FUnifInterpolator<ORDER,MatrixKernelClass> InterpolatorClass;
   InterpolatorClass S;
@@ -148,14 +152,13 @@ int main(int, char **){
   for (unsigned int i=0; i<nnodes; ++i) {
     for (unsigned int j=0; j<nnodes; ++j){
       
-      for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs)
-        for (unsigned int idxRhs=0; idxRhs<nrhs; ++idxRhs){
-          unsigned idxK = idxLhs*3+idxRhs; // or counter
-          unsigned int d = MatrixKernel.getPosition(idxK);
+      for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs){
+        unsigned int idxRhs = idxLhs % npot;
+        unsigned int d = MatrixKernel.getPosition(idxLhs);
 
-          F[i+idxLhs*nnodes] += MatrixKernelClass(d).evaluate(rootsX[i], rootsY[j]) * W[j+idxRhs*nnodes];
+        F[i+idxLhs*nnodes] += MatrixKernelClass(a,d).evaluate(rootsX[i], rootsY[j]) * W[j+idxRhs*nnodes];
 
-        }
+      }
     }
   }
   std::cout << "took " << time.tacAndElapsed() << "s" << std::endl;
@@ -175,7 +178,7 @@ int main(int, char **){
     for (unsigned int j=0; j<nnodes; ++j){
 
       for (unsigned int d=0; d<dim; ++d){
-        K[d*nnodes*nnodes+i*nnodes+j] = MatrixKernelClass(d).evaluate(rootsX[i], rootsY[j]);        
+        K[d*nnodes*nnodes+i*nnodes+j] = MatrixKernelClass(a,d).evaluate(rootsX[i], rootsY[j]);        
       }
 
     }
@@ -187,12 +190,11 @@ int main(int, char **){
   for (unsigned int i=0; i<nnodes; ++i)
     for (unsigned int j=0; j<nnodes; ++j){
 
-      for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs)
-        for (unsigned int idxRhs=0; idxRhs<nrhs; ++idxRhs){
-          unsigned idxK = idxLhs*3+idxRhs; // or counter
-          unsigned int d = MatrixKernel.getPosition(idxK);
+      for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs){
+        unsigned int idxRhs = idxLhs % npot;
+        unsigned int d = MatrixKernel.getPosition(idxLhs);
 
-          F[i+idxLhs*nnodes] += K[d*nnodes*nnodes+i*nnodes+j] * W[j+idxRhs*nnodes];
+        F[i+idxLhs*nnodes] += K[d*nnodes*nnodes+i*nnodes+j] * W[j+idxRhs*nnodes];
 
     }
   }
@@ -229,7 +231,7 @@ int main(int, char **){
         for (unsigned int d=0; d<dim; ++d){
 
           C[d*rc + ido]
-            = MatrixKernelClass(d).evaluate(rootsX[node_ids_pairs[ido][0]], 
+            = MatrixKernelClass(a,d).evaluate(rootsX[node_ids_pairs[ido][0]], 
                                     rootsY[node_ids_pairs[ido][1]]);
         }
         
@@ -330,15 +332,13 @@ int main(int, char **){
 
     // Application of M2L in PHYSICAL SPACE
     for (unsigned int pj=0; pj<rc; ++pj)
-      for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs)
-        for (unsigned int idxRhs=0; idxRhs<nrhs; ++idxRhs){
-          unsigned idxK = idxLhs*3+idxRhs; // or counter
-
-          unsigned int d = MatrixKernel.getPosition(idxK);
+      for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs){
+        unsigned int idxRhs = idxLhs % npot;
+        unsigned int d = MatrixKernel.getPosition(idxLhs);
 
-          LocalExp[i + idxLhs*nnodes]+=C[pj + d*rc]*PaddedMultExp[pj + idxRhs*rc];
+        LocalExp[i + idxLhs*nnodes]+=C[pj + d*rc]*PaddedMultExp[pj + idxRhs*rc];
 
-        }
+      }
 
   }// end i
   time.tac();
@@ -441,17 +441,15 @@ int main(int, char **){
 //            reinterpret_cast<FReal*>(FPLocalExp));
   // > or perform entrywise product manually
   FComplexe tmpFX;
-  for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs)
-    for (unsigned int idxRhs=0; idxRhs<nrhs; ++idxRhs){
-      unsigned int idxK = idxLhs*3+idxRhs; // or counter
-
-      unsigned int d = MatrixKernel.getPosition(idxK);
-
-      for (unsigned int pj=0; pj<rc; ++pj){
-        tmpFX=FT[pj + d*rc];
-        tmpFX*=FPMultExp[pj+idxRhs*rc];
-        FPLocalExp[pj+idxLhs*rc]+=tmpFX; // add new contribution +RijYj
-      }
+  for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs){
+    unsigned int idxRhs = idxLhs % npot;
+    unsigned int d = MatrixKernel.getPosition(idxLhs);
+
+    for (unsigned int pj=0; pj<rc; ++pj){
+      tmpFX=FT[pj + d*rc];
+      tmpFX*=FPMultExp[pj+idxRhs*rc];
+      FPLocalExp[pj+idxLhs*rc]+=tmpFX; // add new contribution +RijYj
+    }
 
   }
   time.tac();
@@ -509,22 +507,20 @@ int main(int, char **){
 
   ////////////////////////////////////////////////////////////////////
   // direct computation 
-  // Only scalar phys val => only compute first compo and no derivative
-  // TODO add multidim phys val !!!!!!
   std::cout << "Compute interactions directly ..." << std::endl;
   time.tic();
 
-  FReal** approx_f = new FReal* [nlhs];
-  FReal**        f = new FReal* [nlhs];
-  for (unsigned int i=0; i<nrhs; ++i){
+  FReal** approx_f = new FReal* [npot];
+  FReal**        f = new FReal* [npot];
+  for (unsigned int i=0; i<npot; ++i){
     approx_f[i] = new FReal [M * 3];
     f[i] = new FReal [M * 3];
     FBlas::setzero(M*3, f[i]);
   }
 
-  FReal** approx_p = new FReal* [nlhs];
-  FReal**        p = new FReal* [nlhs];
-  for (unsigned int i=0; i<nrhs; ++i){
+  FReal** approx_p = new FReal* [npot];
+  FReal**        p = new FReal* [npot];
+  for (unsigned int i=0; i<npot; ++i){
     approx_p[i] = new FReal [M];
     p[i] = new FReal [M];
     FBlas::setzero(M, p[i]);
@@ -562,19 +558,18 @@ int main(int, char **){
 //        f[counter*3 + 2] += force.getZ() * wx * wy;
 
         // R,ij and (R,ij),k
-        for (unsigned int i=0; i<nlhs; ++i) // sum all compo
+        for (unsigned int i=0; i<npot; ++i) // sum all compo
           for (unsigned int j=0; j<nrhs; ++j){
             unsigned int d = MatrixKernel.getPosition(i*nrhs+j);
-            const FReal rij = MatrixKernelClass(d).evaluate(x, y);
+            const FReal rij = MatrixKernelClass(a,d).evaluate(x, y);
             // potential
             p[i][counter] += rij * wy[j];
             // force
-            FReal force[3];
+            FReal force_k;
             for (unsigned int k=0; k<3; ++k){
-              //std::cout << "i,j,k,=" << i << ","<< j << ","<< k << std::endl;
-              unsigned int dk = RIJKMatrixKernel.getPosition(i*3*3+j*3+k);
-              force[k] = RIJKMatrixKernelClass(dk).evaluate(x, y);
-              f[i][counter*3 + k] += force[k] * wx[j] * wy[j];
+              unsigned int dk = RIJKMatrixKernel.getPosition((i*nrhs+j)*3+k);
+              force_k = RIJKMatrixKernelClass(a,dk).evaluate(x, y);
+              f[i][counter*3 + k] += force_k * wx[j] * wy[j];
             }
           }
 
@@ -591,7 +586,7 @@ int main(int, char **){
   ////////////////////////////////////////////////////////////////////
   unsigned int counter = 0;
   for(int idxPartX = 0 ; idxPartX < X.getSrc()->getNbParticles() ; ++idxPartX){
-    for (unsigned int i=0; i<nlhs; ++i){
+    for (unsigned int i=0; i<npot; ++i){
       approx_p[i][counter] = X.getSrc()->getPotentials(i)[idxPartX];
       const FPoint force = FPoint(X.getSrc()->getForcesX(i)[idxPartX],
                                   X.getSrc()->getForcesY(i)[idxPartX],
@@ -604,7 +599,7 @@ int main(int, char **){
   }
 
 //  std::cout << "Check Potential, forceX, forceY, forceZ " << std::endl;
-//  for (unsigned int i=0; i<nlhs; ++i){
+//  for (unsigned int i=0; i<npot; ++i){
 //    std::cout<< "idxLhs="<< i << std::endl;
 //    for(int idxPart = 0 ; idxPart < 20 ; ++idxPart){
 //      std::cout << approx_p[i][idxPart]     << ", "<< p[i][idxPart] << "|| ";
@@ -617,15 +612,23 @@ int main(int, char **){
 //  }
 //  std::cout << std::endl;
 
-  std::cout << "\nPotential error:" << std::endl;
-  std::cout << "Relative error  X = " << FMath::FAccurater( p[0], approx_p[0], M) << std::endl;
-  std::cout << "Relative error  Y = " << FMath::FAccurater( p[1], approx_p[1], M) << std::endl;
-  std::cout << "Relative error  Z = " << FMath::FAccurater( p[2], approx_p[2], M) << std::endl;
+  std::cout << "\nRelative Inf/L2 errors: " << std::endl;
+  std::cout << "  Potential:" << std::endl;
+  for(unsigned i = 0; i<npot;++i) {
+    std::cout << "    " << i << ": "
+              << FMath::FAccurater(p[i],approx_p[i],M).getRelativeInfNorm()<<", " 
+              << FMath::FAccurater(p[i],approx_p[i],M).getRelativeL2Norm()
+              << std::endl;
+  }
+  std::cout << std::endl;
 
-  std::cout << "\nForce error (Something is wrong here => TOFIX):" << std::endl;
-  std::cout << "Relative L2 error X  = " << FMath::FAccurater( f[0], approx_f[0], M*3) << std::endl;
-  std::cout << "Relative L2 error Y  = " << FMath::FAccurater( f[1], approx_f[1], M*3) << std::endl;
-  std::cout << "Relative L2 error Z  = " << FMath::FAccurater( f[2], approx_f[2], M*3) << std::endl;
+  std::cout << "  Force:" << std::endl;
+  for(unsigned i = 0; i<npot;++i) {
+    std::cout << "    " << i << ": "
+              << FMath::FAccurater(f[i],approx_f[i],3*M).getRelativeInfNorm()<<", " 
+              << FMath::FAccurater(f[i],approx_f[i],3*M).getRelativeL2Norm()
+              << std::endl;
+  }
   std::cout << std::endl;
 
   // free memory