From e73463c0061a9c86c76cf2c55a51b219fb6cf046 Mon Sep 17 00:00:00 2001
From: Berenger Bramas <Berenger.Bramas@inria.fr>
Date: Fri, 6 Nov 2015 21:58:53 +0100
Subject: [PATCH] debug the tsm and modify the P2P relatively

---
 Src/Core/FFmmAlgorithmThreadBalance.hpp       |  1 -
 Src/Core/FFmmAlgorithmThreadTsm.hpp           |  4 ++++
 Src/Core/FFmmAlgorithmTsm.hpp                 |  4 ++++
 Src/Kernels/Chebyshev/FChebKernel.hpp         | 13 +++++++++---
 Src/Kernels/Chebyshev/FChebSymKernel.hpp      | 13 +++++++++---
 .../Chebyshev/FChebTensorialKernel.hpp        | 13 +++++++++---
 .../Interpolation/FInterpP2PKernels.hpp       |  6 +++---
 Src/Kernels/P2P/FP2P.hpp                      | 20 +++++++++----------
 Src/Kernels/P2P/FP2PMultiRhs.hpp              |  2 +-
 Src/Kernels/P2P/FP2PParticleContainer.hpp     |  2 +-
 Src/Kernels/P2P/FP2PR.hpp                     | 18 ++++++++---------
 Src/Kernels/Rotation/FRotationKernel.hpp      | 13 +++++++++---
 .../Rotation/FRotationOriginalKernel.hpp      | 13 +++++++++---
 .../Spherical/FAbstractSphericalKernel.hpp    | 13 +++++++++---
 Src/Kernels/Taylor/FTaylorKernel.hpp          | 13 +++++++++---
 Src/Kernels/Taylor/FTaylorKernelSimple.hpp    | 13 +++++++++---
 Src/Kernels/Uniform/FUnifDenseKernel.hpp      | 13 +++++++++---
 Src/Kernels/Uniform/FUnifKernel.hpp           | 15 ++++++++++----
 Src/Kernels/Uniform/FUnifTensorialKernel.hpp  | 13 +++++++++---
 19 files changed, 143 insertions(+), 59 deletions(-)

diff --git a/Src/Core/FFmmAlgorithmThreadBalance.hpp b/Src/Core/FFmmAlgorithmThreadBalance.hpp
index fdb3ba977..79b5e2c36 100644
--- a/Src/Core/FFmmAlgorithmThreadBalance.hpp
+++ b/Src/Core/FFmmAlgorithmThreadBalance.hpp
@@ -350,7 +350,6 @@ protected:
                     WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
                     memset(workloadBuffer, 0, sizeof(struct WorkloadTemp)*leafsNumber);
                     // Prepare the P2P
-                    const int LeafIndex = OctreeHeight - 1;
                     leafsDataArray.reset(new LeafData[leafsNumber]);
 
                     // We need the offset for each color
diff --git a/Src/Core/FFmmAlgorithmThreadTsm.hpp b/Src/Core/FFmmAlgorithmThreadTsm.hpp
index 8004e5e29..cca5fe8bf 100644
--- a/Src/Core/FFmmAlgorithmThreadTsm.hpp
+++ b/Src/Core/FFmmAlgorithmThreadTsm.hpp
@@ -398,6 +398,10 @@ protected:
                     }
                     if(p2pEnabled){
                         // need the current particles and neighbors particles
+                        if(iterArray[idxLeafs].getCurrentCell()->hasSrcChild()){
+                            myThreadkernels->P2P( iterArray[idxLeafs].getCurrentGlobalCoordinate(), iterArray[idxLeafs].getCurrentListTargets(),
+                                          iterArray[idxLeafs].getCurrentListSrc() , neighbors, neighborPositions, 0);
+                        }
                         const int counter = tree->getLeafsNeighbors(neighbors, neighborPositions, iterArray[idxLeafs].getCurrentGlobalCoordinate(),heightMinusOne);
                         myThreadkernels->P2PRemote( iterArray[idxLeafs].getCurrentGlobalCoordinate(), iterArray[idxLeafs].getCurrentListTargets(),
                                       iterArray[idxLeafs].getCurrentListSrc() , neighbors, neighborPositions, counter);
diff --git a/Src/Core/FFmmAlgorithmTsm.hpp b/Src/Core/FFmmAlgorithmTsm.hpp
index 64958533e..7f64a2734 100644
--- a/Src/Core/FFmmAlgorithmTsm.hpp
+++ b/Src/Core/FFmmAlgorithmTsm.hpp
@@ -315,6 +315,10 @@ protected:
                     kernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentListTargets());
                 }
                 if(p2pEnabled){
+                    if(octreeIterator.getCurrentCell()->hasSrcChild()){
+                        kernels->P2P( octreeIterator.getCurrentGlobalCoordinate(), octreeIterator.getCurrentListTargets(),
+                                      octreeIterator.getCurrentListSrc() , neighbors, neighborPositions, 0);
+                    }
                     // need the current particles and neighbors particles
                     const int counter = tree->getLeafsNeighbors(neighbors, neighborPositions, octreeIterator.getCurrentGlobalCoordinate(), heightMinusOne);
                     kernels->P2PRemote( octreeIterator.getCurrentGlobalCoordinate(), octreeIterator.getCurrentListTargets(),
diff --git a/Src/Kernels/Chebyshev/FChebKernel.hpp b/Src/Kernels/Chebyshev/FChebKernel.hpp
index 11ee09eb5..bb29e6605 100644
--- a/Src/Kernels/Chebyshev/FChebKernel.hpp
+++ b/Src/Kernels/Chebyshev/FChebKernel.hpp
@@ -207,11 +207,18 @@ public:
     }
 
     void P2P(const FTreeCoordinate& inPosition,
-             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
+             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
              ContainerClass* const inNeighbors[], const int neighborPositions[],
              const int inSize) override {
-        P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
-        DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
+        if(inTargets == inSources){
+            P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
+        }
+        else{
+            const ContainerClass* const srcPtr[1] = {inSources};
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
+        }
     }
 
     void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
diff --git a/Src/Kernels/Chebyshev/FChebSymKernel.hpp b/Src/Kernels/Chebyshev/FChebSymKernel.hpp
index 185d56843..afd4ce565 100644
--- a/Src/Kernels/Chebyshev/FChebSymKernel.hpp
+++ b/Src/Kernels/Chebyshev/FChebSymKernel.hpp
@@ -448,11 +448,18 @@ public:
     }
 
     void P2P(const FTreeCoordinate& inPosition,
-             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
+             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
              ContainerClass* const inNeighbors[], const int neighborPositions[],
              const int inSize) override {
-        P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
-        DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
+        if(inTargets == inSources){
+            P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
+        }
+        else{
+            const ContainerClass* const srcPtr[1] = {inSources};
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
+        }
     }
 
     void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
diff --git a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp
index 60d0ff4f6..391445128 100644
--- a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp
+++ b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp
@@ -220,11 +220,18 @@ public:
     }
 
     void P2P(const FTreeCoordinate& inPosition,
-             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
+             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
              ContainerClass* const inNeighbors[], const int neighborPositions[],
              const int inSize) override {
-        P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
-        DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
+        if(inTargets == inSources){
+            P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
+        }
+        else{
+            const ContainerClass* const srcPtr[1] = {inSources};
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
+        }
     }
 
     void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
diff --git a/Src/Kernels/Interpolation/FInterpP2PKernels.hpp b/Src/Kernels/Interpolation/FInterpP2PKernels.hpp
index 3c1c87d2d..7f5fa3cf1 100644
--- a/Src/Kernels/Interpolation/FInterpP2PKernels.hpp
+++ b/Src/Kernels/Interpolation/FInterpP2PKernels.hpp
@@ -28,7 +28,7 @@ struct DirectInteractionComputer
 
   template <typename ContainerClass, typename MatrixKernelClass>
   static void P2PRemote( ContainerClass* const FRestrict inTargets,
-                         ContainerClass* const inNeighbors[],
+                         const ContainerClass* const inNeighbors[],
                          const int inSize,
                          const MatrixKernelClass *const MatrixKernel){
       FP2P::FullRemoteKIJ<FReal, ContainerClass, MatrixKernelClass>(inTargets,inNeighbors,inSize,MatrixKernel);
@@ -56,7 +56,7 @@ struct DirectInteractionComputer<FReal, 1,NVALS>
 
   template <typename ContainerClass, typename MatrixKernelClass>
   static void P2PRemote( ContainerClass* const FRestrict inTargets,
-                         ContainerClass* const inNeighbors[],
+                         const ContainerClass* const inNeighbors[],
                          const int inSize,
                          const MatrixKernelClass *const MatrixKernel){
       FP2P::FullRemoteMultiRhs<FReal, ContainerClass, MatrixKernelClass>(inTargets,inNeighbors,inSize,MatrixKernel);
@@ -83,7 +83,7 @@ struct DirectInteractionComputer<FReal, 1,1>
 
   template <typename ContainerClass, typename MatrixKernelClass>
   static void P2PRemote( ContainerClass* const FRestrict inTargets,
-                         ContainerClass* const inNeighbors[],
+                         const ContainerClass* const inNeighbors[],
                          const int inSize,
                          const MatrixKernelClass *const MatrixKernel){
       FP2PT<FReal>::template FullRemote<ContainerClass,MatrixKernelClass>(inTargets,inNeighbors,inSize,MatrixKernel);
diff --git a/Src/Kernels/P2P/FP2P.hpp b/Src/Kernels/P2P/FP2P.hpp
index 3b52ca54d..1577568ba 100644
--- a/Src/Kernels/P2P/FP2P.hpp
+++ b/Src/Kernels/P2P/FP2P.hpp
@@ -324,7 +324,7 @@ inline void InnerKIJ(ContainerClass* const FRestrict inTargets, const MatrixKern
    * @brief FullRemoteKIJ
    */
 template <class FReal, class ContainerClass, typename MatrixKernelClass>
-inline void FullRemoteKIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+inline void FullRemoteKIJ(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                           const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
 
     // get information on tensorial aspect of matrix kernel
@@ -547,7 +547,7 @@ static void GenericInner(ContainerClass* const FRestrict inTargets, const Matrix
 }
 
 template <class FReal, class ContainerClass, class MatrixKernelClass, class ComputeClass, int NbFRealInComputeClass>
-static void GenericFullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+static void GenericFullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                               const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
     const FSize nbParticlesTargets = inTargets->getNbParticles();
     const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues();
@@ -625,7 +625,7 @@ struct FP2PT<double>{
     }
 
     template <class ContainerClass, class MatrixKernelClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                            const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
         FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, __m256d, 4>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
     }
@@ -645,7 +645,7 @@ struct FP2PT<float>{
     }
 
     template <class ContainerClass, class MatrixKernelClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                            const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
         FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, __m256, 8>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
     }
@@ -665,7 +665,7 @@ struct FP2PT<double>{
     }
 
     template <class ContainerClass, class MatrixKernelClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                            const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
         FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, __m512d, 8>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
     }
@@ -686,7 +686,7 @@ struct FP2PT<float>{
     }
 
     template <class ContainerClass, class MatrixKernelClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                            const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
         FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, __m512, 16>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
     }
@@ -707,7 +707,7 @@ struct FP2PT<double>{
     }
 
     template <class ContainerClass, class MatrixKernelClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                            const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
         FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, __m128d, 2>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
     }
@@ -728,7 +728,7 @@ struct FP2PT<float>{
     }
 
     template <class ContainerClass, class MatrixKernelClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                            const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
         FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, __m128, 4>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
     }
@@ -748,7 +748,7 @@ struct FP2PT<double>{
     }
 
     template <class ContainerClass, class MatrixKernelClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                            const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
         FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, double, 1>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
     }
@@ -768,7 +768,7 @@ struct FP2PT<float>{
     }
 
     template <class ContainerClass, class MatrixKernelClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                            const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
         FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, float, 1>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
     }
diff --git a/Src/Kernels/P2P/FP2PMultiRhs.hpp b/Src/Kernels/P2P/FP2PMultiRhs.hpp
index d89bc9b37..dc45ae59b 100644
--- a/Src/Kernels/P2P/FP2PMultiRhs.hpp
+++ b/Src/Kernels/P2P/FP2PMultiRhs.hpp
@@ -140,7 +140,7 @@ namespace FP2P {
    * FullRemoteMultiRhs (generic version)
    */
 template <class FReal, class ContainerClass, typename MatrixKernelClass>
-inline void FullRemoteMultiRhs(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+inline void FullRemoteMultiRhs(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                        const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
 
     const FSize nbParticlesTargets = inTargets->getNbParticles();
diff --git a/Src/Kernels/P2P/FP2PParticleContainer.hpp b/Src/Kernels/P2P/FP2PParticleContainer.hpp
index d6cb2ac75..01c7fd780 100644
--- a/Src/Kernels/P2P/FP2PParticleContainer.hpp
+++ b/Src/Kernels/P2P/FP2PParticleContainer.hpp
@@ -42,7 +42,7 @@ public:
         return Parent::getRawData() + ((0+idxRhs)*NVALS+idxVals)*Parent::getLeadingRawData();
     }
 
-    FSize getLeadingDimension(){
+    FSize getLeadingDimension() const {
         return Parent::getLeadingRawData();
     }
 
diff --git a/Src/Kernels/P2P/FP2PR.hpp b/Src/Kernels/P2P/FP2PR.hpp
index 8aa52252b..8ee64307a 100644
--- a/Src/Kernels/P2P/FP2PR.hpp
+++ b/Src/Kernels/P2P/FP2PR.hpp
@@ -256,7 +256,7 @@ static void GenericInner(ContainerClass* const FRestrict inTargets){
 }
 
 template <class FReal, class ContainerClass, class ComputeClass, int NbFRealInComputeClass>
-static void GenericFullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+static void GenericFullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                        const int limiteNeighbors){
     const FSize nbParticlesTargets = inTargets->getNbParticles();
     const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues();
@@ -340,7 +340,7 @@ struct FP2PRT<double>{
     }
 
     template <class ContainerClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                const int limiteNeighbors){
         FP2PR::GenericFullRemote<double, ContainerClass, __m256d, 4>(inTargets, inNeighbors, limiteNeighbors);
     }
@@ -360,7 +360,7 @@ struct FP2PRT<float>{
     }
 
     template <class ContainerClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                const int limiteNeighbors){
         FP2PR::GenericFullRemote<float, ContainerClass, __m256, 8>(inTargets, inNeighbors, limiteNeighbors);
     }
@@ -380,7 +380,7 @@ struct FP2PRT<double>{
     }
 
     template <class ContainerClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                const int limiteNeighbors){
         FP2PR::GenericFullRemote<double, ContainerClass, __m512d, 8>(inTargets, inNeighbors, limiteNeighbors);
     }
@@ -400,7 +400,7 @@ struct FP2PRT<float>{
     }
 
     template <class ContainerClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                const int limiteNeighbors){
         FP2PR::GenericFullRemote<float, ContainerClass, __m512, 16>(inTargets, inNeighbors, limiteNeighbors);
     }
@@ -421,7 +421,7 @@ struct FP2PRT<double>{
     }
 
     template <class ContainerClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                const int limiteNeighbors){
         FP2PR::GenericFullRemote<double, ContainerClass, __m128d, 2>(inTargets, inNeighbors, limiteNeighbors);
     }
@@ -441,7 +441,7 @@ struct FP2PRT<float>{
     }
 
     template <class ContainerClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                const int limiteNeighbors){
         FP2PR::GenericFullRemote<float, ContainerClass, __m128, 4>(inTargets, inNeighbors, limiteNeighbors);
     }
@@ -462,7 +462,7 @@ struct FP2PRT<double>{
     }
 
     template <class ContainerClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                const int limiteNeighbors){
         FP2PR::GenericFullRemote<double, ContainerClass, double, 1>(inTargets, inNeighbors, limiteNeighbors);
     }
@@ -482,7 +482,7 @@ struct FP2PRT<float>{
     }
 
     template <class ContainerClass>
-    static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
+    static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
                const int limiteNeighbors){
         FP2PR::GenericFullRemote<float, ContainerClass, float, 1>(inTargets, inNeighbors, limiteNeighbors);
     }
diff --git a/Src/Kernels/Rotation/FRotationKernel.hpp b/Src/Kernels/Rotation/FRotationKernel.hpp
index e358959bc..a1bd15d84 100644
--- a/Src/Kernels/Rotation/FRotationKernel.hpp
+++ b/Src/Kernels/Rotation/FRotationKernel.hpp
@@ -1355,11 +1355,18 @@ public:
       * Calling this method in multi thread should be done carrefully.
       */
     void P2P(const FTreeCoordinate& inPosition,
-             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
+             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
              ContainerClass* const inNeighbors[], const int neighborPositions[],
              const int inSize) override {
-        FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
-        P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+        if(inTargets == inSources){
+            FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
+            P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+        }
+        else{
+            const ContainerClass* const srcPtr[1] = {inSources};
+            FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1);
+            FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize);
+        }
     }
 
     void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
diff --git a/Src/Kernels/Rotation/FRotationOriginalKernel.hpp b/Src/Kernels/Rotation/FRotationOriginalKernel.hpp
index 30465b65d..c24e9cf90 100644
--- a/Src/Kernels/Rotation/FRotationOriginalKernel.hpp
+++ b/Src/Kernels/Rotation/FRotationOriginalKernel.hpp
@@ -791,11 +791,18 @@ public:
       * Calling this method in multi thread should be done carrefully.
       */
     void P2P(const FTreeCoordinate& inPosition,
-             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
+             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
              ContainerClass* const inNeighbors[], const int neighborPositions[],
              const int inSize) override {
-        FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
-        P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+        if(inTargets == inSources){
+            FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
+            P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+        }
+        else{
+            const ContainerClass* const srcPtr[1] = {inSources};
+            FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1);
+            FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize);
+        }
     }
 
     void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
diff --git a/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp b/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp
index 881aa0b97..3ef4273c6 100644
--- a/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp
+++ b/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp
@@ -218,11 +218,18 @@ public:
       * Calling this method in multi thread should be done carrefully.
       */
     void P2P(const FTreeCoordinate& inPosition,
-             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
+             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
              ContainerClass* const inNeighbors[], const int neighborPositions[],
              const int inSize) override {
-        FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
-        P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+        if(inTargets == inSources){
+            FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
+            P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+        }
+        else{
+            const ContainerClass* const srcPtr[1] = {inSources};
+            FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1);
+            FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize);
+        }
     }
 
     void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
diff --git a/Src/Kernels/Taylor/FTaylorKernel.hpp b/Src/Kernels/Taylor/FTaylorKernel.hpp
index d4fb62956..06f515f09 100644
--- a/Src/Kernels/Taylor/FTaylorKernel.hpp
+++ b/Src/Kernels/Taylor/FTaylorKernel.hpp
@@ -958,11 +958,18 @@ public:
       * Calling this method in multi thread should be done carrefully.
       */
     void P2P(const FTreeCoordinate& inPosition,
-             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
+             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
              ContainerClass* const inNeighbors[], const int neighborPositions[],
              const int inSize) override {
-        FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
-        P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+        if(inTargets == inSources){
+            FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
+            P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+        }
+        else{
+            const ContainerClass* const srcPtr[1] = {inSources};
+            FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1);
+            FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize);
+        }
     }
 
     void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
diff --git a/Src/Kernels/Taylor/FTaylorKernelSimple.hpp b/Src/Kernels/Taylor/FTaylorKernelSimple.hpp
index af1df17c0..da4322dfd 100644
--- a/Src/Kernels/Taylor/FTaylorKernelSimple.hpp
+++ b/Src/Kernels/Taylor/FTaylorKernelSimple.hpp
@@ -868,11 +868,18 @@ public:
       * Calling this method in multi thread should be done carrefully.
       */
     void P2P(const FTreeCoordinate& inPosition,
-             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
+             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
              ContainerClass* const inNeighbors[], const int neighborPositions[],
              const int inSize) override {
-        FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
-        P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+        if(inTargets == inSources){
+            FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
+            P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+        }
+        else{
+            const ContainerClass* const srcPtr[1] = {inSources};
+            FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1);
+            FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize);
+        }
     }
 
     void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
diff --git a/Src/Kernels/Uniform/FUnifDenseKernel.hpp b/Src/Kernels/Uniform/FUnifDenseKernel.hpp
index 2c4a5f741..3a4c5da7e 100644
--- a/Src/Kernels/Uniform/FUnifDenseKernel.hpp
+++ b/Src/Kernels/Uniform/FUnifDenseKernel.hpp
@@ -163,11 +163,18 @@ public:
     }
 
     void P2P(const FTreeCoordinate& inPosition,
-             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
+             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
              ContainerClass* const inNeighbors[], const int neighborPositions[],
              const int inSize) override {
-        P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
-        DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
+        if(inTargets == inSources){
+            P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
+        }
+        else{
+            const ContainerClass* const srcPtr[1] = {inSources};
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
+        }
     }
 
     void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
diff --git a/Src/Kernels/Uniform/FUnifKernel.hpp b/Src/Kernels/Uniform/FUnifKernel.hpp
index da2bc6dc6..14e547189 100644
--- a/Src/Kernels/Uniform/FUnifKernel.hpp
+++ b/Src/Kernels/Uniform/FUnifKernel.hpp
@@ -189,13 +189,20 @@ public:
     }
 
     void P2P(const FTreeCoordinate& inPosition,
-             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
+             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
              ContainerClass* const inNeighbors[], const int neighborPositions[],
              const int inSize) override {
         // Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf
-        if(LeafLevelSeparationCriterion==1) {            
-            P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
-            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
+        if(LeafLevelSeparationCriterion==1) {
+            if(inTargets == inSources){
+                P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+                DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
+            }
+            else{
+                const ContainerClass* const srcPtr[1] = {inSources};
+                DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
+                DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
+            }
         }
         // Nearfield interactions are only computed within the target leaf
         else if(LeafLevelSeparationCriterion==0){
diff --git a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp
index 0110a96a1..afd6ab262 100644
--- a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp
+++ b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp
@@ -251,11 +251,18 @@ public:
     }
 
     void P2P(const FTreeCoordinate& inPosition,
-             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
+             ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
              ContainerClass* const inNeighbors[], const int neighborPositions[],
              const int inSize) override {
-        P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
-        DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
+        if(inTargets == inSources){
+            P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
+        }
+        else{
+            const ContainerClass* const srcPtr[1] = {inSources};
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
+            DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
+        }
     }
 
     void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
-- 
GitLab