From e73463c0061a9c86c76cf2c55a51b219fb6cf046 Mon Sep 17 00:00:00 2001 From: Berenger Bramas <Berenger.Bramas@inria.fr> Date: Fri, 6 Nov 2015 21:58:53 +0100 Subject: [PATCH] debug the tsm and modify the P2P relatively --- Src/Core/FFmmAlgorithmThreadBalance.hpp | 1 - Src/Core/FFmmAlgorithmThreadTsm.hpp | 4 ++++ Src/Core/FFmmAlgorithmTsm.hpp | 4 ++++ Src/Kernels/Chebyshev/FChebKernel.hpp | 13 +++++++++--- Src/Kernels/Chebyshev/FChebSymKernel.hpp | 13 +++++++++--- .../Chebyshev/FChebTensorialKernel.hpp | 13 +++++++++--- .../Interpolation/FInterpP2PKernels.hpp | 6 +++--- Src/Kernels/P2P/FP2P.hpp | 20 +++++++++---------- Src/Kernels/P2P/FP2PMultiRhs.hpp | 2 +- Src/Kernels/P2P/FP2PParticleContainer.hpp | 2 +- Src/Kernels/P2P/FP2PR.hpp | 18 ++++++++--------- Src/Kernels/Rotation/FRotationKernel.hpp | 13 +++++++++--- .../Rotation/FRotationOriginalKernel.hpp | 13 +++++++++--- .../Spherical/FAbstractSphericalKernel.hpp | 13 +++++++++--- Src/Kernels/Taylor/FTaylorKernel.hpp | 13 +++++++++--- Src/Kernels/Taylor/FTaylorKernelSimple.hpp | 13 +++++++++--- Src/Kernels/Uniform/FUnifDenseKernel.hpp | 13 +++++++++--- Src/Kernels/Uniform/FUnifKernel.hpp | 15 ++++++++++---- Src/Kernels/Uniform/FUnifTensorialKernel.hpp | 13 +++++++++--- 19 files changed, 143 insertions(+), 59 deletions(-) diff --git a/Src/Core/FFmmAlgorithmThreadBalance.hpp b/Src/Core/FFmmAlgorithmThreadBalance.hpp index fdb3ba977..79b5e2c36 100644 --- a/Src/Core/FFmmAlgorithmThreadBalance.hpp +++ b/Src/Core/FFmmAlgorithmThreadBalance.hpp @@ -350,7 +350,6 @@ protected: WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()]; memset(workloadBuffer, 0, sizeof(struct WorkloadTemp)*leafsNumber); // Prepare the P2P - const int LeafIndex = OctreeHeight - 1; leafsDataArray.reset(new LeafData[leafsNumber]); // We need the offset for each color diff --git a/Src/Core/FFmmAlgorithmThreadTsm.hpp b/Src/Core/FFmmAlgorithmThreadTsm.hpp index 8004e5e29..cca5fe8bf 100644 --- a/Src/Core/FFmmAlgorithmThreadTsm.hpp +++ b/Src/Core/FFmmAlgorithmThreadTsm.hpp @@ -398,6 +398,10 @@ protected: } if(p2pEnabled){ // need the current particles and neighbors particles + if(iterArray[idxLeafs].getCurrentCell()->hasSrcChild()){ + myThreadkernels->P2P( iterArray[idxLeafs].getCurrentGlobalCoordinate(), iterArray[idxLeafs].getCurrentListTargets(), + iterArray[idxLeafs].getCurrentListSrc() , neighbors, neighborPositions, 0); + } const int counter = tree->getLeafsNeighbors(neighbors, neighborPositions, iterArray[idxLeafs].getCurrentGlobalCoordinate(),heightMinusOne); myThreadkernels->P2PRemote( iterArray[idxLeafs].getCurrentGlobalCoordinate(), iterArray[idxLeafs].getCurrentListTargets(), iterArray[idxLeafs].getCurrentListSrc() , neighbors, neighborPositions, counter); diff --git a/Src/Core/FFmmAlgorithmTsm.hpp b/Src/Core/FFmmAlgorithmTsm.hpp index 64958533e..7f64a2734 100644 --- a/Src/Core/FFmmAlgorithmTsm.hpp +++ b/Src/Core/FFmmAlgorithmTsm.hpp @@ -315,6 +315,10 @@ protected: kernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentListTargets()); } if(p2pEnabled){ + if(octreeIterator.getCurrentCell()->hasSrcChild()){ + kernels->P2P( octreeIterator.getCurrentGlobalCoordinate(), octreeIterator.getCurrentListTargets(), + octreeIterator.getCurrentListSrc() , neighbors, neighborPositions, 0); + } // need the current particles and neighbors particles const int counter = tree->getLeafsNeighbors(neighbors, neighborPositions, octreeIterator.getCurrentGlobalCoordinate(), heightMinusOne); kernels->P2PRemote( octreeIterator.getCurrentGlobalCoordinate(), octreeIterator.getCurrentListTargets(), diff --git a/Src/Kernels/Chebyshev/FChebKernel.hpp b/Src/Kernels/Chebyshev/FChebKernel.hpp index 11ee09eb5..bb29e6605 100644 --- a/Src/Kernels/Chebyshev/FChebKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebKernel.hpp @@ -207,11 +207,18 @@ public: } void P2P(const FTreeCoordinate& inPosition, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources, ContainerClass* const inNeighbors[], const int neighborPositions[], const int inSize) override { - P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); - DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); + if(inTargets == inSources){ + P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); + } + else{ + const ContainerClass* const srcPtr[1] = {inSources}; + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel); + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel); + } } void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, diff --git a/Src/Kernels/Chebyshev/FChebSymKernel.hpp b/Src/Kernels/Chebyshev/FChebSymKernel.hpp index 185d56843..afd4ce565 100644 --- a/Src/Kernels/Chebyshev/FChebSymKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebSymKernel.hpp @@ -448,11 +448,18 @@ public: } void P2P(const FTreeCoordinate& inPosition, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources, ContainerClass* const inNeighbors[], const int neighborPositions[], const int inSize) override { - P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); - DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); + if(inTargets == inSources){ + P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); + } + else{ + const ContainerClass* const srcPtr[1] = {inSources}; + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel); + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel); + } } void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, diff --git a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp index 60d0ff4f6..391445128 100644 --- a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp @@ -220,11 +220,18 @@ public: } void P2P(const FTreeCoordinate& inPosition, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources, ContainerClass* const inNeighbors[], const int neighborPositions[], const int inSize) override { - P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); - DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); + if(inTargets == inSources){ + P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); + } + else{ + const ContainerClass* const srcPtr[1] = {inSources}; + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel); + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel); + } } void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, diff --git a/Src/Kernels/Interpolation/FInterpP2PKernels.hpp b/Src/Kernels/Interpolation/FInterpP2PKernels.hpp index 3c1c87d2d..7f5fa3cf1 100644 --- a/Src/Kernels/Interpolation/FInterpP2PKernels.hpp +++ b/Src/Kernels/Interpolation/FInterpP2PKernels.hpp @@ -28,7 +28,7 @@ struct DirectInteractionComputer template <typename ContainerClass, typename MatrixKernelClass> static void P2PRemote( ContainerClass* const FRestrict inTargets, - ContainerClass* const inNeighbors[], + const ContainerClass* const inNeighbors[], const int inSize, const MatrixKernelClass *const MatrixKernel){ FP2P::FullRemoteKIJ<FReal, ContainerClass, MatrixKernelClass>(inTargets,inNeighbors,inSize,MatrixKernel); @@ -56,7 +56,7 @@ struct DirectInteractionComputer<FReal, 1,NVALS> template <typename ContainerClass, typename MatrixKernelClass> static void P2PRemote( ContainerClass* const FRestrict inTargets, - ContainerClass* const inNeighbors[], + const ContainerClass* const inNeighbors[], const int inSize, const MatrixKernelClass *const MatrixKernel){ FP2P::FullRemoteMultiRhs<FReal, ContainerClass, MatrixKernelClass>(inTargets,inNeighbors,inSize,MatrixKernel); @@ -83,7 +83,7 @@ struct DirectInteractionComputer<FReal, 1,1> template <typename ContainerClass, typename MatrixKernelClass> static void P2PRemote( ContainerClass* const FRestrict inTargets, - ContainerClass* const inNeighbors[], + const ContainerClass* const inNeighbors[], const int inSize, const MatrixKernelClass *const MatrixKernel){ FP2PT<FReal>::template FullRemote<ContainerClass,MatrixKernelClass>(inTargets,inNeighbors,inSize,MatrixKernel); diff --git a/Src/Kernels/P2P/FP2P.hpp b/Src/Kernels/P2P/FP2P.hpp index 3b52ca54d..1577568ba 100644 --- a/Src/Kernels/P2P/FP2P.hpp +++ b/Src/Kernels/P2P/FP2P.hpp @@ -324,7 +324,7 @@ inline void InnerKIJ(ContainerClass* const FRestrict inTargets, const MatrixKern * @brief FullRemoteKIJ */ template <class FReal, class ContainerClass, typename MatrixKernelClass> -inline void FullRemoteKIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], +inline void FullRemoteKIJ(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ // get information on tensorial aspect of matrix kernel @@ -547,7 +547,7 @@ static void GenericInner(ContainerClass* const FRestrict inTargets, const Matrix } template <class FReal, class ContainerClass, class MatrixKernelClass, class ComputeClass, int NbFRealInComputeClass> -static void GenericFullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], +static void GenericFullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const FSize nbParticlesTargets = inTargets->getNbParticles(); const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(); @@ -625,7 +625,7 @@ struct FP2PT<double>{ } template <class ContainerClass, class MatrixKernelClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, __m256d, 4>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); } @@ -645,7 +645,7 @@ struct FP2PT<float>{ } template <class ContainerClass, class MatrixKernelClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, __m256, 8>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); } @@ -665,7 +665,7 @@ struct FP2PT<double>{ } template <class ContainerClass, class MatrixKernelClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, __m512d, 8>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); } @@ -686,7 +686,7 @@ struct FP2PT<float>{ } template <class ContainerClass, class MatrixKernelClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, __m512, 16>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); } @@ -707,7 +707,7 @@ struct FP2PT<double>{ } template <class ContainerClass, class MatrixKernelClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, __m128d, 2>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); } @@ -728,7 +728,7 @@ struct FP2PT<float>{ } template <class ContainerClass, class MatrixKernelClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, __m128, 4>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); } @@ -748,7 +748,7 @@ struct FP2PT<double>{ } template <class ContainerClass, class MatrixKernelClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, double, 1>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); } @@ -768,7 +768,7 @@ struct FP2PT<float>{ } template <class ContainerClass, class MatrixKernelClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, float, 1>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); } diff --git a/Src/Kernels/P2P/FP2PMultiRhs.hpp b/Src/Kernels/P2P/FP2PMultiRhs.hpp index d89bc9b37..dc45ae59b 100644 --- a/Src/Kernels/P2P/FP2PMultiRhs.hpp +++ b/Src/Kernels/P2P/FP2PMultiRhs.hpp @@ -140,7 +140,7 @@ namespace FP2P { * FullRemoteMultiRhs (generic version) */ template <class FReal, class ContainerClass, typename MatrixKernelClass> -inline void FullRemoteMultiRhs(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], +inline void FullRemoteMultiRhs(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const FSize nbParticlesTargets = inTargets->getNbParticles(); diff --git a/Src/Kernels/P2P/FP2PParticleContainer.hpp b/Src/Kernels/P2P/FP2PParticleContainer.hpp index d6cb2ac75..01c7fd780 100644 --- a/Src/Kernels/P2P/FP2PParticleContainer.hpp +++ b/Src/Kernels/P2P/FP2PParticleContainer.hpp @@ -42,7 +42,7 @@ public: return Parent::getRawData() + ((0+idxRhs)*NVALS+idxVals)*Parent::getLeadingRawData(); } - FSize getLeadingDimension(){ + FSize getLeadingDimension() const { return Parent::getLeadingRawData(); } diff --git a/Src/Kernels/P2P/FP2PR.hpp b/Src/Kernels/P2P/FP2PR.hpp index 8aa52252b..8ee64307a 100644 --- a/Src/Kernels/P2P/FP2PR.hpp +++ b/Src/Kernels/P2P/FP2PR.hpp @@ -256,7 +256,7 @@ static void GenericInner(ContainerClass* const FRestrict inTargets){ } template <class FReal, class ContainerClass, class ComputeClass, int NbFRealInComputeClass> -static void GenericFullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], +static void GenericFullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors){ const FSize nbParticlesTargets = inTargets->getNbParticles(); const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(); @@ -340,7 +340,7 @@ struct FP2PRT<double>{ } template <class ContainerClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors){ FP2PR::GenericFullRemote<double, ContainerClass, __m256d, 4>(inTargets, inNeighbors, limiteNeighbors); } @@ -360,7 +360,7 @@ struct FP2PRT<float>{ } template <class ContainerClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors){ FP2PR::GenericFullRemote<float, ContainerClass, __m256, 8>(inTargets, inNeighbors, limiteNeighbors); } @@ -380,7 +380,7 @@ struct FP2PRT<double>{ } template <class ContainerClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors){ FP2PR::GenericFullRemote<double, ContainerClass, __m512d, 8>(inTargets, inNeighbors, limiteNeighbors); } @@ -400,7 +400,7 @@ struct FP2PRT<float>{ } template <class ContainerClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors){ FP2PR::GenericFullRemote<float, ContainerClass, __m512, 16>(inTargets, inNeighbors, limiteNeighbors); } @@ -421,7 +421,7 @@ struct FP2PRT<double>{ } template <class ContainerClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors){ FP2PR::GenericFullRemote<double, ContainerClass, __m128d, 2>(inTargets, inNeighbors, limiteNeighbors); } @@ -441,7 +441,7 @@ struct FP2PRT<float>{ } template <class ContainerClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors){ FP2PR::GenericFullRemote<float, ContainerClass, __m128, 4>(inTargets, inNeighbors, limiteNeighbors); } @@ -462,7 +462,7 @@ struct FP2PRT<double>{ } template <class ContainerClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors){ FP2PR::GenericFullRemote<double, ContainerClass, double, 1>(inTargets, inNeighbors, limiteNeighbors); } @@ -482,7 +482,7 @@ struct FP2PRT<float>{ } template <class ContainerClass> - static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[], const int limiteNeighbors){ FP2PR::GenericFullRemote<float, ContainerClass, float, 1>(inTargets, inNeighbors, limiteNeighbors); } diff --git a/Src/Kernels/Rotation/FRotationKernel.hpp b/Src/Kernels/Rotation/FRotationKernel.hpp index e358959bc..a1bd15d84 100644 --- a/Src/Kernels/Rotation/FRotationKernel.hpp +++ b/Src/Kernels/Rotation/FRotationKernel.hpp @@ -1355,11 +1355,18 @@ public: * Calling this method in multi thread should be done carrefully. */ void P2P(const FTreeCoordinate& inPosition, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources, ContainerClass* const inNeighbors[], const int neighborPositions[], const int inSize) override { - FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); - P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + if(inTargets == inSources){ + FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); + P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + } + else{ + const ContainerClass* const srcPtr[1] = {inSources}; + FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1); + FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize); + } } void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, diff --git a/Src/Kernels/Rotation/FRotationOriginalKernel.hpp b/Src/Kernels/Rotation/FRotationOriginalKernel.hpp index 30465b65d..c24e9cf90 100644 --- a/Src/Kernels/Rotation/FRotationOriginalKernel.hpp +++ b/Src/Kernels/Rotation/FRotationOriginalKernel.hpp @@ -791,11 +791,18 @@ public: * Calling this method in multi thread should be done carrefully. */ void P2P(const FTreeCoordinate& inPosition, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources, ContainerClass* const inNeighbors[], const int neighborPositions[], const int inSize) override { - FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); - P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + if(inTargets == inSources){ + FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); + P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + } + else{ + const ContainerClass* const srcPtr[1] = {inSources}; + FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1); + FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize); + } } void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, diff --git a/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp b/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp index 881aa0b97..3ef4273c6 100644 --- a/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp +++ b/Src/Kernels/Spherical/FAbstractSphericalKernel.hpp @@ -218,11 +218,18 @@ public: * Calling this method in multi thread should be done carrefully. */ void P2P(const FTreeCoordinate& inPosition, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources, ContainerClass* const inNeighbors[], const int neighborPositions[], const int inSize) override { - FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); - P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + if(inTargets == inSources){ + FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); + P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + } + else{ + const ContainerClass* const srcPtr[1] = {inSources}; + FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1); + FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize); + } } void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, diff --git a/Src/Kernels/Taylor/FTaylorKernel.hpp b/Src/Kernels/Taylor/FTaylorKernel.hpp index d4fb62956..06f515f09 100644 --- a/Src/Kernels/Taylor/FTaylorKernel.hpp +++ b/Src/Kernels/Taylor/FTaylorKernel.hpp @@ -958,11 +958,18 @@ public: * Calling this method in multi thread should be done carrefully. */ void P2P(const FTreeCoordinate& inPosition, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources, ContainerClass* const inNeighbors[], const int neighborPositions[], const int inSize) override { - FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); - P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + if(inTargets == inSources){ + FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); + P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + } + else{ + const ContainerClass* const srcPtr[1] = {inSources}; + FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1); + FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize); + } } void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, diff --git a/Src/Kernels/Taylor/FTaylorKernelSimple.hpp b/Src/Kernels/Taylor/FTaylorKernelSimple.hpp index af1df17c0..da4322dfd 100644 --- a/Src/Kernels/Taylor/FTaylorKernelSimple.hpp +++ b/Src/Kernels/Taylor/FTaylorKernelSimple.hpp @@ -868,11 +868,18 @@ public: * Calling this method in multi thread should be done carrefully. */ void P2P(const FTreeCoordinate& inPosition, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources, ContainerClass* const inNeighbors[], const int neighborPositions[], const int inSize) override { - FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); - P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + if(inTargets == inSources){ + FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); + P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + } + else{ + const ContainerClass* const srcPtr[1] = {inSources}; + FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1); + FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize); + } } void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, diff --git a/Src/Kernels/Uniform/FUnifDenseKernel.hpp b/Src/Kernels/Uniform/FUnifDenseKernel.hpp index 2c4a5f741..3a4c5da7e 100644 --- a/Src/Kernels/Uniform/FUnifDenseKernel.hpp +++ b/Src/Kernels/Uniform/FUnifDenseKernel.hpp @@ -163,11 +163,18 @@ public: } void P2P(const FTreeCoordinate& inPosition, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources, ContainerClass* const inNeighbors[], const int neighborPositions[], const int inSize) override { - P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); - DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); + if(inTargets == inSources){ + P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); + } + else{ + const ContainerClass* const srcPtr[1] = {inSources}; + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel); + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel); + } } void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, diff --git a/Src/Kernels/Uniform/FUnifKernel.hpp b/Src/Kernels/Uniform/FUnifKernel.hpp index da2bc6dc6..14e547189 100644 --- a/Src/Kernels/Uniform/FUnifKernel.hpp +++ b/Src/Kernels/Uniform/FUnifKernel.hpp @@ -189,13 +189,20 @@ public: } void P2P(const FTreeCoordinate& inPosition, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources, ContainerClass* const inNeighbors[], const int neighborPositions[], const int inSize) override { // Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf - if(LeafLevelSeparationCriterion==1) { - P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); - DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); + if(LeafLevelSeparationCriterion==1) { + if(inTargets == inSources){ + P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); + } + else{ + const ContainerClass* const srcPtr[1] = {inSources}; + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel); + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel); + } } // Nearfield interactions are only computed within the target leaf else if(LeafLevelSeparationCriterion==0){ diff --git a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp index 0110a96a1..afd6ab262 100644 --- a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp @@ -251,11 +251,18 @@ public: } void P2P(const FTreeCoordinate& inPosition, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources, ContainerClass* const inNeighbors[], const int neighborPositions[], const int inSize) override { - P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); - DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); + if(inTargets == inSources){ + P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); + } + else{ + const ContainerClass* const srcPtr[1] = {inSources}; + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel); + DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel); + } } void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, -- GitLab