Commit e73463c0 authored by BRAMAS Berenger

Debug the TSM and modify the P2P accordingly

parent 5367316b
...@@ -350,7 +350,6 @@ protected: ...@@ -350,7 +350,6 @@ protected:
WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()]; WorkloadTemp* workloadBuffer = workloadBufferThread[omp_get_thread_num()];
memset(workloadBuffer, 0, sizeof(struct WorkloadTemp)*leafsNumber); memset(workloadBuffer, 0, sizeof(struct WorkloadTemp)*leafsNumber);
// Prepare the P2P // Prepare the P2P
const int LeafIndex = OctreeHeight - 1;
leafsDataArray.reset(new LeafData[leafsNumber]); leafsDataArray.reset(new LeafData[leafsNumber]);
// We need the offset for each color // We need the offset for each color
......
...@@ -398,6 +398,10 @@ protected: ...@@ -398,6 +398,10 @@ protected:
} }
if(p2pEnabled){ if(p2pEnabled){
// need the current particles and neighbors particles // need the current particles and neighbors particles
if(iterArray[idxLeafs].getCurrentCell()->hasSrcChild()){
myThreadkernels->P2P( iterArray[idxLeafs].getCurrentGlobalCoordinate(), iterArray[idxLeafs].getCurrentListTargets(),
iterArray[idxLeafs].getCurrentListSrc() , neighbors, neighborPositions, 0);
}
const int counter = tree->getLeafsNeighbors(neighbors, neighborPositions, iterArray[idxLeafs].getCurrentGlobalCoordinate(),heightMinusOne); const int counter = tree->getLeafsNeighbors(neighbors, neighborPositions, iterArray[idxLeafs].getCurrentGlobalCoordinate(),heightMinusOne);
myThreadkernels->P2PRemote( iterArray[idxLeafs].getCurrentGlobalCoordinate(), iterArray[idxLeafs].getCurrentListTargets(), myThreadkernels->P2PRemote( iterArray[idxLeafs].getCurrentGlobalCoordinate(), iterArray[idxLeafs].getCurrentListTargets(),
iterArray[idxLeafs].getCurrentListSrc() , neighbors, neighborPositions, counter); iterArray[idxLeafs].getCurrentListSrc() , neighbors, neighborPositions, counter);
......
...@@ -315,6 +315,10 @@ protected: ...@@ -315,6 +315,10 @@ protected:
kernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentListTargets()); kernels->L2P(octreeIterator.getCurrentCell(), octreeIterator.getCurrentListTargets());
} }
if(p2pEnabled){ if(p2pEnabled){
if(octreeIterator.getCurrentCell()->hasSrcChild()){
kernels->P2P( octreeIterator.getCurrentGlobalCoordinate(), octreeIterator.getCurrentListTargets(),
octreeIterator.getCurrentListSrc() , neighbors, neighborPositions, 0);
}
// need the current particles and neighbors particles // need the current particles and neighbors particles
const int counter = tree->getLeafsNeighbors(neighbors, neighborPositions, octreeIterator.getCurrentGlobalCoordinate(), heightMinusOne); const int counter = tree->getLeafsNeighbors(neighbors, neighborPositions, octreeIterator.getCurrentGlobalCoordinate(), heightMinusOne);
kernels->P2PRemote( octreeIterator.getCurrentGlobalCoordinate(), octreeIterator.getCurrentListTargets(), kernels->P2PRemote( octreeIterator.getCurrentGlobalCoordinate(), octreeIterator.getCurrentListTargets(),
......
...@@ -207,11 +207,18 @@ public: ...@@ -207,11 +207,18 @@ public:
} }
void P2P(const FTreeCoordinate& inPosition, void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
ContainerClass* const inNeighbors[], const int neighborPositions[], ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override { const int inSize) override {
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); if(inTargets == inSources){
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
}
else{
const ContainerClass* const srcPtr[1] = {inSources};
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
}
} }
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
......
...@@ -448,11 +448,18 @@ public: ...@@ -448,11 +448,18 @@ public:
} }
void P2P(const FTreeCoordinate& inPosition, void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
ContainerClass* const inNeighbors[], const int neighborPositions[], ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override { const int inSize) override {
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); if(inTargets == inSources){
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
}
else{
const ContainerClass* const srcPtr[1] = {inSources};
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
}
} }
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
......
...@@ -220,11 +220,18 @@ public: ...@@ -220,11 +220,18 @@ public:
} }
void P2P(const FTreeCoordinate& inPosition, void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
ContainerClass* const inNeighbors[], const int neighborPositions[], ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override { const int inSize) override {
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); if(inTargets == inSources){
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
}
else{
const ContainerClass* const srcPtr[1] = {inSources};
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
}
} }
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
......
...@@ -28,7 +28,7 @@ struct DirectInteractionComputer ...@@ -28,7 +28,7 @@ struct DirectInteractionComputer
template <typename ContainerClass, typename MatrixKernelClass> template <typename ContainerClass, typename MatrixKernelClass>
static void P2PRemote( ContainerClass* const FRestrict inTargets, static void P2PRemote( ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[], const ContainerClass* const inNeighbors[],
const int inSize, const int inSize,
const MatrixKernelClass *const MatrixKernel){ const MatrixKernelClass *const MatrixKernel){
FP2P::FullRemoteKIJ<FReal, ContainerClass, MatrixKernelClass>(inTargets,inNeighbors,inSize,MatrixKernel); FP2P::FullRemoteKIJ<FReal, ContainerClass, MatrixKernelClass>(inTargets,inNeighbors,inSize,MatrixKernel);
...@@ -56,7 +56,7 @@ struct DirectInteractionComputer<FReal, 1,NVALS> ...@@ -56,7 +56,7 @@ struct DirectInteractionComputer<FReal, 1,NVALS>
template <typename ContainerClass, typename MatrixKernelClass> template <typename ContainerClass, typename MatrixKernelClass>
static void P2PRemote( ContainerClass* const FRestrict inTargets, static void P2PRemote( ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[], const ContainerClass* const inNeighbors[],
const int inSize, const int inSize,
const MatrixKernelClass *const MatrixKernel){ const MatrixKernelClass *const MatrixKernel){
FP2P::FullRemoteMultiRhs<FReal, ContainerClass, MatrixKernelClass>(inTargets,inNeighbors,inSize,MatrixKernel); FP2P::FullRemoteMultiRhs<FReal, ContainerClass, MatrixKernelClass>(inTargets,inNeighbors,inSize,MatrixKernel);
...@@ -83,7 +83,7 @@ struct DirectInteractionComputer<FReal, 1,1> ...@@ -83,7 +83,7 @@ struct DirectInteractionComputer<FReal, 1,1>
template <typename ContainerClass, typename MatrixKernelClass> template <typename ContainerClass, typename MatrixKernelClass>
static void P2PRemote( ContainerClass* const FRestrict inTargets, static void P2PRemote( ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[], const ContainerClass* const inNeighbors[],
const int inSize, const int inSize,
const MatrixKernelClass *const MatrixKernel){ const MatrixKernelClass *const MatrixKernel){
FP2PT<FReal>::template FullRemote<ContainerClass,MatrixKernelClass>(inTargets,inNeighbors,inSize,MatrixKernel); FP2PT<FReal>::template FullRemote<ContainerClass,MatrixKernelClass>(inTargets,inNeighbors,inSize,MatrixKernel);
......
...@@ -324,7 +324,7 @@ inline void InnerKIJ(ContainerClass* const FRestrict inTargets, const MatrixKern ...@@ -324,7 +324,7 @@ inline void InnerKIJ(ContainerClass* const FRestrict inTargets, const MatrixKern
* @brief FullRemoteKIJ * @brief FullRemoteKIJ
*/ */
template <class FReal, class ContainerClass, typename MatrixKernelClass> template <class FReal, class ContainerClass, typename MatrixKernelClass>
inline void FullRemoteKIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], inline void FullRemoteKIJ(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
// get information on tensorial aspect of matrix kernel // get information on tensorial aspect of matrix kernel
...@@ -547,7 +547,7 @@ static void GenericInner(ContainerClass* const FRestrict inTargets, const Matrix ...@@ -547,7 +547,7 @@ static void GenericInner(ContainerClass* const FRestrict inTargets, const Matrix
} }
template <class FReal, class ContainerClass, class MatrixKernelClass, class ComputeClass, int NbFRealInComputeClass> template <class FReal, class ContainerClass, class MatrixKernelClass, class ComputeClass, int NbFRealInComputeClass>
static void GenericFullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void GenericFullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
const FSize nbParticlesTargets = inTargets->getNbParticles(); const FSize nbParticlesTargets = inTargets->getNbParticles();
const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(); const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues();
...@@ -625,7 +625,7 @@ struct FP2PT<double>{ ...@@ -625,7 +625,7 @@ struct FP2PT<double>{
} }
template <class ContainerClass, class MatrixKernelClass> template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, __m256d, 4>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, __m256d, 4>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
} }
...@@ -645,7 +645,7 @@ struct FP2PT<float>{ ...@@ -645,7 +645,7 @@ struct FP2PT<float>{
} }
template <class ContainerClass, class MatrixKernelClass> template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, __m256, 8>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, __m256, 8>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
} }
...@@ -665,7 +665,7 @@ struct FP2PT<double>{ ...@@ -665,7 +665,7 @@ struct FP2PT<double>{
} }
template <class ContainerClass, class MatrixKernelClass> template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, __m512d, 8>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, __m512d, 8>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
} }
...@@ -686,7 +686,7 @@ struct FP2PT<float>{ ...@@ -686,7 +686,7 @@ struct FP2PT<float>{
} }
template <class ContainerClass, class MatrixKernelClass> template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, __m512, 16>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, __m512, 16>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
} }
...@@ -707,7 +707,7 @@ struct FP2PT<double>{ ...@@ -707,7 +707,7 @@ struct FP2PT<double>{
} }
template <class ContainerClass, class MatrixKernelClass> template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, __m128d, 2>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, __m128d, 2>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
} }
...@@ -728,7 +728,7 @@ struct FP2PT<float>{ ...@@ -728,7 +728,7 @@ struct FP2PT<float>{
} }
template <class ContainerClass, class MatrixKernelClass> template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, __m128, 4>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, __m128, 4>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
} }
...@@ -748,7 +748,7 @@ struct FP2PT<double>{ ...@@ -748,7 +748,7 @@ struct FP2PT<double>{
} }
template <class ContainerClass, class MatrixKernelClass> template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, double, 1>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); FP2P::GenericFullRemote<double, ContainerClass, MatrixKernelClass, double, 1>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
} }
...@@ -768,7 +768,7 @@ struct FP2PT<float>{ ...@@ -768,7 +768,7 @@ struct FP2PT<float>{
} }
template <class ContainerClass, class MatrixKernelClass> template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, float, 1>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel); FP2P::GenericFullRemote<float, ContainerClass, MatrixKernelClass, float, 1>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
} }
......
...@@ -140,7 +140,7 @@ namespace FP2P { ...@@ -140,7 +140,7 @@ namespace FP2P {
* FullRemoteMultiRhs (generic version) * FullRemoteMultiRhs (generic version)
*/ */
template <class FReal, class ContainerClass, typename MatrixKernelClass> template <class FReal, class ContainerClass, typename MatrixKernelClass>
inline void FullRemoteMultiRhs(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], inline void FullRemoteMultiRhs(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
const FSize nbParticlesTargets = inTargets->getNbParticles(); const FSize nbParticlesTargets = inTargets->getNbParticles();
......
...@@ -42,7 +42,7 @@ public: ...@@ -42,7 +42,7 @@ public:
return Parent::getRawData() + ((0+idxRhs)*NVALS+idxVals)*Parent::getLeadingRawData(); return Parent::getRawData() + ((0+idxRhs)*NVALS+idxVals)*Parent::getLeadingRawData();
} }
FSize getLeadingDimension(){ FSize getLeadingDimension() const {
return Parent::getLeadingRawData(); return Parent::getLeadingRawData();
} }
......
...@@ -256,7 +256,7 @@ static void GenericInner(ContainerClass* const FRestrict inTargets){ ...@@ -256,7 +256,7 @@ static void GenericInner(ContainerClass* const FRestrict inTargets){
} }
template <class FReal, class ContainerClass, class ComputeClass, int NbFRealInComputeClass> template <class FReal, class ContainerClass, class ComputeClass, int NbFRealInComputeClass>
static void GenericFullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void GenericFullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors){ const int limiteNeighbors){
const FSize nbParticlesTargets = inTargets->getNbParticles(); const FSize nbParticlesTargets = inTargets->getNbParticles();
const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(); const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues();
...@@ -340,7 +340,7 @@ struct FP2PRT<double>{ ...@@ -340,7 +340,7 @@ struct FP2PRT<double>{
} }
template <class ContainerClass> template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors){ const int limiteNeighbors){
FP2PR::GenericFullRemote<double, ContainerClass, __m256d, 4>(inTargets, inNeighbors, limiteNeighbors); FP2PR::GenericFullRemote<double, ContainerClass, __m256d, 4>(inTargets, inNeighbors, limiteNeighbors);
} }
...@@ -360,7 +360,7 @@ struct FP2PRT<float>{ ...@@ -360,7 +360,7 @@ struct FP2PRT<float>{
} }
template <class ContainerClass> template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors){ const int limiteNeighbors){
FP2PR::GenericFullRemote<float, ContainerClass, __m256, 8>(inTargets, inNeighbors, limiteNeighbors); FP2PR::GenericFullRemote<float, ContainerClass, __m256, 8>(inTargets, inNeighbors, limiteNeighbors);
} }
...@@ -380,7 +380,7 @@ struct FP2PRT<double>{ ...@@ -380,7 +380,7 @@ struct FP2PRT<double>{
} }
template <class ContainerClass> template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors){ const int limiteNeighbors){
FP2PR::GenericFullRemote<double, ContainerClass, __m512d, 8>(inTargets, inNeighbors, limiteNeighbors); FP2PR::GenericFullRemote<double, ContainerClass, __m512d, 8>(inTargets, inNeighbors, limiteNeighbors);
} }
...@@ -400,7 +400,7 @@ struct FP2PRT<float>{ ...@@ -400,7 +400,7 @@ struct FP2PRT<float>{
} }
template <class ContainerClass> template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors){ const int limiteNeighbors){
FP2PR::GenericFullRemote<float, ContainerClass, __m512, 16>(inTargets, inNeighbors, limiteNeighbors); FP2PR::GenericFullRemote<float, ContainerClass, __m512, 16>(inTargets, inNeighbors, limiteNeighbors);
} }
...@@ -421,7 +421,7 @@ struct FP2PRT<double>{ ...@@ -421,7 +421,7 @@ struct FP2PRT<double>{
} }
template <class ContainerClass> template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors){ const int limiteNeighbors){
FP2PR::GenericFullRemote<double, ContainerClass, __m128d, 2>(inTargets, inNeighbors, limiteNeighbors); FP2PR::GenericFullRemote<double, ContainerClass, __m128d, 2>(inTargets, inNeighbors, limiteNeighbors);
} }
...@@ -441,7 +441,7 @@ struct FP2PRT<float>{ ...@@ -441,7 +441,7 @@ struct FP2PRT<float>{
} }
template <class ContainerClass> template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors){ const int limiteNeighbors){
FP2PR::GenericFullRemote<float, ContainerClass, __m128, 4>(inTargets, inNeighbors, limiteNeighbors); FP2PR::GenericFullRemote<float, ContainerClass, __m128, 4>(inTargets, inNeighbors, limiteNeighbors);
} }
...@@ -462,7 +462,7 @@ struct FP2PRT<double>{ ...@@ -462,7 +462,7 @@ struct FP2PRT<double>{
} }
template <class ContainerClass> template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors){ const int limiteNeighbors){
FP2PR::GenericFullRemote<double, ContainerClass, double, 1>(inTargets, inNeighbors, limiteNeighbors); FP2PR::GenericFullRemote<double, ContainerClass, double, 1>(inTargets, inNeighbors, limiteNeighbors);
} }
...@@ -482,7 +482,7 @@ struct FP2PRT<float>{ ...@@ -482,7 +482,7 @@ struct FP2PRT<float>{
} }
template <class ContainerClass> template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], static void FullRemote(ContainerClass* const FRestrict inTargets, const ContainerClass* const inNeighbors[],
const int limiteNeighbors){ const int limiteNeighbors){
FP2PR::GenericFullRemote<float, ContainerClass, float, 1>(inTargets, inNeighbors, limiteNeighbors); FP2PR::GenericFullRemote<float, ContainerClass, float, 1>(inTargets, inNeighbors, limiteNeighbors);
} }
......
...@@ -1355,11 +1355,18 @@ public: ...@@ -1355,11 +1355,18 @@ public:
* Calling this method in multi thread should be done carrefully. * Calling this method in multi thread should be done carrefully.
*/ */
void P2P(const FTreeCoordinate& inPosition, void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
ContainerClass* const inNeighbors[], const int neighborPositions[], ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override { const int inSize) override {
FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); if(inTargets == inSources){
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
}
else{
const ContainerClass* const srcPtr[1] = {inSources};
FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1);
FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize);
}
} }
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
......
...@@ -791,11 +791,18 @@ public: ...@@ -791,11 +791,18 @@ public:
* Calling this method in multi thread should be done carrefully. * Calling this method in multi thread should be done carrefully.
*/ */
void P2P(const FTreeCoordinate& inPosition, void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
ContainerClass* const inNeighbors[], const int neighborPositions[], ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override { const int inSize) override {
FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); if(inTargets == inSources){
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
}
else{
const ContainerClass* const srcPtr[1] = {inSources};
FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1);
FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize);
}
} }
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
......
...@@ -218,11 +218,18 @@ public: ...@@ -218,11 +218,18 @@ public:
* Calling this method in multi thread should be done carrefully. * Calling this method in multi thread should be done carrefully.
*/ */
void P2P(const FTreeCoordinate& inPosition, void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
ContainerClass* const inNeighbors[], const int neighborPositions[], ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override { const int inSize) override {
FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); if(inTargets == inSources){
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
}
else{
const ContainerClass* const srcPtr[1] = {inSources};
FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1);
FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize);
}
} }
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
......
...@@ -958,11 +958,18 @@ public: ...@@ -958,11 +958,18 @@ public:
* Calling this method in multi thread should be done carrefully. * Calling this method in multi thread should be done carrefully.
*/ */
void P2P(const FTreeCoordinate& inPosition, void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
ContainerClass* const inNeighbors[], const int neighborPositions[], ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override { const int inSize) override {
FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); if(inTargets == inSources){
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
}
else{
const ContainerClass* const srcPtr[1] = {inSources};
FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1);
FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize);
}
} }
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
......
...@@ -868,11 +868,18 @@ public: ...@@ -868,11 +868,18 @@ public:
* Calling this method in multi thread should be done carrefully. * Calling this method in multi thread should be done carrefully.
*/ */
void P2P(const FTreeCoordinate& inPosition, void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
ContainerClass* const inNeighbors[], const int neighborPositions[], ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override { const int inSize) override {
FP2PRT<FReal>::template Inner<ContainerClass>(inTargets); if(inTargets == inSources){
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); FP2PRT<FReal>::template Inner<ContainerClass>(inTargets);
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
}
else{
const ContainerClass* const srcPtr[1] = {inSources};
FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,srcPtr,1);
FP2PRT<FReal>::template FullRemote<ContainerClass>(inTargets,inNeighbors,inSize);
}
} }
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
......
...@@ -163,11 +163,18 @@ public: ...@@ -163,11 +163,18 @@ public:
} }
void P2P(const FTreeCoordinate& inPosition, void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
ContainerClass* const inNeighbors[], const int neighborPositions[], ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override { const int inSize) override {
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize); if(inTargets == inSources){
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel); P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
}
else{
const ContainerClass* const srcPtr[1] = {inSources};
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,inSize,MatrixKernel);
}
} }
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/, void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
......
...@@ -189,13 +189,20 @@ public: ...@@ -189,13 +189,20 @@ public:
} }
void P2P(const FTreeCoordinate& inPosition,