Commit 4fa5f55c authored by BRAMAS Berenger

Add an additional P2P: P2POuter, which computes the P2P between neighbors...

Add an additional P2P: P2POuter, which computes the P2P between neighbors (possibly in a mutual way) but not the inner P2P in the target leaf.
parent 743d2d05
......@@ -55,7 +55,7 @@ class FTreeCoordinate;
*/
template<class FReal, class CellClass, class ContainerClass, class MatrixKernelClass, int ORDER, int NVALS = 1>
class FAdaptiveChebSymKernel : FChebSymKernel<FReal,CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
class FAdaptiveChebSymKernel : public FChebSymKernel<FReal,CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
, public FAbstractAdaptiveKernel<CellClass, ContainerClass> {
//
typedef FChebSymKernel<FReal,CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS> KernelBaseClass;
......
......@@ -56,7 +56,7 @@ class FTreeCoordinate;
*/
template<class FReal, class CellClass, class ContainerClass, class MatrixKernelClass, int ORDER, int NVALS = 1>
class FAdaptiveUnifKernel : FUnifKernel<FReal,CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
class FAdaptiveUnifKernel : public FUnifKernel<FReal,CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>
, public FAbstractAdaptiveKernel<CellClass, ContainerClass> {
//
typedef FUnifKernel<FReal,CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS> KernelBaseClass;
......
......@@ -347,6 +347,13 @@ public:
kernel.P2P(inLeafPosition, targets, sources, directNeighborsParticles, positions, size);
}
/**
 * Outer P2P: hands the target leaf and its direct neighbors to the wrapped
 * kernel's P2POuter without touching any argument.
 */
void P2POuter(const FTreeCoordinate& inLeafPosition,
              ContainerClass* const FRestrict targets,
              ContainerClass* const directNeighborsParticles[],
              const int positions[],
              const int size) override {
    // Pure delegation: the wrapped kernel does all the work.
    kernel.P2POuter(inLeafPosition, targets, directNeighborsParticles, positions, size);
}
/** This is a normal P2P */
void P2PRemote(const FTreeCoordinate& inLeafPosition,
ContainerClass* const FRestrict targets, const ContainerClass* const FRestrict sources,
......
......@@ -322,6 +322,34 @@ public:
cell->addNearCost(tmpCost);
countP2P++;
}
void P2POuter(const FTreeCoordinate& LeafCellCoordinate, // needed for periodic boundary conditions
ContainerClass* const FRestrict TargetParticles,
ContainerClass* const NeighborSourceParticles[],
const int positions[],
const int size) override {
FSize tmpCost = 0;
FSize tgtPartCount = TargetParticles->getNbParticles();
{
for (int idx=0; idx < size && positions[idx]<=13; ++idx)
{
tmpCost +=
countFlopsP2Pmutual()
* tgtPartCount
* NeighborSourceParticles[idx]->getNbParticles();
}
}
flopsP2P += tmpCost;
CellClass* cell = _tree->getCell(
LeafCellCoordinate.getMortonIndex(_treeHeight - 1),
_treeHeight - 1);
cell->addNearCost(tmpCost);
countP2P++;
}
};
......
......@@ -193,6 +193,22 @@ public:
FSize tmpCost = srcPartCount * tgtPartCount;
CellClass* cell = _tree->getCell(
LeafCellCoordinate.getMortonIndex(_treeHeight - 1),
_treeHeight - 1);
flopsP2P += tmpCost;
cell->addNearCost(tmpCost);
countP2P++;
}
void P2POuter(const FTreeCoordinate& LeafCellCoordinate, // needed for periodic boundary conditions
ContainerClass* const FRestrict TargetParticles,
ContainerClass* const /*NeighborSourceParticles*/[],
const int /*positions*/[],
const int size) override {
FSize tmpCost = 0;
CellClass* cell = _tree->getCell(
LeafCellCoordinate.getMortonIndex(_treeHeight - 1),
_treeHeight - 1);
......
This diff is collapsed.
......@@ -68,6 +68,14 @@ public:
}
/** Outer P2P: intentionally empty, every argument is ignored */
virtual void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
                      ContainerClass* const FRestrict /*targets*/,
                      ContainerClass* const /*directNeighborsParticles*/[],
                      const int /*neighborPositions*/[],
                      const int /*size*/) override {
    // nothing to compute
}
/** Do nothing */
virtual void P2PRemote(const FTreeCoordinate& /*treeCoord*/,
ContainerClass* const FRestrict /*targetParticles*/, const ContainerClass* const FRestrict /*sourceParticles*/,
......
......@@ -111,6 +111,22 @@ public:
}
}
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
ContainerClass* const FRestrict targets,
ContainerClass* const directNeighborsParticles[], const int neighborPositions[],
const int inSize) override {
long long int inc = 0;
for(int idx = 0 ; idx < inSize ; ++idx){
inc += directNeighborsParticles[idx]->getNbParticles();
}
long long int*const particlesAttributes = targets->getDataDown();
for(FSize idxPart = 0 ; idxPart < targets->getNbParticles() ; ++idxPart){
particlesAttributes[idxPart] += inc;
}
}
/** After Downward */
void P2PRemote(const FTreeCoordinate& ,
ContainerClass* const FRestrict targets, const ContainerClass* const FRestrict /*sources*/,
......
......@@ -385,12 +385,12 @@ protected:
FAssertLF((*iterParticles)->getLeafMortonIndex(outsideInteractions[outInterIdx].insideIdxInBlock) == outsideInteractions[outInterIdx].insideIndex);
ParticleContainerClass* ptrLeaf = &interParticles;
kernels->P2PRemote( FTreeCoordinate(outsideInteractions[outInterIdx].insideIndex, tree->getHeight()-1),
&particles, &particles , &ptrLeaf, &outsideInteractions[outInterIdx].outPosition, 1);
kernels->P2POuter( FTreeCoordinate(outsideInteractions[outInterIdx].insideIndex, tree->getHeight()-1),
&particles , &ptrLeaf, &outsideInteractions[outInterIdx].outPosition, 1);
const int otherPosition = getOppositeNeighIndex(outsideInteractions[outInterIdx].outPosition);
ptrLeaf = &particles;
kernels->P2PRemote( FTreeCoordinate(outsideInteractions[outInterIdx].outIndex, tree->getHeight()-1),
&interParticles, &interParticles , &ptrLeaf, &otherPosition, 1);
kernels->P2POuter( FTreeCoordinate(outsideInteractions[outInterIdx].outIndex, tree->getHeight()-1),
&interParticles , &ptrLeaf, &otherPosition, 1);
}
}
}
......
......@@ -653,12 +653,12 @@ protected:
FAssertLF(containers->getLeafMortonIndex((*outsideInteractions)[outInterIdx].insideIdxInBlock) == (*outsideInteractions)[outInterIdx].insideIndex);
ParticleContainerClass* ptrLeaf = &interParticles;
kernel->P2PRemote( FTreeCoordinate((*outsideInteractions)[outInterIdx].insideIndex, tree->getHeight()-1),
&particles, &particles , &ptrLeaf, &(*outsideInteractions)[outInterIdx].outPosition, 1);
kernel->P2POuter( FTreeCoordinate((*outsideInteractions)[outInterIdx].insideIndex, tree->getHeight()-1),
&particles , &ptrLeaf, &(*outsideInteractions)[outInterIdx].outPosition, 1);
const int otherPosition = getOppositeNeighIndex((*outsideInteractions)[outInterIdx].outPosition);
ptrLeaf = &particles;
kernel->P2PRemote( FTreeCoordinate((*outsideInteractions)[outInterIdx].outIndex, tree->getHeight()-1),
&interParticles, &interParticles , &ptrLeaf, &otherPosition, 1);
kernel->P2POuter( FTreeCoordinate((*outsideInteractions)[outInterIdx].outIndex, tree->getHeight()-1),
&interParticles , &ptrLeaf, &otherPosition, 1);
}
}
}
......
......@@ -811,12 +811,12 @@ protected:
FAssertLF(containers->getLeafMortonIndex((*outsideInteractions)[outInterIdx].insideIdxInBlock) == (*outsideInteractions)[outInterIdx].insideIndex);
ParticleContainerClass* ptrLeaf = &interParticles;
kernel->P2PRemote( FTreeCoordinate((*outsideInteractions)[outInterIdx].insideIndex, tree->getHeight()-1),
&particles, &particles , &ptrLeaf, &(*outsideInteractions)[outInterIdx].outPosition, 1);
kernel->P2POuter( FTreeCoordinate((*outsideInteractions)[outInterIdx].insideIndex, tree->getHeight()-1),
&particles , &ptrLeaf, &(*outsideInteractions)[outInterIdx].outPosition, 1);
const int otherPosition = getOppositeNeighIndex((*outsideInteractions)[outInterIdx].outPosition);
ptrLeaf = &particles;
kernel->P2PRemote( FTreeCoordinate((*outsideInteractions)[outInterIdx].outIndex, tree->getHeight()-1),
&interParticles, &interParticles , &ptrLeaf, &otherPosition, 1);
kernel->P2POuter( FTreeCoordinate((*outsideInteractions)[outInterIdx].outIndex, tree->getHeight()-1),
&interParticles , &ptrLeaf, &otherPosition, 1);
}
}
}
......
......@@ -489,12 +489,12 @@ public:
FAssertLF(containers->getLeafMortonIndex((*outsideInteractions)[outInterIdx].insideIdxInBlock) == (*outsideInteractions)[outInterIdx].insideIndex);
ParticleContainerClass* ptrLeaf = &interParticles;
kernel->P2PRemote( FTreeCoordinate((*outsideInteractions)[outInterIdx].insideIndex, treeHeight-1),
&particles, &particles , &ptrLeaf, &(*outsideInteractions)[outInterIdx].outPosition, 1);
kernel->P2POuter( FTreeCoordinate((*outsideInteractions)[outInterIdx].insideIndex, treeHeight-1),
&particles , &ptrLeaf, &(*outsideInteractions)[outInterIdx].outPosition, 1);
const int otherPosition = getOppositeNeighIndex((*outsideInteractions)[outInterIdx].outPosition);
ptrLeaf = &particles;
kernel->P2PRemote( FTreeCoordinate((*outsideInteractions)[outInterIdx].outIndex, treeHeight-1),
&interParticles, &interParticles , &ptrLeaf, &otherPosition, 1);
kernel->P2POuter( FTreeCoordinate((*outsideInteractions)[outInterIdx].outIndex, treeHeight-1),
&interParticles , &ptrLeaf, &otherPosition, 1);
}
}
}
......
......@@ -156,6 +156,11 @@ public:
const int SourcePositions[],
const int /* size */) = 0;
/**
 * Outer P2P: direct interactions between the target leaf and its
 * neighbors (possibly computed in a mutual way), without the inner P2P
 * of the target leaf itself.
 *
 * @param inLeafPosition tree coordinate of the target leaf
 * @param targets particles of the target leaf
 * @param directNeighborsParticles particle containers of the neighbors
 * @param neighborPositions position index of each neighbor, one entry
 *        per container in directNeighborsParticles
 * @param size number of entries in the two arrays above
 */
virtual void P2POuter(const FTreeCoordinate& inLeafPosition,
ContainerClass* const FRestrict targets,
ContainerClass* const directNeighborsParticles[], const int neighborPositions[],
const int size) = 0;
virtual void P2PRemote(const FTreeCoordinate& /*inPosition*/,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
......
......@@ -205,10 +205,10 @@ public:
unsigned int flops = 0;
// count how often each of the 16 interactions is used
memset(countExp, 0, sizeof(int) * 343);
for (unsigned int idx=0; idx<size; ++idx)
for (int idx=0; idx<size; ++idx)
countExp[SymHandler->pindices[positions[idx]]]++;
// multiply (mat-mat-mul)
for (unsigned int pidx=0; pidx<343; ++pidx)
for (int pidx=0; pidx<343; ++pidx)
if (countExp[pidx])
flops += countFlopsM2L(countExp[pidx], SymHandler->LowRank[pidx]) + countExp[pidx]*nnodes;
flopsM2L += flops;
......@@ -254,15 +254,24 @@ public:
{
if (TargetParticles != SourceParticles) {
flopsP2P += countFlopsP2P() * TargetParticles->getNbParticles() * SourceParticles->getNbParticles();
for (unsigned int idx=0; idx<size; ++idx)
for (int idx=0; idx<size; ++idx)
flopsP2P += countFlopsP2P() * TargetParticles->getNbParticles() * NeighborSourceParticles[idx]->getNbParticles();
} else {
flopsP2P += countFlopsP2Pmutual() * ((TargetParticles->getNbParticles()*TargetParticles->getNbParticles()+TargetParticles->getNbParticles()) / 2);
for (unsigned int idx=0; idx < size && neighborPositions[idx]<=13; ++idx)
for (int idx=0; idx < size && neighborPositions[idx]<=13; ++idx)
flopsP2P += countFlopsP2Pmutual() * TargetParticles->getNbParticles() * NeighborSourceParticles[idx]->getNbParticles();
}
}
/**
 * Flop count of the outer P2P.
 *
 * For each neighbor visited, adds
 * countFlopsP2Pmutual() * (target particles) * (neighbor particles)
 * to flopsP2P. The walk ends at the first neighbor whose position is
 * greater than 13, or after `size` neighbors.
 */
void P2POuter(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions
              ContainerClass* const FRestrict TargetParticles,
              ContainerClass* const NeighborSourceParticles[],
              const int neighborPositions[],
              const int size) override
{
    int neighIdx = 0;
    while (neighIdx < size && neighborPositions[neighIdx] <= 13) {
        flopsP2P += countFlopsP2Pmutual() * TargetParticles->getNbParticles() * NeighborSourceParticles[neighIdx]->getNbParticles();
        ++neighIdx;
    }
}
};
......
......@@ -206,10 +206,18 @@ public:
}
void P2P(const FTreeCoordinate& /*inPosition*/,
void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override {
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
}
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override {
int nbNeighborsToCompute = 0;
while(nbNeighborsToCompute < inSize
&& neighborPositions[nbNeighborsToCompute] < 14){
......@@ -218,7 +226,6 @@ public:
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2P(inTargets,inNeighbors,nbNeighborsToCompute,MatrixKernel);
}
void P2PRemote(const FTreeCoordinate& /*inPosition*/,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
ContainerClass* const inNeighbors[], const int /*neighborPositions*/[],
......
......@@ -447,10 +447,18 @@ public:
}
void P2P(const FTreeCoordinate& /*inPosition*/,
void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override {
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
}
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override {
int nbNeighborsToCompute = 0;
while(nbNeighborsToCompute < inSize
&& neighborPositions[nbNeighborsToCompute] < 14){
......
......@@ -219,10 +219,18 @@ public:
}
}
void P2P(const FTreeCoordinate& /*inPosition*/,
void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override {
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
}
void P2POuter(const FTreeCoordinate& /*inLeafPosition*/,
ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[], const int neighborPositions[],
const int inSize) override {
int nbNeighborsToCompute = 0;
while(nbNeighborsToCompute < inSize
&& neighborPositions[nbNeighborsToCompute] < 14){
......
......@@ -20,6 +20,12 @@ struct DirectInteractionComputer
FP2P::FullMutualKIJ<FReal, ContainerClass, MatrixKernelClass>(TargetParticles,NeighborSourceParticles,inSize,MatrixKernel);
}
/**
 * Inner P2P of the generic DirectInteractionComputer: delegates to
 * FP2P::InnerKIJ, which only receives the target container and the
 * matrix kernel (no neighbor containers).
 */
template <typename ContainerClass, typename MatrixKernelClass>
static void P2PInner(ContainerClass* const FRestrict TargetParticles,
                     const MatrixKernelClass* const MatrixKernel)
{
    FP2P::InnerKIJ<FReal, ContainerClass, MatrixKernelClass>(TargetParticles, MatrixKernel);
}
template <typename ContainerClass, typename MatrixKernelClass>
static void P2PRemote( ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[],
......@@ -42,6 +48,12 @@ struct DirectInteractionComputer<FReal, 1,NVALS>
FP2P::FullMutualMultiRhs<FReal, ContainerClass, MatrixKernelClass>(TargetParticles,NeighborSourceParticles,inSize,MatrixKernel);
}
/**
 * Inner P2P of DirectInteractionComputer<FReal, 1, NVALS>: delegates to
 * FP2P::InnerMultiRhs, which only receives the target container and the
 * matrix kernel (no neighbor containers).
 */
template <typename ContainerClass, typename MatrixKernelClass>
static void P2PInner(ContainerClass* const FRestrict TargetParticles,
                     const MatrixKernelClass* const MatrixKernel)
{
    FP2P::InnerMultiRhs<FReal, ContainerClass, MatrixKernelClass>(TargetParticles, MatrixKernel);
}
template <typename ContainerClass, typename MatrixKernelClass>
static void P2PRemote( ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[],
......@@ -63,6 +75,12 @@ struct DirectInteractionComputer<FReal, 1,1>
FP2PT<FReal>::template FullMutual<ContainerClass,MatrixKernelClass> (TargetParticles,NeighborSourceParticles,inSize,MatrixKernel);
}
/**
 * Inner P2P of DirectInteractionComputer<FReal, 1, 1>: delegates to
 * FP2PT<FReal>::Inner, which only receives the target container and the
 * matrix kernel (no neighbor containers).
 */
template <typename ContainerClass, typename MatrixKernelClass>
static void P2PInner(ContainerClass* const FRestrict TargetParticles,
                     const MatrixKernelClass* const MatrixKernel)
{
    FP2PT<FReal>::template Inner<ContainerClass, MatrixKernelClass>(TargetParticles, MatrixKernel);
}
template <typename ContainerClass, typename MatrixKernelClass>
static void P2PRemote( ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[],
......
......@@ -263,6 +263,21 @@ inline void FullMutualKIJ(ContainerClass* const FRestrict inTargets, ContainerCl
}
}
}
}
template <class FReal, class ContainerClass, typename MatrixKernelClass>
inline void InnerKIJ(ContainerClass* const FRestrict inTargets, const MatrixKernelClass *const MatrixKernel){
// get information on tensorial aspect of matrix kernel
const int ncmp = MatrixKernelClass::NCMP;
const int applyTab[9] = {0,1,2,
1,3,4,
2,4,5};
const FSize nbParticlesTargets = inTargets->getNbParticles();
const FReal*const targetsX = inTargets->getPositions()[0];
const FReal*const targetsY = inTargets->getPositions()[1];
const FReal*const targetsZ = inTargets->getPositions()[2];
for(FSize idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){
for(FSize idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){
......@@ -436,6 +451,20 @@ static void GenericFullMutual(ContainerClass* const FRestrict inTargets, Contain
}
}
}
}
template <class FReal, class ContainerClass, class MatrixKernelClass, class ComputeClass, int NbFRealInComputeClass>
static void GenericInner(ContainerClass* const FRestrict inTargets, const MatrixKernelClass *const MatrixKernel){
const FSize nbParticlesTargets = inTargets->getNbParticles();
const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues();
const FReal*const targetsX = inTargets->getPositions()[0];
const FReal*const targetsY = inTargets->getPositions()[1];
const FReal*const targetsZ = inTargets->getPositions()[2];
FReal*const targetsForcesX = inTargets->getForcesX();
FReal*const targetsForcesY = inTargets->getForcesY();
FReal*const targetsForcesZ = inTargets->getForcesZ();
FReal*const targetsPotentials = inTargets->getPotentials();
{//In this part, we compute (vectorially) the interaction
//within the target leaf.
......@@ -589,6 +618,12 @@ struct FP2PT<double>{
FP2P::GenericFullMutual<double, ContainerClass, MatrixKernelClass, __m256d, 4>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2P::GenericInner with FReal = double, ComputeClass = __m256d and
 * NbFRealInComputeClass = 4.
 */
template <class ContainerClass, class MatrixKernelClass>
static void Inner(ContainerClass* const FRestrict inTargets,
                  const MatrixKernelClass* const MatrixKernel)
{
    FP2P::GenericInner<double, ContainerClass, MatrixKernelClass, __m256d, 4>(inTargets, MatrixKernel);
}
template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
......@@ -604,6 +639,11 @@ struct FP2PT<float>{
FP2P::GenericFullMutual<float, ContainerClass, MatrixKernelClass, __m256, 8>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2P::GenericInner with FReal = float, ComputeClass = __m256 and
 * NbFRealInComputeClass = 8.
 */
template <class ContainerClass, class MatrixKernelClass>
static void Inner(ContainerClass* const FRestrict inTargets,
                  const MatrixKernelClass* const MatrixKernel)
{
    FP2P::GenericInner<float, ContainerClass, MatrixKernelClass, __m256, 8>(inTargets, MatrixKernel);
}
template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
......@@ -619,6 +659,11 @@ struct FP2PT<double>{
FP2P::GenericFullMutual<double, ContainerClass, MatrixKernelClass, __m512d, 8>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2P::GenericInner with FReal = double, ComputeClass = __m512d and
 * NbFRealInComputeClass = 8.
 */
template <class ContainerClass, class MatrixKernelClass>
static void Inner(ContainerClass* const FRestrict inTargets,
                  const MatrixKernelClass* const MatrixKernel)
{
    FP2P::GenericInner<double, ContainerClass, MatrixKernelClass, __m512d, 8>(inTargets, MatrixKernel);
}
template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
......@@ -634,6 +679,12 @@ struct FP2PT<float>{
FP2P::GenericFullMutual<float, ContainerClass, MatrixKernelClass, __m512, 16>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2P::GenericInner with FReal = float, ComputeClass = __m512 and
 * NbFRealInComputeClass = 16.
 */
template <class ContainerClass, class MatrixKernelClass>
static void Inner(ContainerClass* const FRestrict inTargets,
                  const MatrixKernelClass* const MatrixKernel)
{
    FP2P::GenericInner<float, ContainerClass, MatrixKernelClass, __m512, 16>(inTargets, MatrixKernel);
}
template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
......@@ -649,6 +700,12 @@ struct FP2PT<double>{
FP2P::GenericFullMutual<double, ContainerClass, MatrixKernelClass, __m128d, 2>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2P::GenericInner with FReal = double, ComputeClass = __m128d and
 * NbFRealInComputeClass = 2.
 */
template <class ContainerClass, class MatrixKernelClass>
static void Inner(ContainerClass* const FRestrict inTargets,
                  const MatrixKernelClass* const MatrixKernel)
{
    FP2P::GenericInner<double, ContainerClass, MatrixKernelClass, __m128d, 2>(inTargets, MatrixKernel);
}
template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
......@@ -664,6 +721,12 @@ struct FP2PT<float>{
FP2P::GenericFullMutual<float, ContainerClass, MatrixKernelClass, __m128, 4>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
}
template <class ContainerClass, class MatrixKernelClass>
static void Inner(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
FP2P::GenericInner<float, ContainerClass, MatrixKernelClass, __m128, 4>(inTargets, MatrixKernel);
}
template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
......@@ -679,6 +742,11 @@ struct FP2PT<double>{
FP2P::GenericFullMutual<double, ContainerClass, MatrixKernelClass, double, 1>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2P::GenericInner with FReal = double, ComputeClass = double and
 * NbFRealInComputeClass = 1.
 */
template <class ContainerClass, class MatrixKernelClass>
static void Inner(ContainerClass* const FRestrict inTargets,
                  const MatrixKernelClass* const MatrixKernel)
{
    FP2P::GenericInner<double, ContainerClass, MatrixKernelClass, double, 1>(inTargets, MatrixKernel);
}
template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
......@@ -694,6 +762,11 @@ struct FP2PT<float>{
FP2P::GenericFullMutual<float, ContainerClass, MatrixKernelClass, float, 1>(inTargets, inNeighbors, limiteNeighbors, MatrixKernel);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2P::GenericInner with FReal = float, ComputeClass = float and
 * NbFRealInComputeClass = 1.
 */
template <class ContainerClass, class MatrixKernelClass>
static void Inner(ContainerClass* const FRestrict inTargets,
                  const MatrixKernelClass* const MatrixKernel)
{
    FP2P::GenericInner<float, ContainerClass, MatrixKernelClass, float, 1>(inTargets, MatrixKernel);
}
template <class ContainerClass, class MatrixKernelClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
......
......@@ -84,6 +84,22 @@ namespace FP2P {
}
}
}
}
template <class FReal, class ContainerClass, typename MatrixKernelClass>
inline void InnerMultiRhs(ContainerClass* const FRestrict inTargets, const MatrixKernelClass *const MatrixKernel){
const FSize nbParticlesTargets = inTargets->getNbParticles();
const FReal*const targetsPhysicalValues = inTargets->getPhysicalValuesArray();
const FReal*const targetsX = inTargets->getPositions()[0];
const FReal*const targetsY = inTargets->getPositions()[1];
const FReal*const targetsZ = inTargets->getPositions()[2];
FReal*const targetsForcesX = inTargets->getForcesXArray();
FReal*const targetsForcesY = inTargets->getForcesYArray();
FReal*const targetsForcesZ = inTargets->getForcesZArray();
FReal*const targetsPotentials = inTargets->getPotentialsArray();
const int NVALS = inTargets->getNVALS();
const FSize targetsLD = inTargets->getLeadingDimension();
for(FSize idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){
for(FSize idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){
......
......@@ -151,6 +151,22 @@ static void GenericFullMutual(ContainerClass* const FRestrict inTargets, Contain
}
}
}
}
template <class FReal, class ContainerClass, class ComputeClass, int NbFRealInComputeClass>
static void GenericInner(ContainerClass* const FRestrict inTargets){
const FSize nbParticlesTargets = inTargets->getNbParticles();
const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues();
const FReal*const targetsX = inTargets->getPositions()[0];
const FReal*const targetsY = inTargets->getPositions()[1];
const FReal*const targetsZ = inTargets->getPositions()[2];
FReal*const targetsForcesX = inTargets->getForcesX();
FReal*const targetsForcesY = inTargets->getForcesY();
FReal*const targetsForcesZ = inTargets->getForcesZ();
FReal*const targetsPotentials = inTargets->getPotentials();
const ComputeClass mOne = FMath::One<ComputeClass>();
{//In this part, we compute (vectorially) the interaction
//within the target leaf.
......@@ -318,6 +334,11 @@ struct FP2PRT<double>{
FP2PR::GenericFullMutual<double, ContainerClass, __m256d, 4>(inTargets, inNeighbors, limiteNeighbors);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2PR::GenericInner with FReal = double, ComputeClass = __m256d and
 * NbFRealInComputeClass = 4.
 */
template <class ContainerClass>
static void Inner(ContainerClass* const FRestrict inTargets)
{
    FP2PR::GenericInner<double, ContainerClass, __m256d, 4>(inTargets);
}
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
......@@ -333,6 +354,11 @@ struct FP2PRT<float>{
FP2PR::GenericFullMutual<float, ContainerClass, __m256, 8>(inTargets, inNeighbors, limiteNeighbors);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2PR::GenericInner with FReal = float, ComputeClass = __m256 and
 * NbFRealInComputeClass = 8.
 *
 * The callee must be GenericInner: GenericFullMutual expects the
 * neighbor array and its size as well, so calling it with the target
 * container alone does not match its signature.
 */
template <class ContainerClass>
static void Inner(ContainerClass* const FRestrict inTargets){
    FP2PR::GenericInner<float, ContainerClass, __m256, 8>(inTargets);
}
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
......@@ -348,6 +374,11 @@ struct FP2PRT<double>{
FP2PR::GenericFullMutual<double, ContainerClass, __m512d, 8>(inTargets, inNeighbors, limiteNeighbors);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2PR::GenericInner with FReal = double, ComputeClass = __m512d and
 * NbFRealInComputeClass = 8.
 */
template <class ContainerClass>
static void Inner(ContainerClass* const FRestrict inTargets)
{
    FP2PR::GenericInner<double, ContainerClass, __m512d, 8>(inTargets);
}
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
......@@ -363,6 +394,11 @@ struct FP2PRT<float>{
FP2PR::GenericFullMutual<float, ContainerClass, __m512, 16>(inTargets, inNeighbors, limiteNeighbors);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2PR::GenericInner with FReal = float, ComputeClass = __m512 and
 * NbFRealInComputeClass = 16.
 *
 * The callee must be GenericInner: GenericFullMutual expects the
 * neighbor array and its size as well, so calling it with the target
 * container alone does not match its signature.
 */
template <class ContainerClass>
static void Inner(ContainerClass* const FRestrict inTargets){
    FP2PR::GenericInner<float, ContainerClass, __m512, 16>(inTargets);
}
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
......@@ -379,6 +415,11 @@ struct FP2PRT<double>{
FP2PR::GenericFullMutual<double, ContainerClass, __m128d, 2>(inTargets, inNeighbors, limiteNeighbors);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2PR::GenericInner with FReal = double, ComputeClass = __m128d and
 * NbFRealInComputeClass = 2.
 */
template <class ContainerClass>
static void Inner(ContainerClass* const FRestrict inTargets)
{
    FP2PR::GenericInner<double, ContainerClass, __m128d, 2>(inTargets);
}
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
......@@ -394,6 +435,11 @@ struct FP2PRT<float>{
FP2PR::GenericFullMutual<float, ContainerClass, __m128, 4>(inTargets, inNeighbors, limiteNeighbors);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2PR::GenericInner with FReal = float, ComputeClass = __m128 and
 * NbFRealInComputeClass = 4.
 */
template <class ContainerClass>
static void Inner(ContainerClass* const FRestrict inTargets)
{
    FP2PR::GenericInner<float, ContainerClass, __m128, 4>(inTargets);
}
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){
......@@ -410,6 +456,11 @@ struct FP2PRT<double>{
FP2PR::GenericFullMutual<double, ContainerClass, double, 1>(inTargets, inNeighbors, limiteNeighbors);
}
/**
 * Interactions within the target leaf only, delegated to
 * FP2PR::GenericInner with FReal = double, ComputeClass = double and
 * NbFRealInComputeClass = 1.
 */
template <class ContainerClass>
static void Inner(ContainerClass* const FRestrict inTargets)
{
    FP2PR::GenericInner<double, ContainerClass, double, 1>(inTargets);
}
template <class ContainerClass>
static void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors){