Mentions légales du service

Skip to content
Snippets Groups Projects
Commit eb21d923 authored by Berenger Bramas's avatar Berenger Bramas
Browse files

Update vectorized kernels

parent c125b9c1
No related branches found
No related tags found
No related merge requests found
Pipeline #31468 passed
......@@ -2,8 +2,7 @@
#define FADAPTCHEBKERNEL_HPP
#include "Kernels/Chebyshev/FChebSymKernel.hpp"
#include "FComputeClassDescriptor.hpp"
#include "InastempCompileConfig.h"
#ifdef _OPENMP
#include <omp.h>
......@@ -69,8 +68,8 @@ public:
const ContainerClass* const particles,
const SymbolicData * const /*source_symb*/)
{
using ComputeClass = typename ComputeClassDescriptor<FReal>::type;
constexpr std::size_t FRealCount = ComputeClassDescriptor<FReal>::count;
using ComputeClass = InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Target cell: local
const FReal localCellWidth(FBase::BoxWidth / FReal(1 << symb->getLevel()));
......@@ -82,15 +81,15 @@ public:
FChebTensor<FReal,ORDER>::setRoots(localCellCenter, localCellWidth, X);
// Particles attributes
const ComputeClass * const posX = (const ComputeClass * const) particles->getPositions()[0];
const ComputeClass * const posY = (const ComputeClass * const) particles->getPositions()[1];
const ComputeClass * const posZ = (const ComputeClass * const) particles->getPositions()[2];
const ComputeClass * const physicalValues = (const ComputeClass * const) particles->getPhysicalValues();
const FReal * const posX = particles->getPositions()[0];
const FReal * const posY = particles->getPositions()[1];
const FReal * const posZ = particles->getPositions()[2];
const FReal * const physicalValues = particles->getPhysicalValues();
const FReal* pX = particles->getPositions()[0];
const FReal* pY = particles->getPositions()[1];
const FReal* pZ = particles->getPositions()[2];
const FReal* pV = particles->getPhysicalValues();
// const FReal* pX = particles->getPositions()[0];
// const FReal* pY = particles->getPositions()[1];
// const FReal* pZ = particles->getPositions()[2];
// const FReal* pV = particles->getPhysicalValues();
// apply P2L
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
......@@ -99,40 +98,18 @@ public:
ComputeClass XY = ComputeClass(X[m].getY());
ComputeClass XZ = ComputeClass(X[m].getZ());
std::size_t idxPart = 0;
// Compute using vectorization for all but the last array elements
ComputeClass tmpLocalExp = ComputeClass(0);
for (;
idxPart < ((particles->getNbParticles())
/ FRealCount);
++idxPart)
ComputeClass tmpLocalExp = ComputeClass::GetZero();
for (std::size_t idxPart = 0 ; idxPart < particles->getNbParticles() ; idxPart += FRealCount)
{
tmpLocalExp +=
FBase::MatrixKernel->evaluate(
XX, XY, XZ,
posX[idxPart], posY[idxPart], posZ[idxPart])
ComputeClass(&posX[idxPart]), ComputeClass(&posY[idxPart]), ComputeClass(&posZ[idxPart]))
* physicalValues[idxPart];
}
local->get(idxRhs)[m] += ComputeClass(tmpLocalExp);
// Compute the last array elements one by one if they exist
if(idxPart < ((particles->getNbParticles() + FRealCount - 1) / FRealCount)) {
auto Xx = X[m].getX();
auto Xy = X[m].getY();
auto Xz = X[m].getZ();
for(idxPart = FRealCount * (particles->getNbParticles() / FRealCount);
idxPart < static_cast<std::size_t>(particles->getNbParticles());
++idxPart)
{
local->get(idxRhs)[m] +=
FBase::MatrixKernel->evaluate(
Xx, Xy, Xz,
pX[idxPart], pY[idxPart], pZ[idxPart])
* pV[idxPart];
}
}
local->get(idxRhs)[m] += (tmpLocalExp.horizontalSum());
}
}// NVALS
}
......@@ -143,7 +120,7 @@ public:
ContainerClass* const particles,
const SymbolicData * const /*source_symb*/)
{
using ComputeClass = typename InaVecBestType<FReal>;
using ComputeClass = InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Source cell: pole
......@@ -191,10 +168,10 @@ public:
YX, YY, YZ,
Kxy,dKxy);
potentials[idxPart] += Kxy[0] * MultipoleExpansion;
fx[idxPart] += dKxy[0] * physVal[idxPart] * MultipoleExpansion;
fy[idxPart] += dKxy[1] * physVal[idxPart] * MultipoleExpansion;
fz[idxPart] += dKxy[2] * physVal[idxPart] * MultipoleExpansion;
(ComputeClass(&potentials[idxPart]) + Kxy[0] * MultipoleExpansion).storeInArray(&potentials[idxPart]);
(ComputeClass(&fx[idxPart]) + dKxy[0] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fx[idxPart]);
(ComputeClass(&fy[idxPart]) + dKxy[1] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fy[idxPart]);
(ComputeClass(&fz[idxPart]) + dKxy[2] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fz[idxPart]);
}
}// Particles
}// NVALS
......
......@@ -4,11 +4,10 @@
#include <cassert>
#include "Kernels/Uniform/FUnifKernel.hpp"
#include "InastempCompileConfig.h"
#include "Utils/FMath.hpp"
#include "FComputeClassDescriptor.hpp"
#include <fstream>
......@@ -114,8 +113,8 @@ public:
const ContainerClass* const particles,
const SymbolicData * const /*source_symb*/)
{
using ComputeClass = typename ComputeClassDescriptor<FReal>::type;
constexpr std::size_t FRealCount = ComputeClassDescriptor<FReal>::count;
using ComputeClass = InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Target cell: local
const FReal localCellWidth(FBase::BoxWidth / FReal(1 << symb->getLevel()));
......@@ -127,15 +126,15 @@ public:
FUnifTensor<FReal,ORDER>::setRoots(localCellCenter, localCellWidth, X);
// Particles attributes
const ComputeClass * const posX = (const ComputeClass * const) particles->getPositions()[0];
const ComputeClass * const posY = (const ComputeClass * const) particles->getPositions()[1];
const ComputeClass * const posZ = (const ComputeClass * const) particles->getPositions()[2];
const ComputeClass * const physicalValues = (const ComputeClass * const) particles->getPhysicalValues();
const FReal * const posX = particles->getPositions()[0];
const FReal * const posY = particles->getPositions()[1];
const FReal * const posZ = particles->getPositions()[2];
const FReal * const physicalValues = particles->getPhysicalValues();
const FReal* pX = particles->getPositions()[0];
const FReal* pY = particles->getPositions()[1];
const FReal* pZ = particles->getPositions()[2];
const FReal* pV = particles->getPhysicalValues();
// const FReal* pX = particles->getPositions()[0];
// const FReal* pY = particles->getPositions()[1];
// const FReal* pZ = particles->getPositions()[2];
// const FReal* pV = particles->getPhysicalValues();
// apply P2L
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
......@@ -144,38 +143,19 @@ public:
ComputeClass XY = ComputeClass(X[m].getY());
ComputeClass XZ = ComputeClass(X[m].getZ());
ComputeClass tmpLocalExp = ComputeClass(0);
ComputeClass tmpLocalExp = ComputeClass::GetZero();
// Compute using vectorization for all but the last array elements
std::size_t idxPart = 0;
for (; idxPart < (particles->getNbParticles() / FRealCount);
++idxPart)
for (std::size_t idxPart = 0 ; idxPart < particles->getNbParticles() ; idxPart += FRealCount)
{
tmpLocalExp +=
FBase::MatrixKernel->evaluate(
XX, XY, XZ,
posX[idxPart], posY[idxPart], posZ[idxPart])
ComputeClass(&posX[idxPart]), ComputeClass(&posY[idxPart]), ComputeClass(&posZ[idxPart]))
* physicalValues[idxPart];
}
local->get(idxRhs)[m] += FReal(tmpLocalExp);
// Compute the last array elements one by one if they exist
if(idxPart < ((particles->getNbParticles() + FRealCount - 1) / FRealCount)) {
auto Xx = X[m].getX();
auto Xy = X[m].getY();
auto Xz = X[m].getZ();
for(idxPart = FRealCount * (particles->getNbParticles() / FRealCount);
idxPart < static_cast<std::size_t>(particles->getNbParticles());
++idxPart)
{
local->get(idxRhs)[m] +=
FBase::MatrixKernel->evaluate(
Xx, Xy, Xz,
pX[idxPart], pY[idxPart], pZ[idxPart])
* pV[idxPart];
}
}
local->get(idxRhs)[m] += (tmpLocalExp.horizontalSum());
}
}// NVALS
}
......@@ -191,7 +171,7 @@ public:
ContainerClass* const particles,
const SymbolicData * const /*target_symb*/)
{
using ComputeClass = typename InaVecBestType<FReal>;
using ComputeClass = InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Source cell: pole
......@@ -241,10 +221,10 @@ public:
YX, YY, YZ,
Kxy,dKxy);
potentials[idxPart] += Kxy[0] * MultipoleExpansion;
fx[idxPart] += dKxy[0] * physVal[idxPart] * MultipoleExpansion;
fy[idxPart] += dKxy[1] * physVal[idxPart] * MultipoleExpansion;
fz[idxPart] += dKxy[2] * physVal[idxPart] * MultipoleExpansion;
(ComputeClass(&potentials[idxPart]) + Kxy[0] * MultipoleExpansion).storeInArray(&potentials[idxPart]);
(ComputeClass(&fx[idxPart]) + dKxy[0] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fx[idxPart]);
(ComputeClass(&fy[idxPart]) + dKxy[1] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fy[idxPart]);
(ComputeClass(&fz[idxPart]) + dKxy[2] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fz[idxPart]);
}
......
This diff is collapsed.
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment