Commit eb21d923 authored by Berenger Bramas's avatar Berenger Bramas

Update vectorized kernels

parent c125b9c1
......@@ -2,8 +2,7 @@
#define FADAPTCHEBKERNEL_HPP
#include "Kernels/Chebyshev/FChebSymKernel.hpp"
#include "FComputeClassDescriptor.hpp"
#include "InastempCompileConfig.h"
#ifdef _OPENMP
#include <omp.h>
......@@ -69,8 +68,8 @@ public:
const ContainerClass* const particles,
const SymbolicData * const /*source_symb*/)
{
using ComputeClass = typename ComputeClassDescriptor<FReal>::type;
constexpr std::size_t FRealCount = ComputeClassDescriptor<FReal>::count;
using ComputeClass = InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Target cell: local
const FReal localCellWidth(FBase::BoxWidth / FReal(1 << symb->getLevel()));
......@@ -82,15 +81,15 @@ public:
FChebTensor<FReal,ORDER>::setRoots(localCellCenter, localCellWidth, X);
// Particles attributes
const ComputeClass * const posX = (const ComputeClass * const) particles->getPositions()[0];
const ComputeClass * const posY = (const ComputeClass * const) particles->getPositions()[1];
const ComputeClass * const posZ = (const ComputeClass * const) particles->getPositions()[2];
const ComputeClass * const physicalValues = (const ComputeClass * const) particles->getPhysicalValues();
const FReal * const posX = particles->getPositions()[0];
const FReal * const posY = particles->getPositions()[1];
const FReal * const posZ = particles->getPositions()[2];
const FReal * const physicalValues = particles->getPhysicalValues();
const FReal* pX = particles->getPositions()[0];
const FReal* pY = particles->getPositions()[1];
const FReal* pZ = particles->getPositions()[2];
const FReal* pV = particles->getPhysicalValues();
// const FReal* pX = particles->getPositions()[0];
// const FReal* pY = particles->getPositions()[1];
// const FReal* pZ = particles->getPositions()[2];
// const FReal* pV = particles->getPhysicalValues();
// apply P2L
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
......@@ -99,40 +98,18 @@ public:
ComputeClass XY = ComputeClass(X[m].getY());
ComputeClass XZ = ComputeClass(X[m].getZ());
std::size_t idxPart = 0;
// Compute using vectorization for all but the last array elements
ComputeClass tmpLocalExp = ComputeClass(0);
for (;
idxPart < ((particles->getNbParticles())
/ FRealCount);
++idxPart)
ComputeClass tmpLocalExp = ComputeClass::GetZero();
for (std::size_t idxPart = 0 ; idxPart < particles->getNbParticles() ; idxPart += FRealCount)
{
tmpLocalExp +=
FBase::MatrixKernel->evaluate(
XX, XY, XZ,
posX[idxPart], posY[idxPart], posZ[idxPart])
ComputeClass(&posX[idxPart]), ComputeClass(&posY[idxPart]), ComputeClass(&posZ[idxPart]))
* physicalValues[idxPart];
}
local->get(idxRhs)[m] += ComputeClass(tmpLocalExp);
// Compute the last array elements one by one if they exist
if(idxPart < ((particles->getNbParticles() + FRealCount - 1) / FRealCount)) {
auto Xx = X[m].getX();
auto Xy = X[m].getY();
auto Xz = X[m].getZ();
for(idxPart = FRealCount * (particles->getNbParticles() / FRealCount);
idxPart < static_cast<std::size_t>(particles->getNbParticles());
++idxPart)
{
local->get(idxRhs)[m] +=
FBase::MatrixKernel->evaluate(
Xx, Xy, Xz,
pX[idxPart], pY[idxPart], pZ[idxPart])
* pV[idxPart];
}
}
local->get(idxRhs)[m] += (tmpLocalExp.horizontalSum());
}
}// NVALS
}
......@@ -143,7 +120,7 @@ public:
ContainerClass* const particles,
const SymbolicData * const /*source_symb*/)
{
using ComputeClass = typename InaVecBestType<FReal>;
using ComputeClass = InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Source cell: pole
......@@ -191,10 +168,10 @@ public:
YX, YY, YZ,
Kxy,dKxy);
potentials[idxPart] += Kxy[0] * MultipoleExpansion;
fx[idxPart] += dKxy[0] * physVal[idxPart] * MultipoleExpansion;
fy[idxPart] += dKxy[1] * physVal[idxPart] * MultipoleExpansion;
fz[idxPart] += dKxy[2] * physVal[idxPart] * MultipoleExpansion;
(ComputeClass(&potentials[idxPart]) + Kxy[0] * MultipoleExpansion).storeInArray(&potentials[idxPart]);
(ComputeClass(&fx[idxPart]) + dKxy[0] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fx[idxPart]);
(ComputeClass(&fy[idxPart]) + dKxy[1] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fy[idxPart]);
(ComputeClass(&fz[idxPart]) + dKxy[2] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fz[idxPart]);
}
}// Particles
}// NVALS
......
......@@ -4,11 +4,10 @@
#include <cassert>
#include "Kernels/Uniform/FUnifKernel.hpp"
#include "InastempCompileConfig.h"
#include "Utils/FMath.hpp"
#include "FComputeClassDescriptor.hpp"
#include <fstream>
......@@ -114,8 +113,8 @@ public:
const ContainerClass* const particles,
const SymbolicData * const /*source_symb*/)
{
using ComputeClass = typename ComputeClassDescriptor<FReal>::type;
constexpr std::size_t FRealCount = ComputeClassDescriptor<FReal>::count;
using ComputeClass = InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Target cell: local
const FReal localCellWidth(FBase::BoxWidth / FReal(1 << symb->getLevel()));
......@@ -127,15 +126,15 @@ public:
FUnifTensor<FReal,ORDER>::setRoots(localCellCenter, localCellWidth, X);
// Particles attributes
const ComputeClass * const posX = (const ComputeClass * const) particles->getPositions()[0];
const ComputeClass * const posY = (const ComputeClass * const) particles->getPositions()[1];
const ComputeClass * const posZ = (const ComputeClass * const) particles->getPositions()[2];
const ComputeClass * const physicalValues = (const ComputeClass * const) particles->getPhysicalValues();
const FReal * const posX = particles->getPositions()[0];
const FReal * const posY = particles->getPositions()[1];
const FReal * const posZ = particles->getPositions()[2];
const FReal * const physicalValues = particles->getPhysicalValues();
const FReal* pX = particles->getPositions()[0];
const FReal* pY = particles->getPositions()[1];
const FReal* pZ = particles->getPositions()[2];
const FReal* pV = particles->getPhysicalValues();
// const FReal* pX = particles->getPositions()[0];
// const FReal* pY = particles->getPositions()[1];
// const FReal* pZ = particles->getPositions()[2];
// const FReal* pV = particles->getPhysicalValues();
// apply P2L
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
......@@ -144,38 +143,19 @@ public:
ComputeClass XY = ComputeClass(X[m].getY());
ComputeClass XZ = ComputeClass(X[m].getZ());
ComputeClass tmpLocalExp = ComputeClass(0);
ComputeClass tmpLocalExp = ComputeClass::GetZero();
// Compute using vectorization for all but the last array elements
std::size_t idxPart = 0;
for (; idxPart < (particles->getNbParticles() / FRealCount);
++idxPart)
for (std::size_t idxPart = 0 ; idxPart < particles->getNbParticles() ; idxPart += FRealCount)
{
tmpLocalExp +=
FBase::MatrixKernel->evaluate(
XX, XY, XZ,
posX[idxPart], posY[idxPart], posZ[idxPart])
ComputeClass(&posX[idxPart]), ComputeClass(&posY[idxPart]), ComputeClass(&posZ[idxPart]))
* physicalValues[idxPart];
}
local->get(idxRhs)[m] += FReal(tmpLocalExp);
// Compute the last array elements one by one if they exist
if(idxPart < ((particles->getNbParticles() + FRealCount - 1) / FRealCount)) {
auto Xx = X[m].getX();
auto Xy = X[m].getY();
auto Xz = X[m].getZ();
for(idxPart = FRealCount * (particles->getNbParticles() / FRealCount);
idxPart < static_cast<std::size_t>(particles->getNbParticles());
++idxPart)
{
local->get(idxRhs)[m] +=
FBase::MatrixKernel->evaluate(
Xx, Xy, Xz,
pX[idxPart], pY[idxPart], pZ[idxPart])
* pV[idxPart];
}
}
local->get(idxRhs)[m] += (tmpLocalExp.horizontalSum());
}
}// NVALS
}
......@@ -191,7 +171,7 @@ public:
ContainerClass* const particles,
const SymbolicData * const /*target_symb*/)
{
using ComputeClass = typename InaVecBestType<FReal>;
using ComputeClass = InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Source cell: pole
......@@ -241,10 +221,10 @@ public:
YX, YY, YZ,
Kxy,dKxy);
potentials[idxPart] += Kxy[0] * MultipoleExpansion;
fx[idxPart] += dKxy[0] * physVal[idxPart] * MultipoleExpansion;
fy[idxPart] += dKxy[1] * physVal[idxPart] * MultipoleExpansion;
fz[idxPart] += dKxy[2] * physVal[idxPart] * MultipoleExpansion;
(ComputeClass(&potentials[idxPart]) + Kxy[0] * MultipoleExpansion).storeInArray(&potentials[idxPart]);
(ComputeClass(&fx[idxPart]) + dKxy[0] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fx[idxPart]);
(ComputeClass(&fy[idxPart]) + dKxy[1] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fy[idxPart]);
(ComputeClass(&fz[idxPart]) + dKxy[2] * physVal[idxPart] * MultipoleExpansion).storeInArray(&fz[idxPart]);
}
......
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment