Commit 395ae248 authored by Berenger Bramas

Update the P2P kernel; the boundaries still have to be managed

parent 2e5207db
......@@ -143,8 +143,8 @@ public:
ContainerClass* const particles,
const SymbolicData * const /*source_symb*/)
{
using ComputeClass = typename ComputeClassDescriptor<FReal>::type;
constexpr std::size_t FRealCount = ComputeClassDescriptor<FReal>::count;
using ComputeClass = typename InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Source cell: pole
const FReal poleCellWidth(FBase::BoxWidth / FReal(1 << symb->getLevel()));
......@@ -156,16 +156,16 @@ public:
FChebTensor<FReal,ORDER>::setRoots(poleCellCenter, poleCellWidth, Y);
// read positions
const ComputeClass* const posX = (const ComputeClass* const)(particles->getPositions()[0]);
const ComputeClass* const posY = (const ComputeClass* const)(particles->getPositions()[1]);
const ComputeClass* const posZ = (const ComputeClass* const)(particles->getPositions()[2]);
const FReal* const posX = (particles->getPositions()[0]);
const FReal* const posY = (particles->getPositions()[1]);
const FReal* const posZ = (particles->getPositions()[2]);
// get potential
ComputeClass* const physVal = (ComputeClass* const)(particles->getPhysicalValues());
ComputeClass* const potentials = (ComputeClass* const)(particles->getPotentials());
ComputeClass* const fx = (ComputeClass* const)(particles->getForcesX());
ComputeClass* const fy = (ComputeClass* const)(particles->getForcesY());
ComputeClass* const fz = (ComputeClass* const)(particles->getForcesZ());
FReal* const physVal = (particles->getPhysicalValues());
FReal* const potentials = (particles->getPotentials());
FReal* const fx = (particles->getForcesX());
FReal* const fy = (particles->getForcesY());
FReal* const fz = (particles->getForcesZ());
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// apply M2P
......@@ -179,14 +179,15 @@ public:
ComputeClass YZ = ComputeClass(Y[n].getZ());
for(std::size_t idxPart = 0;
idxPart < ( (particles->getNbParticles() + FRealCount - 1)
/ FRealCount);
++idxPart)
idxPart < particles->getNbParticles();
idxPart += FRealCount)
{
ComputeClass Kxy[1];
ComputeClass dKxy[3];
FBase::MatrixKernel->evaluateBlockAndDerivative(
posX[idxPart], posY[idxPart], posZ[idxPart],
ComputeClass(&posX[idxPart]),
ComputeClass(&posY[idxPart]),
ComputeClass(&posZ[idxPart]),
YX, YY, YZ,
Kxy,dKxy);
......
......@@ -191,8 +191,8 @@ public:
ContainerClass* const particles,
const SymbolicData * const /*target_symb*/)
{
using ComputeClass = typename ComputeClassDescriptor<FReal>::type;
constexpr std::size_t FRealCount = ComputeClassDescriptor<FReal>::count;
using ComputeClass = typename InaVecBestType<FReal>;
constexpr int FRealCount = ComputeClass::VecLength;
// Source cell: pole
const FReal poleCellWidth(FBase::BoxWidth / FReal(1 << symb->getLevel()));
......@@ -204,16 +204,16 @@ public:
FUnifTensor<FReal,ORDER>::setRoots(poleCellCenter, poleCellWidth, Y);
// read positions
const ComputeClass* const posX = (const ComputeClass* const)(particles->getPositions()[0]);
const ComputeClass* const posY = (const ComputeClass* const)(particles->getPositions()[1]);
const ComputeClass* const posZ = (const ComputeClass* const)(particles->getPositions()[2]);
const FReal* const posX = (particles->getPositions()[0]);
const FReal* const posY = (particles->getPositions()[1]);
const FReal* const posZ = (particles->getPositions()[2]);
// get potential
ComputeClass* const physVal = (ComputeClass* const)(particles->getPhysicalValues());
ComputeClass* const potentials = (ComputeClass* const)(particles->getPotentials());
ComputeClass* const fx = (ComputeClass* const)(particles->getForcesX());
ComputeClass* const fy = (ComputeClass* const)(particles->getForcesY());
ComputeClass* const fz = (ComputeClass* const)(particles->getForcesZ());
FReal* const physVal = (particles->getPhysicalValues());
FReal* const potentials = (particles->getPotentials());
FReal* const fx = (particles->getForcesX());
FReal* const fy = (particles->getForcesY());
FReal* const fz = (particles->getForcesZ());
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
......@@ -228,15 +228,16 @@ public:
ComputeClass YZ = ComputeClass(Y[n].getZ());
for(std::size_t idxPart = 0;
idxPart < ( (particles->getNbParticles() + FRealCount - 1)
/ FRealCount);
++idxPart)
idxPart < particles->getNbParticles();
idxPart += FRealCount)
{
ComputeClass Kxy[1];
ComputeClass dKxy[3];
FBase::MatrixKernel->evaluateBlockAndDerivative(
posX[idxPart], posY[idxPart], posZ[idxPart],
ComputeClass(&posX[idxPart]),
ComputeClass(&posY[idxPart]),
ComputeClass(&posZ[idxPart]),
YX, YY, YZ,
Kxy,dKxy);
......
#ifndef FCOMPUTECLASSDESCRIPTOR_HPP
#define FCOMPUTECLASSDESCRIPTOR_HPP
/**
 * Compile-time descriptor that associates a floating point type with:
 *   - type  : the type used to carry out the computations,
 *   - count : the number of FReal values held by one object of that type.
 *
 * The generic version is empty on purpose: only the double and float
 * specializations below expose the two members, so any other FReal fails
 * to compile as soon as `type` or `count` is requested.
 *
 * In both specializations the configuration macros are tested in the order
 * SCALFMM_USE_SSE, SCALFMM_USE_AVX, SCALFMM_USE_AVX2; the first one that is
 * defined wins, and the plain scalar type is used when none is defined.
 *
 * NOTE(review): if several of these macros can be defined at once, the
 * narrowest vector type is the one selected - confirm this precedence is the
 * intended one.
 * NOTE(review): SCALFMM_USE_AVX2 selects the 512-bit types __m512d/__m512,
 * which are normally tied to AVX-512 rather than AVX2 - confirm the macro
 * naming is intentional.
 * NOTE(review): the intrinsic vector types are expected to be declared by a
 * header included before this point - verify against the includers.
 */
template <class FReal>
struct ComputeClassDescriptor {
};

/** Double precision: 2, 4 or 8 values per vector, or 1 for the scalar fallback. */
template <>
struct ComputeClassDescriptor<double> {
#if defined(SCALFMM_USE_SSE)
    enum { count = 2 };
    using type = __m128d;
#elif defined(SCALFMM_USE_AVX)
    enum { count = 4 };
    using type = __m256d;
#elif defined(SCALFMM_USE_AVX2)
    enum { count = 8 };
    using type = __m512d;
#else
    // No vectorization macro defined: compute on plain doubles.
    enum { count = 1 };
    using type = double;
#endif
};

/** Single precision: 4, 8 or 16 values per vector, or 1 for the scalar fallback. */
template <>
struct ComputeClassDescriptor<float> {
#if defined(SCALFMM_USE_SSE)
    enum { count = 4 };
    using type = __m128;
#elif defined(SCALFMM_USE_AVX)
    enum { count = 8 };
    using type = __m256;
#elif defined(SCALFMM_USE_AVX2)
    enum { count = 16 };
    using type = __m512;
#else
    // No vectorization macro defined: compute on plain floats.
    enum { count = 1 };
    using type = float;
#endif
};
#endif /* FCOMPUTECLASSDESCRIPTOR_HPP */
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment