Commit 0d7bfc7d authored by Quentin Khan's avatar Quentin Khan
Browse files

Adapt uniform kernel to StarPU interface

  - Add multipole_t and local_expansion_t structures to split
    corresponding data in FUnifCell.
  - Add multipole and local_expansion attributes in FUnifCell.
  - Make FUnifKernel compatible with StarPU interface.
parent 8f6e2d0d
......@@ -43,19 +43,48 @@ class FUnifCell : public FBasicCell, public FAbstractSendable
static const int VectorSize = TensorTraits<ORDER>::nnodes;
static const int TransformedVectorSize = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1);
FReal multipole_exp[NRHS * NVALS * VectorSize]; //< Multipole expansion
FReal local_exp[NLHS * NVALS * VectorSize]; //< Local expansion
// PB: Store multipole and local expansion in Fourier space
FComplex<FReal> transformed_multipole_exp[NRHS * NVALS * TransformedVectorSize];
FComplex<FReal> transformed_local_exp[NLHS * NVALS * TransformedVectorSize];
public:
struct multipole_t {
FReal multipole_exp[NRHS * NVALS * VectorSize]; //< Multipole expansion
// Multipole expansion in Fourier space
FComplex<FReal> transformed_multipole_exp[NRHS * NVALS * TransformedVectorSize];
const FReal* getMultipole(const int inRhs) const
{ return this->multipole_exp + inRhs*VectorSize; }
FReal* getMultipole(const int inRhs)
{ return this->multipole_exp + inRhs*VectorSize; }
const FComplex<FReal>* getTransformedMultipole(const int inRhs) const
{ return this->transformed_multipole_exp + inRhs*TransformedVectorSize; }
FComplex<FReal>* getTransformedMultipole(const int inRhs)
{ return this->transformed_multipole_exp + inRhs*TransformedVectorSize; }
};
struct local_expansion_t {
FReal local_exp[NLHS * NVALS * VectorSize]; //< Local expansion
// Local expansion in Fourier space
FComplex<FReal> transformed_local_exp[NLHS * NVALS * TransformedVectorSize];
const FReal* getLocal(const int inRhs) const
{ return this->local_exp + inRhs*VectorSize; }
FReal* getLocal(const int inRhs)
{ return this->local_exp + inRhs*VectorSize; }
const FComplex<FReal>* getTransformedLocal(const int inRhs) const
{ return this->transformed_local_exp + inRhs*TransformedVectorSize; }
FComplex<FReal>* getTransformedLocal(const int inRhs)
{ return this->transformed_local_exp + inRhs*TransformedVectorSize; }
};
multipole_t multipole;
local_expansion_t local_expansion;
FUnifCell(){
memset(multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize);
memset(local_exp, 0, sizeof(FReal) * NLHS * NVALS * VectorSize);
memset(transformed_multipole_exp, 0,
memset(multipole.multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize);
memset(local_expansion.local_exp, 0, sizeof(FReal) * NLHS * NVALS * VectorSize);
memset(multipole.transformed_multipole_exp, 0,
sizeof(FComplex<FReal>) * NRHS * NVALS * TransformedVectorSize);
memset(transformed_local_exp, 0,
memset(local_expansion.transformed_local_exp, 0,
sizeof(FComplex<FReal>) * NLHS * NVALS * TransformedVectorSize);
}
......@@ -63,20 +92,19 @@ public:
/** Get Multipole */
const FReal* getMultipole(const int inRhs) const
{ return this->multipole_exp + inRhs*VectorSize;
}
{ return this->multipole.getMultipole(inRhs); }
/** Get Local */
const FReal* getLocal(const int inRhs) const{
return this->local_exp + inRhs*VectorSize;
return this->local_expansion.local_exp + inRhs*VectorSize;
}
/** Get Multipole */
FReal* getMultipole(const int inRhs){
return this->multipole_exp + inRhs*VectorSize;
return this->multipole.getMultipole(inRhs);
}
/** Get Local */
FReal* getLocal(const int inRhs){
return this->local_exp + inRhs*VectorSize;
return this->local_expansion.local_exp + inRhs*VectorSize;
}
/** To get the leading dim of a vec */
......@@ -86,20 +114,20 @@ public:
/** Get Transformed Multipole */
const FComplex<FReal>* getTransformedMultipole(const int inRhs) const{
return this->transformed_multipole_exp + inRhs*TransformedVectorSize;
return this->multipole.transformed_multipole_exp + inRhs*TransformedVectorSize;
}
/** Get Transformed Local */
const FComplex<FReal>* getTransformedLocal(const int inRhs) const{
return this->transformed_local_exp + inRhs*TransformedVectorSize;
return this->local_expansion.transformed_local_exp + inRhs*TransformedVectorSize;
}
/** Get Transformed Multipole */
FComplex<FReal>* getTransformedMultipole(const int inRhs){
return this->transformed_multipole_exp + inRhs*TransformedVectorSize;
return this->multipole.transformed_multipole_exp + inRhs*TransformedVectorSize;
}
/** Get Transformed Local */
FComplex<FReal>* getTransformedLocal(const int inRhs){
return this->transformed_local_exp + inRhs*TransformedVectorSize;
return this->local_expansion.transformed_local_exp + inRhs*TransformedVectorSize;
}
/** To get the leading dim of a vec */
......@@ -109,11 +137,11 @@ public:
/** Make it like the begining */
void resetToInitialState(){
memset(multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize);
memset(local_exp, 0, sizeof(FReal) * NLHS * NVALS * VectorSize);
memset(transformed_multipole_exp, 0,
memset(multipole.multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize);
memset(local_expansion.local_exp, 0, sizeof(FReal) * NLHS * NVALS * VectorSize);
memset(multipole.transformed_multipole_exp, 0,
sizeof(FComplex<FReal>) * NRHS * NVALS * TransformedVectorSize);
memset(transformed_local_exp, 0,
memset(local_expansion.transformed_local_exp, 0,
sizeof(FComplex<FReal>) * NLHS * NVALS * TransformedVectorSize);
}
......@@ -122,26 +150,26 @@ public:
///////////////////////////////////////////////////////
template <class BufferWriterClass>
void serializeUp(BufferWriterClass& buffer) const{
buffer.write(multipole_exp, VectorSize*NVALS*NRHS);
buffer.write(transformed_multipole_exp, TransformedVectorSize*NVALS*NRHS);
buffer.write(multipole.multipole_exp, VectorSize*NVALS*NRHS);
buffer.write(multipole.transformed_multipole_exp, TransformedVectorSize*NVALS*NRHS);
}
template <class BufferReaderClass>
void deserializeUp(BufferReaderClass& buffer){
buffer.fillArray(multipole_exp, VectorSize*NVALS*NRHS);
buffer.fillArray(transformed_multipole_exp, TransformedVectorSize*NVALS*NRHS);
buffer.fillArray(multipole.multipole_exp, VectorSize*NVALS*NRHS);
buffer.fillArray(multipole.transformed_multipole_exp, TransformedVectorSize*NVALS*NRHS);
}
template <class BufferWriterClass>
void serializeDown(BufferWriterClass& buffer) const{
buffer.write(local_exp, VectorSize*NVALS*NLHS);
buffer.write(transformed_local_exp, TransformedVectorSize*NVALS*NLHS);
buffer.write(local_expansion.local_exp, VectorSize*NVALS*NLHS);
buffer.write(local_expansion.transformed_local_exp, TransformedVectorSize*NVALS*NLHS);
}
template <class BufferReaderClass>
void deserializeDown(BufferReaderClass& buffer){
buffer.fillArray(local_exp, VectorSize*NVALS*NLHS);
buffer.fillArray(transformed_local_exp, TransformedVectorSize*NVALS*NLHS);
buffer.fillArray(local_expansion.local_exp, VectorSize*NVALS*NLHS);
buffer.fillArray(local_expansion.transformed_local_exp, TransformedVectorSize*NVALS*NLHS);
}
///////////////////////////////////////////////////////
......@@ -150,19 +178,19 @@ public:
template <class BufferWriterClass>
void save(BufferWriterClass& buffer) const{
FBasicCell::save(buffer);
buffer.write(multipole_exp, VectorSize*NVALS*NRHS);
buffer.write(transformed_multipole_exp, TransformedVectorSize*NVALS*NRHS);
buffer.write(local_exp, VectorSize*NVALS*NLHS);
buffer.write(transformed_local_exp, TransformedVectorSize*NVALS*NLHS);
buffer.write(multipole.multipole_exp, VectorSize*NVALS*NRHS);
buffer.write(multipole.transformed_multipole_exp, TransformedVectorSize*NVALS*NRHS);
buffer.write(local_expansion.local_exp, VectorSize*NVALS*NLHS);
buffer.write(local_expansion.transformed_local_exp, TransformedVectorSize*NVALS*NLHS);
}
template <class BufferReaderClass>
void restore(BufferReaderClass& buffer){
FBasicCell::restore(buffer);
buffer.fillArray(multipole_exp, VectorSize*NVALS*NRHS);
buffer.fillArray(transformed_multipole_exp, TransformedVectorSize*NVALS*NRHS);
buffer.fillArray(local_exp, VectorSize*NVALS*NLHS);
buffer.fillArray(transformed_local_exp, TransformedVectorSize*NVALS*NLHS);
buffer.fillArray(multipole.multipole_exp, VectorSize*NVALS*NRHS);
buffer.fillArray(multipole.transformed_multipole_exp, TransformedVectorSize*NVALS*NRHS);
buffer.fillArray(local_expansion.local_exp, VectorSize*NVALS*NLHS);
buffer.fillArray(local_expansion.transformed_local_exp, TransformedVectorSize*NVALS*NLHS);
}
FSize getSavedSize() const {
......
......@@ -18,6 +18,7 @@
#ifndef FUNIFKERNEL_HPP
#define FUNIFKERNEL_HPP
#include <algorithm>
#include <vector>
#include "Utils/FGlobal.hpp"
......@@ -96,32 +97,40 @@ public:
LeafCell->getMultipole(0), SourceParticles);
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 2) apply Discrete Fourier Transform
M2LHandler.applyZeroPaddingAndDFT(LeafCell->getMultipole(idxRhs),
LeafCell->getTransformedMultipole(idxRhs));
}
}
void M2M(CellClass* const FRestrict ParentCell,
const CellClass*const FRestrict *const FRestrict ChildCells,
const int /*TreeLevel*/)
const int TreeLevel)
override
{
std::array<const typename CellClass::multipole_t*, 8> child_multipoles;
std::transform(ChildCells, ChildCells+8, child_multipoles.data(),
[](const CellClass * data) {return data ? &(data->multipole) : nullptr;});
this->M2M(&(ParentCell->multipole), child_multipoles.data(), TreeLevel);
}
void M2M(typename CellClass::multipole_t* const FRestrict ParentMultipole,
const typename CellClass::multipole_t*const FRestrict *const FRestrict ChildMultipoles,
const int /*TreeLevel*/)
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply Sy
//FBlas::scal(AbstractBaseClass::nnodes, FReal(0.), ParentCell->getMultipole(idxRhs));
//FBlas::scal(AbstractBaseClass::nnodes, FReal(0.), ParentMultipole->getMultipole(idxRhs));
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
if (ChildCells[ChildIndex]){
AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxRhs),
ParentCell->getMultipole(idxRhs));
if (ChildMultipoles[ChildIndex]){
AbstractBaseClass::Interpolator->applyM2M(
ChildIndex, ChildMultipoles[ChildIndex]->getMultipole(idxRhs),
ParentMultipole->getMultipole(idxRhs));
}
}
// 2) Apply Discete Fourier Transform
M2LHandler.applyZeroPaddingAndDFT(ParentCell->getMultipole(idxRhs),
ParentCell->getTransformedMultipole(idxRhs));
M2LHandler.applyZeroPaddingAndDFT(ParentMultipole->getMultipole(idxRhs),
ParentMultipole->getTransformedMultipole(idxRhs));
}
}
......@@ -146,28 +155,62 @@ public:
}
void M2L(typename CellClass::local_expansion_t * const FRestrict TargetExpansion,
const typename CellClass::multipole_t * const FRestrict SourceMultipoles[],
const int neighborPositions[], const int inSize, const int TreeLevel)
{
const FReal CellWidth(AbstractBaseClass::BoxWidth / FReal(FMath::pow(2, TreeLevel)));
const FReal scale(MatrixKernel->getScaleFactor(CellWidth));
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
FComplex<FReal> *const TransformedLocalExpansion = TargetExpansion->getTransformedLocal(idxRhs);
for(int idxExistingNeigh = 0 ; idxExistingNeigh < inSize ; ++idxExistingNeigh){
const int idxNeigh = neighborPositions[idxExistingNeigh];
M2LHandler.applyFC(idxNeigh, TreeLevel, scale,
SourceMultipoles[idxExistingNeigh]->getTransformedMultipole(idxRhs),
TransformedLocalExpansion);
}
}
}
void L2L(const CellClass* const FRestrict ParentCell,
CellClass* FRestrict *const FRestrict ChildCells,
const int /*TreeLevel*/)
const int TreeLevel)
override
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
std::array<typename CellClass::local_expansion_t*, 8> child_expansions;
std::transform(ChildCells, ChildCells+8, child_expansions.data(),
[](CellClass * data) {return data ? &(data->local_expansion) : nullptr;});
this->L2L(&(ParentCell->local_expansion), child_expansions.data(), TreeLevel);
}
void L2L(const typename CellClass::local_expansion_t * const FRestrict ParentExpansion,
typename CellClass::local_expansion_t * FRestrict * const FRestrict ChildExpansion,
const int /*TreeLevel*/)
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) Apply Inverse Discete Fourier Transform
FReal localExp[AbstractBaseClass::nnodes];
M2LHandler.unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxRhs),
M2LHandler.unapplyZeroPaddingAndDFT(ParentExpansion->getTransformedLocal(idxRhs),
localExp);
FBlas::add(AbstractBaseClass::nnodes,const_cast<FReal*>(ParentCell->getLocal(idxRhs)),localExp);
FBlas::add(AbstractBaseClass::nnodes,
const_cast<FReal*>(ParentExpansion->getLocal(idxRhs)),localExp);
// 2) apply Sx
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
if (ChildCells[ChildIndex]){
AbstractBaseClass::Interpolator->applyL2L(ChildIndex, localExp, ChildCells[ChildIndex]->getLocal(idxRhs));
if (ChildExpansion[ChildIndex]){
AbstractBaseClass::Interpolator->applyL2L(
ChildIndex, localExp, ChildExpansion[ChildIndex]->getLocal(idxRhs));
}
}
}
}
void L2P(const CellClass* const LeafCell,
ContainerClass* const TargetParticles)
override
......@@ -196,6 +239,7 @@ public:
}
void P2P(const FTreeCoordinate& inPosition,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict inSources,
ContainerClass* const inNeighbors[], const int neighborPositions[],
......@@ -213,14 +257,13 @@ public:
{
// Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf
if(LeafLevelSeparationCriterion==1) {
if(inTargets == inSources){
if(inTargets == inSources) {
P2POuter(inPosition, inTargets, inNeighbors, neighborPositions, inSize);
if(do_inner) {
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PInner(inTargets,MatrixKernel);
}
}
else{
} else {
const ContainerClass* const srcPtr[1] = {inSources};
if(do_inner) {
DirectInteractionComputer<FReal, MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,srcPtr,1,MatrixKernel);
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment