Commit b26fd3c1 authored by Quentin Khan's avatar Quentin Khan
Browse files

Update Chebyshev kernel interface to use the new data organisation

 - Remove virtual FMM operators overloads from the abstract base class.

 - Divide the FChebCell inner data layout into two sub-types:
   multipole_t and local_expansion_t. Two class attributes are accessible
   through the `getMultipoleData` and `getLocalExpansionData` methods.

 - Even out file indentation.

 - Change FMM operators signature to take advantage of the new
   layout. The operators only take as parameters the data they may need
   instead of the whole cell. For instance (simplified):

   void M2M(CellClass* parent, CellClass** children);

   becomes

   void M2M(multipole_t*  parent_m,    symbolic_data_t* parent_s,
            multipole_t** children_ms, symbolic_data_t* children_ss);
parent d0ba4c57
......@@ -122,50 +122,6 @@ public:
const InterpolatorClass * getPtrToInterpolator() const
{ return Interpolator.getPtr(); }
virtual void P2M(CellClass* const LeafCell,
const ContainerClass* const SourceParticles) = 0;
virtual void M2M(CellClass* const FRestrict ParentCell,
const CellClass*const FRestrict *const FRestrict ChildCells,
const int TreeLevel) = 0;
virtual void M2L(CellClass* const FRestrict TargetCell,
const CellClass* SourceCells[],
const int SourcePositions[],
const int NumSourceCells,
const int TreeLevel) = 0;
virtual void L2L(const CellClass* const FRestrict ParentCell,
CellClass* FRestrict *const FRestrict ChildCells,
const int TreeLevel) = 0;
virtual void L2P(const CellClass* const LeafCell,
ContainerClass* const TargetParticles) = 0;
virtual void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions
ContainerClass* const FRestrict TargetParticles,
const ContainerClass* const FRestrict /*SourceParticles*/,
ContainerClass* const NeighborSourceParticles[],
const int SourcePositions[],
const int /* size */) = 0;
virtual void P2POuter(const FTreeCoordinate& inLeafPosition,
ContainerClass* const FRestrict targets,
ContainerClass* const directNeighborsParticles[], const int neighborPositions[],
const int size) = 0;
virtual void P2PRemote(const FTreeCoordinate& /*inPosition*/,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
const ContainerClass* const inNeighbors[], const int SourcePositions[], const int /*inSize*/) = 0;
};
......
......@@ -37,34 +37,88 @@ class FChebCell : public FBasicCell, public FAbstractSendable
// we multiply by 2 because we store the Multipole expansion and the compressed one.
static const int VectorSize = TensorTraits<ORDER>::nnodes * 2;
public:
struct multipole_t {
FReal multipole_exp[NRHS * NVALS * VectorSize]; //< Multipole expansion
FReal local_exp[NLHS * NVALS * VectorSize]; //< Local expansion
public:
FChebCell(){
const FReal* getMultipole(const int inRhs) const
{ return this->multipole_exp + inRhs*VectorSize; }
FReal* getMultipole(const int inRhs)
{ return this->multipole_exp + inRhs*VectorSize; }
constexpr int getVectorSize() const {
return VectorSize;
}
// to extend FAbstractSendable
template <class BufferWriterClass>
void serialize(BufferWriterClass& buffer) const{
buffer.write(this->multipole_exp, VectorSize*NVALS*NRHS);
}
template <class BufferReaderClass>
void deserialize(BufferReaderClass& buffer){
buffer.fillArray(this->multipole_exp, VectorSize*NVALS*NRHS);
}
void reset() {
memset(multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize);
}
};
/**
 * Local-expansion part of the cell data.
 *
 * Wraps a fixed-size buffer of NLHS * NVALS * VectorSize values and
 * provides access, buffer (de)serialisation and zeroing for it.
 */
struct local_expansion_t {
    FReal local_exp[NLHS * NVALS * VectorSize]; //< Local expansion

    /** Get Local (read-only): pointer at offset inRhs * VectorSize in the buffer */
    const FReal* getLocal(const int inRhs) const {
        return local_exp + inRhs * VectorSize;
    }

    /** Get Local (writable): pointer at offset inRhs * VectorSize in the buffer */
    FReal* getLocal(const int inRhs) {
        return local_exp + inRhs * VectorSize;
    }

    /** To get the leading dim of a vec */
    constexpr int getVectorSize() const {
        return VectorSize;
    }

    // to extend FAbstractSendable

    /** Write the whole local expansion buffer into the given writer */
    template <class BufferWriterClass>
    void serialize(BufferWriterClass& buffer) const {
        buffer.write(local_exp, VectorSize*NVALS*NLHS);
    }

    /** Fill the whole local expansion buffer from the given reader */
    template <class BufferReaderClass>
    void deserialize(BufferReaderClass& buffer) {
        buffer.fillArray(local_exp, VectorSize*NVALS*NLHS);
    }

    /** Set every byte of the local expansion buffer to zero */
    void reset() {
        // sizeof(local_exp) is the full array size in bytes,
        // i.e. sizeof(FReal) * NLHS * NVALS * VectorSize.
        memset(local_exp, 0, sizeof(local_exp));
    }
};
~FChebCell() {}
multipole_t m_data {};
local_expansion_t l_data {};
/** Get Multipole */
const FReal* getMultipole(const int inRhs) const
{ return this->multipole_exp + inRhs*VectorSize;
/** Tell whether the cell holds multipole data; always returns true here. */
bool hasMultipoleData() const noexcept {
return true;
}
/** Get Local */
const FReal* getLocal(const int inRhs) const{
return this->local_exp + inRhs*VectorSize;
/** Tell whether the cell holds local expansion data; always returns true here. */
bool hasLocalExpansionData() const noexcept {
return true;
}
/** Get Multipole */
FReal* getMultipole(const int inRhs){
return this->multipole_exp + inRhs*VectorSize;
/** Get a writable reference to the multipole data member (m_data) */
multipole_t& getMultipoleData() noexcept {
return m_data;
}
/** Get a read-only reference to the multipole data member (m_data) */
const multipole_t& getMultipoleData() const noexcept {
return m_data;
}
/** Get Local */
FReal* getLocal(const int inRhs){
return this->local_exp + inRhs*VectorSize;
/** Get a writable reference to the local expansion data member (l_data) */
local_expansion_t& getLocalExpansionData() noexcept {
return l_data;
}
/** Get a read-only reference to the local expansion data member (l_data) */
const local_expansion_t& getLocalExpansionData() const noexcept {
return l_data;
}
/** To get the leading dim of a vec */
int getVectorSize() const{
......@@ -73,8 +127,8 @@ public:
/** Make it like the beginning */
void resetToInitialState(){
memset(multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize);
memset(local_exp, 0, sizeof(FReal) * NLHS * NVALS * VectorSize);
m_data.reset();
l_data.reset();
}
///////////////////////////////////////////////////////
......@@ -82,20 +136,20 @@ public:
///////////////////////////////////////////////////////
template <class BufferWriterClass>
void serializeUp(BufferWriterClass& buffer) const{
buffer.write(multipole_exp, VectorSize*NVALS*NRHS);
m_data.serialize(buffer);
}
template <class BufferReaderClass>
void deserializeUp(BufferReaderClass& buffer){
buffer.fillArray(multipole_exp, VectorSize*NVALS*NRHS);
m_data.deserialize(buffer);
}
template <class BufferWriterClass>
void serializeDown(BufferWriterClass& buffer) const{
buffer.write(local_exp, VectorSize*NVALS*NLHS);
l_data.serialize(buffer);
}
template <class BufferReaderClass>
void deserializeDown(BufferReaderClass& buffer){
buffer.fillArray(local_exp, VectorSize*NVALS*NLHS);
l_data.deserialize(buffer);
}
///////////////////////////////////////////////////////
......@@ -104,14 +158,14 @@ public:
template <class BufferWriterClass>
void save(BufferWriterClass& buffer) const{
FBasicCell::save(buffer);
buffer.write(multipole_exp, VectorSize*NVALS*NRHS);
buffer.write(local_exp, VectorSize*NVALS*NLHS);
m_data.serialize(buffer);
l_data.serialize(buffer);
}
template <class BufferReaderClass>
void restore(BufferReaderClass& buffer){
FBasicCell::restore(buffer);
buffer.fillArray(multipole_exp, VectorSize*NVALS*NRHS);
buffer.fillArray(local_exp, VectorSize*NVALS*NLHS);
m_data.deserialize(buffer);
l_data.deserialize(buffer);
}
FSize getSavedSize() const {
......@@ -172,5 +226,3 @@ public:
};
#endif //FCHEBCELL_HPP
......@@ -175,62 +175,76 @@ public:
void P2M(CellClass* const /* not needed */, const ContainerClass* const SourceParticles) override
template<class SymbolicData>
void P2M(typename CellClass::multipole_t* const LeafCell,
const SymbolicData* const LeafSymbData,
const ContainerClass* const SourceParticles)
{
flopsP2M += countFlopsP2M(int(SourceParticles->getNbParticles()));
}
void M2M(CellClass* const FRestrict /* not needed */,
const CellClass*const FRestrict *const FRestrict ChildCells,
const int TreeLevel) override
template<class SymbolicData>
void M2M(typename CellClass::multipole_t * const FRestrict /*ParentMultipole*/,
const SymbolicData* const ParentSymb,
const typename CellClass::multipole_t * const FRestrict * const FRestrict ChildMultipoles,
const SymbolicData* const /*ChildSymbs*/[])
{
int TreeLevel = static_cast<int>(ParentSymb->getLevel());
unsigned int flops = 0;
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex)
if (ChildCells[ChildIndex]) flops += countFlopsM2MorL2L();
if (ChildMultipoles[ChildIndex])
flops += countFlopsM2MorL2L();
flopsM2M += flops;
flopsPerLevelM2M[TreeLevel] += flops;
}
void M2L(CellClass* const FRestrict /* not needed */,
const CellClass* SourceCells[],
const int positions[],
const int size,
const int TreeLevel) override
template<class SymbolicData>
void M2L(typename CellClass::local_expansion_t * const FRestrict /*TargetExpansion*/,
const SymbolicData* const TargetSymb,
const typename CellClass::multipole_t * const FRestrict /*SourceMultipoles*/[],
const SymbolicData* const FRestrict /*SourceSymbs*/[],
const int neighborPositions[],
const int size)
{
int TreeLevel = static_cast<int>(TargetSymb->getLevel());
unsigned int flops = 0;
// count how often each of the 16 interactions is used
memset(countExp, 0, sizeof(int) * 343);
for (int idx=0; idx<size; ++idx)
countExp[SymHandler->pindices[positions[idx]]]++;
countExp[SymHandler->pindices[neighborPositions[idx]]]++;
// multiply (mat-mat-mul)
for (int pidx=0; pidx<343; ++pidx)
if (countExp[pidx])
flops += countFlopsM2L(countExp[pidx], SymHandler->LowRank[pidx]) + countExp[pidx]*nnodes;
flopsM2L += flops;
flopsPerLevelM2L[TreeLevel] += flops;
}
flopsPerLevelM2L[TreeLevel] += flops;}
void L2L(const CellClass* const FRestrict /* not needed */,
CellClass* FRestrict *const FRestrict ChildCells,
const int TreeLevel) override
template<class SymbolicData>
void L2L(const typename CellClass::local_expansion_t * const FRestrict /*ParentExpansion*/,
const SymbolicData* const ParentSymb,
typename CellClass::local_expansion_t * FRestrict *const FRestrict ChildExpansions,
const SymbolicData* const /*ChildSymbs*/[])
{
int TreeLevel = static_cast<int>(ParentSymb->getLevel());
unsigned int flops = 0;
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex)
if (ChildCells[ChildIndex]) flops += countFlopsM2MorL2L() + nnodes;
if (ChildExpansions[ChildIndex]) flops += countFlopsM2MorL2L() + nnodes;
flopsL2L += flops;
flopsPerLevelL2L[TreeLevel] += flops;
}
void L2P(const CellClass* const /* not needed */,
ContainerClass* const TargetParticles) override
template<class SymbolicData>
void L2P(const typename CellClass::local_expansion_t * const /*LeafCell*/,
const SymbolicData * const /*LeafSymbData*/,
ContainerClass* const TargetParticles)
{
//// 1.a) apply Sx
//flopsL2P += countFlopsP2MorL2P(TargetParticlesParticles->getNbParticles()) + TargetParticles->getNbParticles();
......@@ -273,8 +287,8 @@ public:
flopsP2P += countFlopsP2Pmutual() * TargetParticles->getNbParticles() * NeighborSourceParticles[idx]->getNbParticles();
}
};
};
......@@ -288,7 +302,8 @@ public:
* Handler to deal with all symmetries: Stores permutation indices and vectors
* to reduce 343 different interactions to 16 only.
*/
template <class FReal, class CellClass,
template <class FReal,
class CellClass,
class ContainerClass,
class MatrixKernelClass,
int ORDER>
......
......@@ -83,38 +83,48 @@ public:
}
void P2M(CellClass* const LeafCell,
template<class SymbolicData>
void P2M(typename CellClass::multipole_t* const LeafMultipole,
const SymbolicData* const LeafSymbData,
const ContainerClass* const SourceParticles)
override
{
const FPoint<FReal> LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate()));
int leafLevel = static_cast<int>(LeafSymbData->getLevel());
FReal leafBoxWidth = AbstractBaseClass::BoxWidth / FReal(1 << leafLevel);
const FPoint<FReal> LeafCellCenter(
AbstractBaseClass::getCellCenter(LeafSymbData->getCoordinate(),
leafLevel));
// 1) apply Sy
AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getMultipole(0), SourceParticles);
AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, leafBoxWidth,
LeafMultipole->getMultipole(0), SourceParticles);
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 2) apply B
M2LHandler->applyB(LeafCell->getMultipole(idxRhs), LeafCell->getMultipole(idxRhs) + AbstractBaseClass::nnodes);
M2LHandler->applyB(LeafMultipole->getMultipole(idxRhs),
LeafMultipole->getMultipole(idxRhs) + AbstractBaseClass::nnodes);
}
}
void M2M(CellClass* const FRestrict ParentCell,
const CellClass*const FRestrict *const FRestrict ChildCells,
const int /*TreeLevel*/)
override
template<class SymbolicData>
void M2M(typename CellClass::multipole_t * const FRestrict ParentMultipole,
const SymbolicData* const /*ParentSymb*/,
const typename CellClass::multipole_t * const FRestrict * const FRestrict ChildMultipoles,
const SymbolicData* const /*ChildSymbs*/[])
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply Sy
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
if (ChildCells[ChildIndex]){
AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxRhs),
ParentCell->getMultipole(idxRhs));
if (ChildMultipoles[ChildIndex]){
AbstractBaseClass::Interpolator->applyM2M(
ChildIndex, ChildMultipoles[ChildIndex]->getMultipole(idxRhs),
ParentMultipole->getMultipole(idxRhs));
}
}
// 2) apply B
M2LHandler->applyB(ParentCell->getMultipole(idxRhs), ParentCell->getMultipole(idxRhs) + AbstractBaseClass::nnodes);
M2LHandler->applyB(ParentMultipole->getMultipole(idxRhs),
ParentMultipole->getMultipole(idxRhs) + AbstractBaseClass::nnodes);
}
}
......@@ -137,16 +147,22 @@ public:
// }
// }
void M2L(CellClass* const FRestrict TargetCell, const CellClass* SourceCells[],
const int neighborPositions[], const int inSize, const int TreeLevel)
override
template<class SymbolicData>
void M2L(typename CellClass::local_expansion_t * const FRestrict TargetExpansion,
const SymbolicData* const TargetSymb,
const typename CellClass::multipole_t * const FRestrict SourceMultipoles[],
const SymbolicData* const FRestrict /*SourceSymbs*/[],
const int neighborPositions[],
const int inSize)
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
FReal *const CompressedLocalExpansion = TargetCell->getLocal(idxRhs) + AbstractBaseClass::nnodes;
const FReal CellWidth(AbstractBaseClass::BoxWidth / FReal(FMath::pow(2, TreeLevel)));
FReal *const CompressedLocalExpansion = TargetExpansion->getLocal(idxRhs) + AbstractBaseClass::nnodes;
const FReal CellWidth(AbstractBaseClass::BoxWidth / FReal(1 << TargetSymb->getLevel()));
for(int idxExistingNeigh = 0 ; idxExistingNeigh < inSize ; ++idxExistingNeigh){
const int idx = neighborPositions[idxExistingNeigh];
M2LHandler->applyC(idx, CellWidth, SourceCells[idxExistingNeigh]->getMultipole(idxRhs) + AbstractBaseClass::nnodes,
M2LHandler->applyC(idx, CellWidth,
SourceMultipoles[idxExistingNeigh]->getMultipole(idxRhs)
+ AbstractBaseClass::nnodes,
CompressedLocalExpansion);
}
}
......@@ -166,33 +182,44 @@ public:
// }
void L2L(const CellClass* const FRestrict ParentCell,
CellClass* FRestrict *const FRestrict ChildCells,
const int /*TreeLevel*/)
override
template<class SymbolicData>
void L2L(const typename CellClass::local_expansion_t * const FRestrict ParentExpansion,
const SymbolicData* const /*ParentSymb*/,
typename CellClass::local_expansion_t * FRestrict *const FRestrict ChildExpansions,
const SymbolicData* const /*ChildSymbs*/[])
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply U
M2LHandler->applyU(ParentCell->getLocal(idxRhs) + AbstractBaseClass::nnodes,
const_cast<CellClass*>(ParentCell)->getLocal(idxRhs));
M2LHandler->applyU(ParentExpansion->getLocal(idxRhs) + AbstractBaseClass::nnodes,
const_cast<typename CellClass::local_expansion_t*>(ParentExpansion)->getLocal(idxRhs));
// 2) apply Sx
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
if (ChildCells[ChildIndex]){
AbstractBaseClass::Interpolator->applyL2L(ChildIndex, ParentCell->getLocal(idxRhs), ChildCells[ChildIndex]->getLocal(idxRhs));
if (ChildExpansions[ChildIndex]){
AbstractBaseClass::Interpolator->applyL2L(
ChildIndex,
ParentExpansion->getLocal(idxRhs),
ChildExpansions[ChildIndex]->getLocal(idxRhs));
}
}
}
}
void L2P(const CellClass* const LeafCell,
template<class SymbolicData>
void L2P(const typename CellClass::local_expansion_t* const LeafExpansion,
const SymbolicData* const LeafSymb,
ContainerClass* const TargetParticles)
override
{
const FPoint<FReal> LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate()));
int leafLevel = static_cast<int>(LeafSymb->getLevel());
FReal leafBoxWidth = AbstractBaseClass::BoxWidth / FReal(1 << leafLevel);
const FPoint<FReal> LeafCellCenter(
AbstractBaseClass::getCellCenter(LeafSymb->getCoordinate(),
leafLevel));
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply U
M2LHandler->applyU(LeafCell->getLocal(idxRhs) + AbstractBaseClass::nnodes, const_cast<CellClass*>(LeafCell)->getLocal(idxRhs));
M2LHandler->applyU(LeafExpansion->getLocal(idxRhs) + AbstractBaseClass::nnodes,
const_cast<typename CellClass::local_expansion_t*>(LeafExpansion)->getLocal(idxRhs));
}
//// 2.a) apply Sx
......@@ -207,8 +234,8 @@ public:
// TargetParticles);
// 2.c) apply Sx and Px (grad Sx)
AbstractBaseClass::Interpolator->applyL2PTotal(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getLocal(0), TargetParticles);
AbstractBaseClass::Interpolator->applyL2PTotal(LeafCellCenter, leafBoxWidth,
LeafExpansion->getLocal(0), TargetParticles);
}
......
......@@ -95,6 +95,8 @@ protected:
assert(Mul[idx]==nullptr || Loc[idx]==nullptr);
Mul[idx] = new FReal [24 * nnodes];
Loc[idx] = new FReal [24 * nnodes];
memset(Mul[idx], 0, 24 * nnodes * sizeof(FReal));
memset(Loc[idx], 0, 24 * nnodes * sizeof(FReal));
}
}
......@@ -178,30 +180,37 @@ public:
void P2M(CellClass* const LeafCell,
const ContainerClass* const SourceParticles/*, const int level = AbstractBaseClass::TreeHeight*/)
override
template<class SymbolicData>
void P2M(typename CellClass::multipole_t* const LeafCell,
const SymbolicData* const LeafSymbData,
const ContainerClass* const SourceParticles)
{
// apply Sy
const FPoint<FReal> LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate()));
AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
const FPoint<FReal> LeafCellCenter =
AbstractBaseClass::getLeafCellCenter(LeafSymbData->getCoordinate());
FReal leafBoxWidth = AbstractBaseClass::BoxWidth / FReal(1 << LeafSymbData->getLevel());
AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, leafBoxWidth,
LeafCell->getMultipole(0), SourceParticles);
}
void M2M(CellClass* const FRestrict ParentCell,
const CellClass*const FRestrict *const FRestrict ChildCells,
const int /*TreeLevel*/)
override
template<class SymbolicData>
void M2M(typename CellClass::multipole_t * const FRestrict ParentMultipole,
const SymbolicData* const /*ParentSymb*/,
const typename CellClass::multipole_t * const FRestrict * const FRestrict ChildMultipoles,
const SymbolicData* const /*ChildSymbs*/[])
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// Reset the Parent expansion to zero
// FBlas::scal(nnodes*2, FReal(0.), ParentCell->getMultipole(idxRhs));
// FBlas::scal(nnodes*2, FReal(0.), ParentMultipole->getMultipole(idxRhs));
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
// apply Sy
if (ChildCells[ChildIndex]){
AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxRhs), ParentCell->getMultipole(idxRhs));
if (ChildMultipoles[ChildIndex]){
AbstractBaseClass::Interpolator->applyM2M(
ChildIndex, ChildMultipoles[ChildIndex]->getMultipole(idxRhs),
ParentMultipole->getMultipole(idxRhs));
}
}
}
......@@ -209,8 +218,14 @@ public:
void M2L(CellClass* const FRestrict TargetCell, const CellClass* SourceCells[],
const int neighborPositions[], const int inSize, const int TreeLevel) override {
template<class SymbolicData>
void M2L(typename CellClass::local_expansion_t * const FRestrict TargetExpansion,
const SymbolicData* const TargetSymb,
const typename CellClass::multipole_t * const FRestrict SourceMultipoles[],
const SymbolicData* const FRestrict /*SourceSymbs*/[],
const int neighborPositions[],
const int inSize)
{
#ifdef LOG_TIMINGS
time.tic();
#endif
......@@ -223,7 +238,7 @@ public:
const unsigned int count = (countExp[pidx])++;
FReal *const mul = Mul[pidx] + count*nnodes;
const unsigned int *const pvec = SymHandler->pvectors[idx];
const FReal *const MultiExp = SourceCells[idxExistingNeigh]->getMultipole(idxRhs);
const FReal *const MultiExp = SourceMultipoles[idxExistingNeigh]->getMultipole(idxRhs);
/*
// no loop unrolling
......@@ -269,7 +284,8 @@ public:
#endif
// multiply (mat-mat-mul)
FReal Compressed [nnodes * 24];
const int TreeLevel = static_cast<int>(TargetSymb->getLevel());
FReal Compressed [nnodes * 24] {};
const FReal scale = MatrixKernel->getScaleFactor(AbstractBaseClass::BoxWidth, TreeLevel);
for (unsigned int pidx=0; pidx<343; ++pidx) {
const unsigned int count = countExp[pidx];
......@@ -295,7 +311,7 @@ public:
#endif
// permute and add contribution to local expansions
FReal *const LocalExpansion = TargetCell->getLocal(idxRhs