Commit b26fd3c1 authored by Quentin Khan's avatar Quentin Khan

Update Chebyshev kernel interface to use the new data organisation

 - Remove virtual FMM operators overloads from the abstract base class.

 - Divide the FChebCell inner data layout into two sub-types:
   multipole_t and local_expansion_t. Two class attribute are accessible
   through the `getMultipoledata` and `getLocalExpansionData` methods.

 - Even out files indentation.

 - Change FMM operators signature to take advantage of the new
   layout. The operators only take as parameters the data they may need
   instead of the whole cell. For instance (simplified):

   void M2M(CellClass* parent, CellClass** children);

   becomes

   void M2M(multipole_t*  parent_m,    symbolic_data_t* parent_s,
            multipole_t** children_ms, symbolic_data_t* children_ss);
parent d0ba4c57
......@@ -122,50 +122,6 @@ public:
const InterpolatorClass * getPtrToInterpolator() const
{ return Interpolator.getPtr(); }
virtual void P2M(CellClass* const LeafCell,
const ContainerClass* const SourceParticles) = 0;
virtual void M2M(CellClass* const FRestrict ParentCell,
const CellClass*const FRestrict *const FRestrict ChildCells,
const int TreeLevel) = 0;
virtual void M2L(CellClass* const FRestrict TargetCell,
const CellClass* SourceCells[],
const int SourcePositions[],
const int NumSourceCells,
const int TreeLevel) = 0;
virtual void L2L(const CellClass* const FRestrict ParentCell,
CellClass* FRestrict *const FRestrict ChildCells,
const int TreeLevel) = 0;
virtual void L2P(const CellClass* const LeafCell,
ContainerClass* const TargetParticles) = 0;
virtual void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions
ContainerClass* const FRestrict TargetParticles,
const ContainerClass* const FRestrict /*SourceParticles*/,
ContainerClass* const NeighborSourceParticles[],
const int SourcePositions[],
const int /* size */) = 0;
virtual void P2POuter(const FTreeCoordinate& inLeafPosition,
ContainerClass* const FRestrict targets,
ContainerClass* const directNeighborsParticles[], const int neighborPositions[],
const int size) = 0;
virtual void P2PRemote(const FTreeCoordinate& /*inPosition*/,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
const ContainerClass* const inNeighbors[], const int SourcePositions[], const int /*inSize*/) = 0;
};
......
......@@ -4,13 +4,13 @@
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FCHEBCELL_HPP
......@@ -37,35 +37,89 @@ class FChebCell : public FBasicCell, public FAbstractSendable
// we multiply by 2 because we store the Multipole expansion end the compressed one.
static const int VectorSize = TensorTraits<ORDER>::nnodes * 2;
FReal multipole_exp[NRHS * NVALS * VectorSize]; //< Multipole expansion
FReal local_exp[NLHS * NVALS * VectorSize]; //< Local expansion
public:
FChebCell(){
memset(multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize);
memset(local_exp, 0, sizeof(FReal) * NLHS * NVALS * VectorSize);
struct multipole_t {
FReal multipole_exp[NRHS * NVALS * VectorSize]; //< Multipole expansion
const FReal* getMultipole(const int inRhs) const
{ return this->multipole_exp + inRhs*VectorSize; }
FReal* getMultipole(const int inRhs)
{ return this->multipole_exp + inRhs*VectorSize; }
constexpr int getVectorSize() const {
return VectorSize;
}
// to extend FAbstractSendable
template <class BufferWriterClass>
void serialize(BufferWriterClass& buffer) const{
buffer.write(this->multipole_exp, VectorSize*NVALS*NRHS);
}
template <class BufferReaderClass>
void deserialize(BufferReaderClass& buffer){
buffer.fillArray(this->multipole_exp, VectorSize*NVALS*NRHS);
}
void reset() {
memset(multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize);
}
};
struct local_expansion_t {
FReal local_exp[NLHS * NVALS * VectorSize]; //< Local expansion
const FReal* getLocal(const int inRhs) const
{ return this->local_exp + inRhs*VectorSize; }
FReal* getLocal(const int inRhs)
{ return this->local_exp + inRhs*VectorSize; }
constexpr int getVectorSize() const {
return VectorSize;
}
// to extend FAbstractSendable
template <class BufferWriterClass>
void serialize(BufferWriterClass& buffer) const{
buffer.write(this->local_exp, VectorSize*NVALS*NLHS);
}
template <class BufferReaderClass>
void deserialize(BufferReaderClass& buffer){
buffer.fillArray(this->local_exp, VectorSize*NVALS*NLHS);
}
void reset() {
memset(local_exp, 0, sizeof(FReal) * NLHS * NVALS * VectorSize);
}
};
multipole_t m_data {};
local_expansion_t l_data {};
bool hasMultipoleData() const noexcept {
return true;
}
bool hasLocalExpansionData() const noexcept {
return true;
}
~FChebCell() {}
/** Get Multipole */
const FReal* getMultipole(const int inRhs) const
{ return this->multipole_exp + inRhs*VectorSize;
multipole_t& getMultipoleData() noexcept {
return m_data;
}
/** Get Local */
const FReal* getLocal(const int inRhs) const{
return this->local_exp + inRhs*VectorSize;
const multipole_t& getMultipoleData() const noexcept {
return m_data;
}
/** Get Multipole */
FReal* getMultipole(const int inRhs){
return this->multipole_exp + inRhs*VectorSize;
local_expansion_t& getLocalExpansionData() noexcept {
return l_data;
}
/** Get Local */
FReal* getLocal(const int inRhs){
return this->local_exp + inRhs*VectorSize;
const local_expansion_t& getLocalExpansionData() const noexcept {
return l_data;
}
/** To get the leading dim of a vec */
int getVectorSize() const{
return VectorSize;
......@@ -73,8 +127,8 @@ public:
/** Make it like the begining */
void resetToInitialState(){
memset(multipole_exp, 0, sizeof(FReal) * NRHS * NVALS * VectorSize);
memset(local_exp, 0, sizeof(FReal) * NLHS * NVALS * VectorSize);
m_data.reset();
l_data.reset();
}
///////////////////////////////////////////////////////
......@@ -82,20 +136,20 @@ public:
///////////////////////////////////////////////////////
template <class BufferWriterClass>
void serializeUp(BufferWriterClass& buffer) const{
buffer.write(multipole_exp, VectorSize*NVALS*NRHS);
m_data.serialize(buffer);
}
template <class BufferReaderClass>
void deserializeUp(BufferReaderClass& buffer){
buffer.fillArray(multipole_exp, VectorSize*NVALS*NRHS);
m_data.deserialize(buffer);
}
template <class BufferWriterClass>
void serializeDown(BufferWriterClass& buffer) const{
buffer.write(local_exp, VectorSize*NVALS*NLHS);
l_data.serialize(buffer);
}
template <class BufferReaderClass>
void deserializeDown(BufferReaderClass& buffer){
buffer.fillArray(local_exp, VectorSize*NVALS*NLHS);
l_data.deserialize(buffer);
}
///////////////////////////////////////////////////////
......@@ -104,14 +158,14 @@ public:
template <class BufferWriterClass>
void save(BufferWriterClass& buffer) const{
FBasicCell::save(buffer);
buffer.write(multipole_exp, VectorSize*NVALS*NRHS);
buffer.write(local_exp, VectorSize*NVALS*NLHS);
m_data.serialize(buffer);
l_data.serialize(buffer);
}
template <class BufferReaderClass>
void restore(BufferReaderClass& buffer){
FBasicCell::restore(buffer);
buffer.fillArray(multipole_exp, VectorSize*NVALS*NRHS);
buffer.fillArray(local_exp, VectorSize*NVALS*NLHS);
m_data.deserialize(buffer);
l_data.deserialize(buffer);
}
FSize getSavedSize() const {
......@@ -172,5 +226,3 @@ public:
};
#endif //FCHEBCELL_HPP
......@@ -4,13 +4,13 @@
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FCHEBFLOPSSYMKERNEL_HPP
......@@ -51,20 +51,20 @@ template < class FReal, class CellClass, class ContainerClass, class MatrixKerne
class FChebFlopsSymKernel
: public FAbstractKernels<CellClass, ContainerClass>
{
enum {nnodes = TensorTraits<ORDER>::nnodes};
enum {nnodes = TensorTraits<ORDER>::nnodes};
public:
/// Handler to deal with all symmetries: Stores permutation indices and
/// vectors to reduce 343 different interactions to 16 only.
struct SymmetryHandler;
/// Handler to deal with all symmetries: Stores permutation indices and
/// vectors to reduce 343 different interactions to 16 only.
struct SymmetryHandler;
/// Needed for handling all symmetries
const FSmartPointer<MatrixKernelClass,FSmartPointerMemory> MatrixKernel;
const FSmartPointer<SymmetryHandler, FSmartPointerMemory> SymHandler;
/// Needed for handling all symmetries
const FSmartPointer<MatrixKernelClass,FSmartPointerMemory> MatrixKernel;
const FSmartPointer<SymmetryHandler, FSmartPointerMemory> SymHandler;
/// tree height
/// tree height
const unsigned int inTreeHeight;
// count permuted local and multipole expansions
// count permuted local and multipole expansions
unsigned int* countExp;
......@@ -72,209 +72,223 @@ public:
unsigned long long *flopsPerLevelM2M, *flopsPerLevelM2L, *flopsPerLevelL2L;
// start flop counters
// start flop counters
unsigned int countFlopsM2MorL2L() const
{ return 3 * nnodes * (2*ORDER-1); }
{ return 3 * nnodes * (2*ORDER-1); }
unsigned int countFlopsM2L(const unsigned int nexp, const unsigned int rank) const
{ return nexp * (4*nnodes*rank - rank - nnodes); }
{ return nexp * (4*nnodes*rank - rank - nnodes); }
unsigned int countFlopsP2P() const
{ return 34; }
{ return 34; }
unsigned int countFlopsP2Pmutual() const
{ return 39; }
{ return 39; }
unsigned int countFlopsP2M(const unsigned int N) const {
const unsigned first = N * (18 + (ORDER-2) * 6 + (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1) * 2)));
const unsigned W2 = 3 * ORDER*(2*(ORDER-1)-1);
const unsigned W4 = 3 * (ORDER*(ORDER-1)*(2*(ORDER-1)-1) + ORDER*ORDER*(2*(ORDER-1)-1));
const unsigned W8 = 3 * (2*(ORDER-1)-1) * (ORDER*(ORDER-1)*(ORDER-1) + ORDER*ORDER*(ORDER-1) + nnodes);
return first + W2 + W4 + W8 + nnodes*11;
}
return first + W2 + W4 + W8 + nnodes*11;
}
unsigned int countFlopsL2PTotal(const unsigned int N) const {
const unsigned W0 = nnodes;
const unsigned W2 = 3 * (ORDER-1)*ORDER*ORDER * 2*ORDER;
const unsigned W4 = 3 * ORDER*(ORDER-1)*(ORDER-1) * 2*ORDER;
const unsigned W8 = (ORDER-1)*(ORDER-1)*(ORDER-1) * (2*ORDER-1);
const unsigned second = N * (38 + (ORDER-2)*15 + (ORDER-1)*((ORDER-1) * (27 + (ORDER-1) * 16))) + 6;
return W0 + W2 + W4 + W8 + second;
}
// end flop counters
return W0 + W2 + W4 + W8 + second;
}
// end flop counters
public:
/**
* The constructor initializes all constant attributes and it reads the
* precomputed and compressed M2L operators from a binary file (an
* runtime_error is thrown if the required file is not valid).
*/
FChebFlopsSymKernel(const int _inTreeHeight,
const FReal inBoxWidth,
const FPoint<FReal>& inBoxCenter,
const FReal Epsilon)
: MatrixKernel(new MatrixKernelClass()),
SymHandler(new SymmetryHandler(MatrixKernel.getPtr(), Epsilon)), inTreeHeight(_inTreeHeight),
flopsP2M(0), flopsM2M(0), flopsM2L(0), flopsL2L(0), flopsL2P(0), flopsP2P(0),
flopsPerLevelM2M(nullptr), flopsPerLevelM2L(nullptr), flopsPerLevelL2L(nullptr)
{
/**
* The constructor initializes all constant attributes and it reads the
* precomputed and compressed M2L operators from a binary file (an
* runtime_error is thrown if the required file is not valid).
*/
FChebFlopsSymKernel(const int _inTreeHeight,
const FReal inBoxWidth,
const FPoint<FReal>& inBoxCenter,
const FReal Epsilon)
: MatrixKernel(new MatrixKernelClass()),
SymHandler(new SymmetryHandler(MatrixKernel.getPtr(), Epsilon)), inTreeHeight(_inTreeHeight),
flopsP2M(0), flopsM2M(0), flopsM2L(0), flopsL2L(0), flopsL2P(0), flopsP2P(0),
flopsPerLevelM2M(nullptr), flopsPerLevelM2L(nullptr), flopsPerLevelL2L(nullptr)
{
countExp = new unsigned int [343];
flopsPerLevelM2M = new unsigned long long [inTreeHeight];
flopsPerLevelM2L = new unsigned long long [inTreeHeight];
flopsPerLevelL2L = new unsigned long long [inTreeHeight];
for (unsigned int level = 0; level<inTreeHeight; ++level)
flopsPerLevelM2M[level] = flopsPerLevelM2L[level] = flopsPerLevelL2L[level] = 0;
}
flopsPerLevelM2M[level] = flopsPerLevelM2L[level] = flopsPerLevelL2L[level] = 0;
}
/** Copy constructor */
FChebFlopsSymKernel(const FChebFlopsSymKernel& other)
: SymHandler(other.SymHandler),
flopsP2M(0), flopsM2M(0), flopsM2L(0), flopsL2L(0), flopsL2P(0), flopsP2P(0)
{ countExp = new unsigned int [343]; }
/** Copy constructor */
FChebFlopsSymKernel(const FChebFlopsSymKernel& other)
: SymHandler(other.SymHandler),
flopsP2M(0), flopsM2M(0), flopsM2L(0), flopsL2L(0), flopsL2P(0), flopsP2P(0)
{ countExp = new unsigned int [343]; }
/** Destructor */
~FChebFlopsSymKernel()
{
delete [] countExp;
std::cout << "\n=================================================="
<< "\n- Flops for P2M = " << flopsP2M
<< "\n- Flops for M2M = " << flopsM2M
<< "\n- Flops for M2L = " << flopsM2L
<< "\n- Flops for L2L = " << flopsL2L
<< "\n- Flops for L2P = " << flopsL2P
<< "\n- Flops for P2P = " << flopsP2P
<< "\n- Overall Flops = " << flopsP2M + flopsM2M + flopsM2L + flopsL2L + flopsL2P + flopsP2P
<< "\n==================================================\n"
<< std::endl;
std::cout << "\n=================================================="
<< "\n- Flops for P2M/M2M" << std::endl;
/** Destructor */
~FChebFlopsSymKernel()
{
delete [] countExp;
std::cout << "\n=================================================="
<< "\n- Flops for P2M = " << flopsP2M
<< "\n- Flops for M2M = " << flopsM2M
<< "\n- Flops for M2L = " << flopsM2L
<< "\n- Flops for L2L = " << flopsL2L
<< "\n- Flops for L2P = " << flopsL2P
<< "\n- Flops for P2P = " << flopsP2P
<< "\n- Overall Flops = " << flopsP2M + flopsM2M + flopsM2L + flopsL2L + flopsL2P + flopsP2P
<< "\n==================================================\n"
<< std::endl;
std::cout << "\n=================================================="
<< "\n- Flops for P2M/M2M" << std::endl;
for (unsigned int level=0; level<inTreeHeight; ++level)
if (level < inTreeHeight-1)
std::cout << " |- at level " << level << " flops = " << flopsPerLevelM2M[level] << std::endl;
else
std::cout << " |- at level " << level << " flops = " << flopsP2M << std::endl;
std::cout << "=================================================="
<< "\n- Flops for M2L" << std::endl;
if (level < inTreeHeight-1)
std::cout << " |- at level " << level << " flops = " << flopsPerLevelM2M[level] << std::endl;
else
std::cout << " |- at level " << level << " flops = " << flopsP2M << std::endl;
std::cout << "=================================================="
<< "\n- Flops for M2L" << std::endl;
for (unsigned int level=0; level<inTreeHeight; ++level)
std::cout << " |- at level " << level << " flops = " << flopsPerLevelM2L[level] << std::endl;
std::cout << "=================================================="
<< "\n- Flops for L2L/L2P" << std::endl;
std::cout << " |- at level " << level << " flops = " << flopsPerLevelM2L[level] << std::endl;
std::cout << "=================================================="
<< "\n- Flops for L2L/L2P" << std::endl;
for (unsigned int level=0; level<inTreeHeight; ++level)
if (level < inTreeHeight-1)
std::cout << " |- at level " << level << " flops = " << flopsPerLevelL2L[level] << std::endl;
else
std::cout << " |- at level " << level << " flops = " << flopsL2P << std::endl;
std::cout << "==================================================" << std::endl;
if (flopsPerLevelM2M) delete [] flopsPerLevelM2M;
if (flopsPerLevelM2L) delete [] flopsPerLevelM2L;
if (flopsPerLevelL2L) delete [] flopsPerLevelL2L;
}
void P2M(CellClass* const /* not needed */, const ContainerClass* const SourceParticles) override
{
if (level < inTreeHeight-1)
std::cout << " |- at level " << level << " flops = " << flopsPerLevelL2L[level] << std::endl;
else
std::cout << " |- at level " << level << " flops = " << flopsL2P << std::endl;
std::cout << "==================================================" << std::endl;
if (flopsPerLevelM2M) delete [] flopsPerLevelM2M;
if (flopsPerLevelM2L) delete [] flopsPerLevelM2L;
if (flopsPerLevelL2L) delete [] flopsPerLevelL2L;
}
template<class SymbolicData>
void P2M(typename CellClass::multipole_t* const LeafCell,
const SymbolicData* const LeafSymbData,
const ContainerClass* const SourceParticles)
{
flopsP2M += countFlopsP2M(int(SourceParticles->getNbParticles()));
}
}
void M2M(CellClass* const FRestrict /* not needed */,
const CellClass*const FRestrict *const FRestrict ChildCells,
const int TreeLevel) override
{
template<class SymbolicData>
void M2M(typename CellClass::multipole_t * const FRestrict /*ParentMultipole*/,
const SymbolicData* const ParentSymb,
const typename CellClass::multipole_t * const FRestrict * const FRestrict ChildMultipoles,
const SymbolicData* const /*ChildSymbs*/[])
{
int TreeLevel = static_cast<int>(ParentSymb->getLevel());
unsigned int flops = 0;
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex)
if (ChildCells[ChildIndex]) flops += countFlopsM2MorL2L();
flopsM2M += flops;
flopsPerLevelM2M[TreeLevel] += flops;
}
if (ChildMultipoles[ChildIndex])
flops += countFlopsM2MorL2L();
flopsM2M += flops;
flopsPerLevelM2M[TreeLevel] += flops;
}
void M2L(CellClass* const FRestrict /* not needed */,
const CellClass* SourceCells[],
const int positions[],
const int size,
const int TreeLevel) override
{
template<class SymbolicData>
void M2L(typename CellClass::local_expansion_t * const FRestrict /*TargetExpansion*/,
const SymbolicData* const TargetSymb,
const typename CellClass::multipole_t * const FRestrict /*SourceMultipoles*/[],
const SymbolicData* const FRestrict /*SourceSymbs*/[],
const int neighborPositions[],
const int size)
{
int TreeLevel = static_cast<int>(TargetSymb->getLevel());
unsigned int flops = 0;
// count how ofter each of the 16 interactions is used
memset(countExp, 0, sizeof(int) * 343);
// count how ofter each of the 16 interactions is used
memset(countExp, 0, sizeof(int) * 343);
for (int idx=0; idx<size; ++idx)
countExp[SymHandler->pindices[positions[idx]]]++;
// multiply (mat-mat-mul)
countExp[SymHandler->pindices[neighborPositions[idx]]]++;
// multiply (mat-mat-mul)
for (int pidx=0; pidx<343; ++pidx)
if (countExp[pidx])
flops += countFlopsM2L(countExp[pidx], SymHandler->LowRank[pidx]) + countExp[pidx]*nnodes;
flopsM2L += flops;
flopsPerLevelM2L[TreeLevel] += flops;
}
if (countExp[pidx])
flops += countFlopsM2L(countExp[pidx], SymHandler->LowRank[pidx]) + countExp[pidx]*nnodes;
flopsM2L += flops;
flopsPerLevelM2L[TreeLevel] += flops;}
void L2L(const CellClass* const FRestrict /* not needed */,
CellClass* FRestrict *const FRestrict ChildCells,
const int TreeLevel) override
{
template<class SymbolicData>
void L2L(const typename CellClass::local_expansion_t * const FRestrict /*ParentExpansion*/,
const SymbolicData* const ParentSymb,
typename CellClass::local_expansion_t * FRestrict *const FRestrict ChildExpansions,
const SymbolicData* const /*ChildSymbs*/[])
{
int TreeLevel = static_cast<int>(ParentSymb->getLevel());
unsigned int flops = 0;
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex)
if (ChildCells[ChildIndex]) flops += countFlopsM2MorL2L() + nnodes;
flopsL2L += flops;
flopsPerLevelL2L[TreeLevel] += flops;
}
if (ChildExpansions[ChildIndex]) flops += countFlopsM2MorL2L() + nnodes;
flopsL2L += flops;
flopsPerLevelL2L[TreeLevel] += flops;
}
void L2P(const CellClass* const /* not needed */,
ContainerClass* const TargetParticles) override
{
//// 1.a) apply Sx
template<class SymbolicData>
void L2P(const typename CellClass::local_expansion_t * const /*LeafCell*/,
const SymbolicData * const /*LeafSymbData*/,
ContainerClass* const TargetParticles)
{
//// 1.a) apply Sx
//flopsL2P += countFlopsP2MorL2P(TargetParticlesParticles->getNbParticles()) + TargetParticles->getNbParticles();
//// 1.b) apply Px (grad Sx)
//// 1.b) apply Px (grad Sx)
//flopsL2P += countFlopsL2PGradient(TargetParticlesParticles->getNbParticles()) + 3 * TargetParticles->getNbParticles();
// or
// or
// 2) apply Sx and Px (grad Sx)
// 2) apply Sx and Px (grad Sx)
flopsL2P += countFlopsL2PTotal(int(TargetParticles->getNbParticles())) + 4 * int(TargetParticles->getNbParticles());
}