Commit ed7b9153 authored by Quentin Khan's avatar Quentin Khan

Update the balanced tree to use the new kernel data layout

 - Whitespace cleaning

 - Divide the FCostCell inner data layout into two sub-types:
   multipole_t and local_expansion_t. Two class attribute are accessible
   through the `getMultipoledata` and `getLocalExpansionData` methods.
   Define the optional symbolic_data_t type to store the cost of each
   operator.

 - Change FMM operators signature to take advantage of the new
   layout. The operators only take as parameters the data they may need
   instead of the whole cell. For instance (simplified):

   void M2M(CellClass* parent, CellClass** children);

   becomes

   void M2M(multipole_t*  parent_m,    symbolic_data_t* parent_s,
            multipole_t** children_ms, symbolic_data_t* children_ss);

 - Update the balanced algorithm to use the new interface.
parent 39d34cc8
......@@ -4,13 +4,13 @@
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
......@@ -69,7 +69,7 @@ public:
/// Type of matrix kernel function
using _MatrixKernelClass = MatrixKernelClass;
/// Chebyshev interpolation order
constexpr static const int order = ORDER;
constexpr static const int order = ORDER;
/// Class of the octree to work on
using _OctreeClass = OctreeClass;
......@@ -92,7 +92,7 @@ public:
/// count permuted local and multipole expansions
unsigned int* countExp;
/// Flops count for each operator of the FMM.
/// Flops count for each operator of the FMM.
unsigned long long flopsP2M = 0,
flopsM2M = 0,
flopsM2L = 0,
......@@ -110,7 +110,7 @@ public:
/// start flop counters
/// start flop counters
FSize countFlopsM2MorL2L() const
{ return 3 * nnodes * (2*ORDER-1); }
......@@ -156,7 +156,7 @@ public:
{
countExp = new unsigned int [343];
}
/** Copy constructor */
......@@ -173,10 +173,10 @@ public:
delete [] countExp;
}
void printResults(std::ostream& os) const {
os << "\n=================================================="
<< "\n- P2M Flops:" << flopsP2M
os << "\n=================================================="
<< "\n- P2M Flops:" << flopsP2M
<< "\n- M2M Flops:" << flopsM2M
<< "\n- M2L Flops:" << flopsM2L
<< "\n- L2L Flops:" << flopsL2L
......@@ -193,65 +193,78 @@ public:
os << "M2L count: " << countM2L << std::endl;
os << "L2L count: " << countL2L << std::endl;
os << "L2P count: " << countL2P << std::endl;
}
void P2M(CellClass* const cell, const ContainerClass* const SourceParticles) override {
template<class SymbolicData>
void P2M(typename CellClass::multipole_t* const /*LeafCell*/,
const SymbolicData* const LeafSymbData,
const ContainerClass* const SourceParticles)
{
FSize tmpCost = countFlopsP2M(SourceParticles->getNbParticles());
flopsP2M += tmpCost;
cell->addCost(tmpCost);
LeafSymbData->addCost(tmpCost);
countP2M++;
}
void M2M(CellClass* const FRestrict cell,
const CellClass*const FRestrict *const FRestrict ChildCells,
const int /*TreeLevel*/) override {
template<class SymbolicData>
void M2M(typename CellClass::multipole_t * const FRestrict /*ParentMultipole*/,
const SymbolicData* const ParentSymb,
const typename CellClass::multipole_t * const FRestrict * const FRestrict ChildMultipoles,
const SymbolicData* const /*ChildSymbs*/[])
{
FSize flops = 0;
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex)
if (ChildCells[ChildIndex]) flops += countFlopsM2MorL2L();
if (ChildMultipoles[ChildIndex])
flops += countFlopsM2MorL2L();
flopsM2M += flops;
cell->addCost(flops);
ParentSymb->addCost(flops);
countM2M++;
}
void M2L(CellClass* const FRestrict cell,
const CellClass* /*SourceCells*/[],
const int positions[],
const int size,
const int /* TreeLevel */) override
{
FSize flops = 0;
// count how ofter each of the 16 interactions is used
memset(countExp, 0, sizeof(int) * 343);
for (int idx=0; idx<size; ++idx)
countExp[SymHandler->pindices[positions[idx]]]++;
// multiply (mat-mat-mul)
for (int pidx=0; pidx<343; ++pidx)
if (countExp[pidx])
flops += countFlopsM2L(countExp[pidx], SymHandler->LowRank[pidx])
+ countExp[pidx]*nnodes;
flopsM2L += flops;
cell->addCost(flops);
countM2L++;
}
template<class SymbolicData>
void M2L(typename CellClass::local_expansion_t * const FRestrict /*TargetExpansion*/,
const SymbolicData* const TargetSymb,
const typename CellClass::multipole_t * const FRestrict /*SourceMultipoles*/[],
const SymbolicData* const FRestrict /*SourceSymbs*/[],
const int neighborPositions[],
const int size)
{
FSize flops = 0;
// count how ofter each of the 16 interactions is used
memset(countExp, 0, sizeof(int) * 343);
for (int idx=0; idx<size; ++idx)
countExp[SymHandler->pindices[neighborPositions[idx]]]++;
// multiply (mat-mat-mul)
for (int pidx=0; pidx<343; ++pidx)
if (countExp[pidx])
flops += countFlopsM2L(countExp[pidx], SymHandler->LowRank[pidx])
+ countExp[pidx]*nnodes;
flopsM2L += flops;
TargetSymb->addCost(flops);
countM2L++;
}
void L2L(const CellClass* const FRestrict /* not needed */,
CellClass* FRestrict *const FRestrict ChildCells,
const int /* TreeLevel*/) override {
template<class SymbolicData>
void L2L(const typename CellClass::local_expansion_t * const FRestrict /*ParentExpansion*/,
const SymbolicData* const /*ParentSymb*/,
typename CellClass::local_expansion_t * FRestrict *const FRestrict ChildExpansions,
const SymbolicData* const ChildSymbs[])
{
FSize flops = 0;
FSize tmpCost = 0;
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex)
if (ChildCells[ChildIndex]) {
if (ChildExpansions[ChildIndex]) {
tmpCost = countFlopsM2MorL2L() + nnodes;
flops += tmpCost;
ChildCells[ChildIndex]->addCost(tmpCost);
ChildSymbs[ChildIndex]->addCost(tmpCost);
}
flopsL2L += flops;
......@@ -260,24 +273,29 @@ public:
void L2P(const CellClass* const cell,
ContainerClass* const TargetParticles) override {
template<class SymbolicData>
void L2P(const typename CellClass::local_expansion_t * const /*LeafCell*/,
const SymbolicData * const LeafSymbData,
ContainerClass* const TargetParticles)
{
//// 1.a) apply Sx
//flopsL2P += countFlopsP2MorL2P(TargetParticlesParticles->getNbParticles()) + TargetParticles->getNbParticles();
//// 1.b) apply Px (grad Sx)
//flopsL2P += countFlopsL2PGradient(TargetParticlesParticles->getNbParticles()) + 3 * TargetParticles->getNbParticles();
// or
// 2) apply Sx and Px (grad Sx)
FSize tmpCost = 0;
tmpCost = countFlopsL2PTotal(TargetParticles->getNbParticles()) + 4 * TargetParticles->getNbParticles();
tmpCost =
countFlopsL2PTotal(TargetParticles->getNbParticles())
+ 4 * TargetParticles->getNbParticles();
flopsL2P += tmpCost;
cell->addCost(tmpCost);
LeafSymbData->addCost(tmpCost);
countL2P++;
}
void P2P(const FTreeCoordinate& LeafCellCoordinate, // needed for periodic boundary conditions
ContainerClass* const FRestrict TargetParticles,
......@@ -293,8 +311,8 @@ public:
tmpCost += countFlopsP2P() * tgtPartCount * srcPartCount;
for ( int idx = 0; idx < size; ++idx ) {
tmpCost +=
countFlopsP2P()
tmpCost +=
countFlopsP2P()
* tgtPartCount
* NeighborSourceParticles[idx]->getNbParticles();
}
......@@ -306,10 +324,10 @@ public:
/ 2);
for (int idx=0; idx < size && positions[idx]<=13; ++idx)
{
tmpCost +=
tmpCost +=
countFlopsP2Pmutual()
* tgtPartCount
* NeighborSourceParticles[idx]->getNbParticles();
* NeighborSourceParticles[idx]->getNbParticles();
}
}
......@@ -366,7 +384,7 @@ struct FChebSymCostKernel<FReal, CellClass, ContainerClass, MatrixKernelClass, O
// M2L operators
FReal* K[343];
int LowRank[343];
// permutation vectors and permutated indices
unsigned int pvectors[343][nnodes];
unsigned int pindices[343];
......@@ -378,7 +396,7 @@ struct FChebSymCostKernel<FReal, CellClass, ContainerClass, MatrixKernelClass, O
FReal nrm2(0.);
for (unsigned int k=0; k<nnodes; ++k)
nrm2 += singular_values[k] * singular_values[k];
FReal nrm2k(0.);
for (unsigned int k=nnodes; k>0; --k) {
nrm2k += singular_values[k-1] * singular_values[k-1];
......@@ -387,7 +405,7 @@ struct FChebSymCostKernel<FReal, CellClass, ContainerClass, MatrixKernelClass, O
throw std::runtime_error("rank cannot be larger than nnodes");
return 0;
}
/** Constructor */
SymmetryHandler(const MatrixKernelClass *const MatrixKernel, const double Epsilon)
......@@ -397,7 +415,7 @@ struct FChebSymCostKernel<FReal, CellClass, ContainerClass, MatrixKernelClass, O
K[t] = nullptr;
LowRank[t] = 0;
}
// set permutation vector and indices
const FInterpSymmetries<ORDER> Symmetries;
for (int i=-3; i<=3; ++i)
......@@ -438,7 +456,7 @@ private:
FReal *const WORK = new FReal [LWORK];
FReal *const VT = new FReal [nnodes*nnodes];
FReal *const S = new FReal [nnodes];
unsigned int counter = 0;
for (int i=2; i<=3; ++i) {
for (int j=0; j<=i; ++j) {
......@@ -458,14 +476,14 @@ private:
FBlas::scal(nnodes, weights[n], U + n, nnodes); // scale rows
FBlas::scal(nnodes, weights[n], U + n * nnodes); // scale cols
}
//////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////
// truncated singular value decomposition of matrix
const unsigned int info = FBlas::gesvd(nnodes, nnodes, U, S, VT, nnodes, LWORK, WORK);
if (info!=0) throw std::runtime_error("SVD did not converge with " + std::to_string(info));
const unsigned int rank = this->getRank(S, Epsilon);
// store
// store
const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
assert(K[idx]==nullptr);
K[idx] = new FReal [2*rank*nnodes];
......@@ -483,7 +501,7 @@ private:
// scale rows
FBlas::scal(rank, FReal(1.) / weights[n], K[idx] + rank*nnodes + n, nnodes);
}
//////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////
//std::cout << "(" << i << "," << j << "," << k << ") " << idx <<
// ", low rank = " << rank << std::endl;
......@@ -502,6 +520,6 @@ private:
};
#endif
#endif
// [--END--]
......@@ -8,7 +8,7 @@
#include <type_traits>
/**
/**
* \brief Empty trait class.
* \author Quentin Khan
*
......@@ -18,6 +18,79 @@
class FCostCellTypeTrait {};
namespace scalfmm {
namespace detail {
template<class CostType>
class symbolic_data_t {
/// The far-field cost of the cell.
/** Declared mutable because the existing algorithms use const cells.*/
mutable CostType _cost = 0;
/// The near-field cost of the cell.
/** Declared mutable because the existing algorithms use const cells.*/
mutable CostType _leafCost = 0;
public:
/// Type definition that can be retrieved by other classes
using cost_t = CostType;
/// Debug member, used to check whether the cell was already visited.
bool _visited = false;
/**
* \brief Gets the far-field cost of the cell.
* \return The far-field cost of the cell
*/
CostType getCost() const {
return _cost;
}
/**
* \brief Gets the near-field cost of the cell.
* \return The near-field cost of the cell
*/
CostType getNearCost() const {
return _leafCost;
}
/**
* \brief Sets the cost of the cell.
*/
void setCost(CostType newCost) {
_cost = newCost;
}
/**
* \brief Sets the near-field cost of the cell.
*/
void setNearCost(CostType newCost) {
_leafCost = newCost;
}
/**
* \brief Add a far-field cost to the cell.
* \return The cost of the cell
* \warning Can be used on const cells !
*/
CostType addCost(CostType cost) const {
_cost += cost;
return _cost;
}
/**
* \brief Add a near-field cost to the cell.
* \return The cost of the cell
* \warning Can be used on const cells !
*/
CostType addNearCost(CostType cost) const {
_leafCost += cost;
return _leafCost;
}
};
}
}
/**
* \brief Cell with a cost memory for balance computations.
* \author Quentin Khan
......@@ -26,79 +99,20 @@ class FCostCellTypeTrait {};
*
* \tparam BaseClass The base cell class to extend. The constructors are
* inherited automatically.
* \tparam CostType The type to use in order to store the cost. Defaults to FSize.
* \tparam CostType The type to use in order to store the cost. Defaults to
* `std::size_t`.
*/
template<typename BaseClass, typename CostType = FSize>
class FCostCell : public BaseClass, virtual public FCostCellTypeTrait {
template<typename BaseClass, typename CostType = std::size_t>
class FCostCell : public BaseClass, public scalfmm::detail::symbolic_data_t<CostType> {
static_assert(std::is_arithmetic<CostType>::value,
"The cell cost type must be an arithmetic type.");
/// The far-field cost of the cell.
/** Declared mutable because the existing algorithms use const cells.*/
mutable CostType _cost = 0;
/// The near-field cost of the cell.
/** Declared mutable because the existing algorithms use const cells.*/
mutable CostType _leafCost = 0;
public:
/// Type definition that can be retrieved by other classes
using costtype = CostType;
/// Type definition that can be retrieved by other classes
using cost_t = CostType;
using BaseClass::BaseClass;
/// Debug member, used to check whether the cell was already visited.
bool _visited = false;
/**
* \brief Gets the far-field cost of the cell.
* \return The far-field cost of the cell
*/
CostType getCost() const {
return _cost;
}
/**
* \brief Gets the near-field cost of the cell.
* \return The near-field cost of the cell
*/
CostType getNearCost() const {
return _leafCost;
}
/**
* \brief Sets the cost of the cell.
*/
void setCost(CostType newCost) {
_cost = newCost;
}
/**
* \brief Sets the near-field cost of the cell.
*/
void setNearCost(CostType newCost) {
_leafCost = newCost;
}
/**
* \brief Add a far-field cost to the cell.
* \return The cost of the cell
* \warning Can be used on const cells !
*/
CostType addCost(CostType cost) const {
_cost += cost;
return _cost;
}
/**
* \brief Add a near-field cost to the cell.
* \return The cost of the cell
* \warning Can be used on const cells !
*/
CostType addNearCost(CostType cost) const {
_leafCost += cost;
return _leafCost;
}
};
#endif
......@@ -7,6 +7,7 @@
#include "FCostCell.hpp"
#include "FCoordColour.hpp"
#include "Containers/FTreeCoordinate.hpp"
#include <vector>
#include <stdexcept>
......@@ -34,14 +35,14 @@
template<typename OctreeClass, typename CellClass>
class FCostZones {
public:
using CostType = typename CellClass::costtype;
using CostType = typename CellClass::cost_t;
using TreeIterator = typename OctreeClass::Iterator;
/**
* \brief Class used to store the bounds of a zone.
* The bounds consist in the Morton index of the first node and the number
* of subsequent nodes.
*/
*/
using BoundClass = std::pair<TreeIterator, int>;
/// Initial value for empty bounds.
const BoundClass _boundInit {TreeIterator(),0};
......@@ -109,7 +110,7 @@ public:
* \param tree The tree to work on.
* \param nbZones The number of zones to create.
*/
FCostZones(OctreeClass* tree, int nbZones) :
FCostZones(OctreeClass* tree, int nbZones) :
_it( tree ),
_nbZones( nbZones ),
_treeHeight( tree->getHeight() ),
......@@ -126,7 +127,7 @@ public:
_nbZones,
std::vector< std::pair< int, CellClass*> >( ))
{
_it.gotoBottomLeft();
_it.gotoBottomLeft();
typename OctreeClass::Iterator ittest(_it);
ittest.gotoBottomLeft();
}
......@@ -161,12 +162,12 @@ public:
return _leafZoneBounds;
}
/// Gets the tree topmost level used.
/// Gets the tree topmost level used.
int getTopMostLevel() const {
return _topMostLevel;
}
/// Sets the tree topmost level used.
/// Sets the tree topmost level used.
void setTopMostLevel(unsigned int level) {
if( level > _treeHeight-1 ) {
std::stringstream msgstream;
......@@ -174,16 +175,16 @@ public:
<< " tree height=" << _treeHeight;
throw std::out_of_range(msgstream.str());
}
_topMostLevel = level;
}
/// Gets the tree bottom most level that we use.
/// Gets the tree bottom most level that we use.
int getBottomMostLevel() const {
return _bottomMostLevel;
}
/// Sets the tree bottom most level that we use.
/// Sets the tree bottom most level that we use.
void setBottomMostLevel(unsigned int level) {
if( level > _treeHeight-1 ) {
std::stringstream msgstream;
......@@ -191,7 +192,7 @@ public:
<< " tree height=" << _treeHeight;
throw std::out_of_range(msgstream.str());
}
_bottomMostLevel = level;
}
......@@ -228,7 +229,7 @@ protected:
* next one.
*/
void costzones() {
std::pair<int,int> childrenCount;
const int level = _it.level();
const bool progressDown = _it.canProgressToDown()
......@@ -242,11 +243,11 @@ protected:
childrenCount = countLeftRightChildren();
callCostZonesOnChildren(LEFT, childrenCount);
}
// if the current cell is within the levels we consider, we add it
if( useCell )
addCurrentCell();
// When not on a leaf, apply to right children
if ( progressDown ) {
callCostZonesOnChildren(RIGHT, childrenCount);
......@@ -254,7 +255,7 @@ protected:
}
/**
* \brief Applies costzones to the left or right children of the current cell.
*
......@@ -267,7 +268,7 @@ protected:
* countLeftRightChildren.
*/
void callCostZonesOnChildren(const ChildrenSide side, const std::pair<int, int>& childrenCount) {
const int& nbChildren = (side == LEFT ? childrenCount.first : childrenCount.second);
// Don't move if there are no children on the right when we want to
......@@ -278,7 +279,7 @@ protected:
// move down to the children level
_it.moveDown();
if ( side == RIGHT ) {
// move to the first right child
for ( int childIdx = 0; childIdx < childrenCount.first; childIdx++) {
......@@ -354,7 +355,7 @@ protected:
_internalZoneBounds.at(cellZone)[level].first = _it;
_internalZoneBounds.at(cellZone)[level].second = 1;
} else {
_internalZoneBounds.at(cellZone)[level].second++;
_internalZoneBounds.at(cellZone)[level].second++;
}
///////////////////////////////