Commit 35c0ced8 authored by Quentin Khan's avatar Quentin Khan

First balance commit

parent 8f41770a
#ifndef _COSTZONES_HPP_
#define _COSTZONES_HPP_
template<typename OctreeClass, typename CellClass>
class CostZones {
unsigned long long _currentCost = 0;
unsigned long long _totalCost = 0;
std::vector< std::pair< int, CellClass*> > _emptyzone;
std::vector< std::vector< std::pair<int, CellClass*> > > _zones;
typename OctreeClass::Iterator _it;
int _nbZones;
enum ChildrenSide {LEFT, RIGHT};
public:
CostZones(OctreeClass* tree, int nbZones) :
_zones(1, _emptyzone),
_it(tree),
_nbZones(nbZones)
{}
std::vector< std::vector< std::pair<int, CellClass*> > >& getZones() {return _zones;}
void run() {
_totalCost = 0;
int nbRootChildren = 0;
// Compute tree total cost;
_it.gotoBottomLeft();
do {
_it.gotoLeft();
nbRootChildren = 0; // waste of ressources only toplevel _iteration is kept
do {
_totalCost += _it.getCurrentCell()->getCost();
nbRootChildren++;
} while(_it.moveRight());
} while(_it.moveUp());
_it.gotoLeft();
// Compute costzones, we have to do the first level manualy
for ( int i = 0; i < nbRootChildren; i++ ) {
costzones();
_it.moveRight();
}
}
private:
/**
* Counts the children to the left and to the right of the cell currently
* pointed to by the iterator _it. You must check by yourself whether the
* cell is a leaf or not.
*/
void countLeftRightChildren(int& nbLeftChildren, int& nbRightChildren) {
FCostCell** children = _it.getCurrentChildren();
// Left children
for ( int childIdx = 0; childIdx < 4; childIdx++) {
if ( children[childIdx] ) {
++ nbLeftChildren;
}
}
// Right children
for ( int childIdx = 4; childIdx < 8; childIdx++) {
if ( children[childIdx] ) {
++ nbRightChildren;
}
}
}
void callCostZonesOnChildren(ChildrenSide side, int nbLeftChildren, int nbRightChildren) {
int startIdx = (side == LEFT ? 0 : 4);
int endIdx = (side == LEFT ? 4 : 8);
int nbChildren = (side == LEFT ? nbLeftChildren : nbRightChildren);
_it.moveDown();
// move to the first right child
if ( side == RIGHT ) {
if ( nbRightChildren == 0)
return;
}
_it.moveUp();
}
void costzones() {
// Current position is a leaf
CellClass* cell = _it.getCurrentCell();
if (cell->_visited) {
std::cerr << "Error : cell revisited..." << _it.level()
<< ": " << cell->getCoordinate() << std::endl;
return;
}
else
cell->_visited = true;
int cellCost = cell->getCost();
int nbLeftChildren = 0;
int nbRightChildren = 0;
#if 0
std::cout << "lvl " << std::setw(2) << _it.level() << " |"
<< "cellidx " << std::setw(4)
<< cell->getMortonIndex() << " : "
<< cell->getCoordinate() << " "
<< ( _it.canProgressToDown() ? "internal" : "leaf") << " "
<< std::endl;
#endif
// When not on a leaf, apply to left children first
if ( _it.canProgressToDown() ) {
FCostCell** children = _it.getCurrentChildren();
// Count left children
for ( int childIdx = 0; childIdx < 4; childIdx++) {
if ( children[childIdx] ) {
++ nbLeftChildren;
}
}
_it.moveDown();
// Apply costzones to left children
for ( int childIdx = 0; childIdx < nbLeftChildren; childIdx++ ) {
costzones();
// avoid changing tree
if(childIdx < nbLeftChildren -1)
_it.moveRight();
}
_it.moveUp();
}
if ( cellCost != 0) {
// Add the current cell
if ( _currentCost + cellCost < _zones.size() * _totalCost / _nbZones + 1 ) {
_currentCost += cellCost;
_zones.back().push_back({_it.level(), cell});
} else {
_zones.push_back(std::vector< std::pair<int, CellClass*> >(1, {_it.level(), cell}));
}
}
// When not on a leaf, apply to right children
if ( _it.canProgressToDown() ) {
FCostCell** children = _it.getCurrentChildren();
// Count right children
for ( int childIdx = 4; childIdx < 8; childIdx++) {
if ( children[childIdx] ) {
++ nbRightChildren;
}
}
if ( nbRightChildren == 0)
return;
// Move to the first right child
_it.moveDown();
for ( int childIdx = 0; childIdx < nbLeftChildren; childIdx++) {
_it.moveRight();
}
// Apply costzones to the right children
for ( int childIdx = 0; childIdx < nbRightChildren; childIdx++) {
costzones();
// avoid changing tree
if(childIdx < nbRightChildren -1)
_it.moveRight();
}
_it.moveUp();
}
}
};
#endif
// ===================================================================================
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FCHEBFLOPSSYMKERNEL_HPP
#define FCHEBFLOPSSYMKERNEL_HPP
#include <stdexcept>
#include "Utils/FGlobal.hpp"
#include "Utils/FSmartPointer.hpp"
#include "Components/FAbstractKernels.hpp"
#include "Kernels/Chebyshev/FChebInterpolator.hpp"
#include "Kernels/Interpolation/FInterpSymmetries.hpp"
class FTreeCoordinate;
/**
* This class extends FBasicCell to add simple computation cost memory.
*/
class FCostCell : public FBasicCell {
mutable long int _cost = 0;
public:
bool _visited = false;
long int getCost() const {
return _cost;
}
long int setCost(int newCost) {
_cost = newCost;
return _cost;
}
long int addCost(int addCost) const {
_cost += addCost;
return _cost;
}
};
/**
* \author Quentin Khan, Matthias Messner (original file)
* \brief Cost computation of Chebyshev interpolation based FMM.
*
* Please read the license
*
* This kernel implements the cost computation of the Chebyshev interpolation
* based FMM operators exploiting the symmetries in the far-field. It implements
* all interfaces (P2P, P2M, M2M, M2L, L2L, L2P) which are required by the
* FFmmAlgorithm and FFmmAlgorithmThread.
*
* \tparam CellClass Type of cell
* \tparam ContainerClass Type of container to store particles
* \tparam MatrixKernelClass Type of matrix kernel function
* \tparam ORDER Chebyshev interpolation order
*/
template < class CellClass, class ContainerClass, class MatrixKernelClass, int ORDER, class OctreeClass>
class FChebBalanceSymKernel
: public FAbstractKernels<CellClass, ContainerClass>
{
enum {nnodes = TensorTraits<ORDER>::nnodes};
public:
/// Handler to deal with all symmetries: Stores permutation indices and
/// vectors to reduce 343 different interactions to 16 only.
struct SymmetryHandler;
/// Needed for handling all symmetries
const FSmartPointer<MatrixKernelClass,FSmartPointerMemory> MatrixKernel;
const FSmartPointer<SymmetryHandler, FSmartPointerMemory> SymHandler;
///
OctreeClass* _tree;
/// tree height
const unsigned int _treeHeight;
/// count permuted local and multipole expansions
unsigned int* countExp;
/// Flops count for each operator of the FMM.
unsigned long long flopsP2M = 0,
flopsM2M = 0,
flopsM2L = 0,
flopsL2L = 0,
flopsL2P = 0,
flopsP2P = 0;
/// Flops count for operators of the FMM by level.
unsigned long long *flopsPerLevelM2M = nullptr,
*flopsPerLevelM2L = nullptr,
*flopsPerLevelL2L = nullptr;
/// Operators count.
unsigned long long countP2M = 0,
countM2M = 0,
countM2L = 0,
countL2L = 0,
countL2P = 0,
countP2P = 0;
/// start flop counters
unsigned int countFlopsM2MorL2L() const
{ return 3 * nnodes * (2*ORDER-1); }
unsigned int countFlopsM2L(const unsigned int nexp, const unsigned int rank) const
{ return nexp * (4*nnodes*rank - rank - nnodes); }
unsigned int countFlopsP2P() const
{ return 34; }
unsigned int countFlopsP2Pmutual() const
{ return 39; }
unsigned int countFlopsP2M(const unsigned int N) const {
const unsigned first = N * (18 + (ORDER-2) * 6 + (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1) * 2)));
const unsigned W2 = 3 * ORDER*(2*(ORDER-1)-1);
const unsigned W4 = 3 * (ORDER*(ORDER-1)*(2*(ORDER-1)-1) + ORDER*ORDER*(2*(ORDER-1)-1));
const unsigned W8 = 3 * (2*(ORDER-1)-1) * (ORDER*(ORDER-1)*(ORDER-1) + ORDER*ORDER*(ORDER-1) + nnodes);
return first + W2 + W4 + W8 + nnodes*11;
}
unsigned int countFlopsL2PTotal(const unsigned int N) const {
const unsigned W0 = nnodes;
const unsigned W2 = 3 * (ORDER-1)*ORDER*ORDER * 2*ORDER;
const unsigned W4 = 3 * ORDER*(ORDER-1)*(ORDER-1) * 2*ORDER;
const unsigned W8 = (ORDER-1)*(ORDER-1)*(ORDER-1) * (2*ORDER-1);
const unsigned second = N * (38 + (ORDER-2)*15 + (ORDER-1)*((ORDER-1) * (27 + (ORDER-1) * 16))) + 6;
return W0 + W2 + W4 + W8 + second;
}
// end flop counters
public:
/**
* The constructor initializes all constant attributes and it reads the
* precomputed and compressed M2L operators from a binary file (an
* runtime_error is thrown if the required file is not valid).
*/
FChebBalanceSymKernel(OctreeClass* tree,
const FReal Epsilon)
: MatrixKernel(new MatrixKernelClass()),
SymHandler(new SymmetryHandler(MatrixKernel.getPtr(), Epsilon)),
_tree(tree),
_treeHeight(_tree->getHeight())
{
countExp = new unsigned int [343];
flopsPerLevelM2M = new unsigned long long [_treeHeight];
flopsPerLevelM2L = new unsigned long long [_treeHeight];
flopsPerLevelL2L = new unsigned long long [_treeHeight];
for (unsigned int level = 0; level<_treeHeight; ++level)
flopsPerLevelM2M[level] = flopsPerLevelM2L[level] = flopsPerLevelL2L[level] = 0;
}
/** Copy constructor */
FChebBalanceSymKernel(const FChebBalanceSymKernel& other)
: SymHandler(other.SymHandler) {
countExp = new unsigned int [343];
}
/** Destructor */
~FChebBalanceSymKernel() {
delete [] countExp;
if (flopsPerLevelM2M) delete [] flopsPerLevelM2M;
if (flopsPerLevelM2L) delete [] flopsPerLevelM2L;
if (flopsPerLevelL2L) delete [] flopsPerLevelL2L;
}
void printResults(std::ostream& os) const {
os << "\n=================================================="
<< "\n- P2M Flops:" << flopsP2M
<< "\n- M2M Flops:" << flopsM2M
<< "\n- M2L Flops:" << flopsM2L
<< "\n- L2L Flops:" << flopsL2L
<< "\n- L2P Flops:" << flopsL2P
<< "\n- P2P Flops:" << flopsP2P
<< "\n- Overall Flops = " << flopsP2M + flopsM2M + flopsM2L
+ flopsL2L + flopsL2P + flopsP2P
<< "\n==================================================\n"
<< std::endl;
os << "\n=================================================="
<< "\n- Flops for P2M/M2M" << std::endl;
for (unsigned int level=0; level<_treeHeight; ++level)
if (level < _treeHeight-1)
os << " |- at level " << level
<< " flops = " << flopsPerLevelM2M[level] << std::endl;
else
os << " |- at level " << level
<< " flops = " << flopsP2M << std::endl;
os << "=================================================="
<< "\n- Flops for M2L" << std::endl;
for (unsigned int level=0; level<_treeHeight; ++level)
os << " |- at level " << level
<< " flops = " << flopsPerLevelM2L[level] << std::endl;
os << "=================================================="
<< "\n- Flops for L2L/L2P" << std::endl;
for (unsigned int level=0; level<_treeHeight; ++level)
if (level < _treeHeight-1)
os << " |- at level " << level
<< " flops = " << flopsPerLevelL2L[level] << std::endl;
else
os << " |- at level " << level
<< " flops = " << flopsL2P << std::endl;
os << "==================================================" << std::endl;
os << "P2P count: " << countP2P << std::endl;
os << "P2M count: " << countP2M << std::endl;
os << "M2M count: " << countM2M << std::endl;
os << "M2L count: " << countM2L << std::endl;
os << "L2L count: " << countL2L << std::endl;
os << "L2P count: " << countL2P << std::endl;
}
void P2M(CellClass* const cell, const ContainerClass* const SourceParticles) {
unsigned int tmpCost = countFlopsP2M(SourceParticles->getNbParticles());
flopsP2M += tmpCost;
cell->addCost(tmpCost);
countP2M++;
}
void M2M(CellClass* const FRestrict cell,
const CellClass*const FRestrict *const FRestrict ChildCells,
const int TreeLevel) {
unsigned int flops = 0;
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex)
if (ChildCells[ChildIndex]) flops += countFlopsM2MorL2L();
flopsM2M += flops;
flopsPerLevelM2M[TreeLevel] += flops;
cell->addCost(flops);
countM2M++;
}
void M2L(CellClass* const FRestrict cell,
const CellClass* SourceCells[343],
const int /* not needed */,
const int TreeLevel)
{
unsigned int flops = 0;
// count how ofter each of the 16 interactions is used
memset(countExp, 0, sizeof(int) * 343);
for (unsigned int idx=0; idx<343; ++idx)
if (SourceCells[idx]) countExp[SymHandler->pindices[idx]]++;
// multiply (mat-mat-mul)
for (unsigned int pidx=0; pidx<343; ++pidx)
if (countExp[pidx])
flops += countFlopsM2L(countExp[pidx], SymHandler->LowRank[pidx]) + countExp[pidx]*nnodes;
flopsM2L += flops;
flopsPerLevelM2L[TreeLevel] += flops;
cell->addCost(flops);
countM2L++;
}
void L2L(const CellClass* const FRestrict /* not needed */,
CellClass* FRestrict *const FRestrict ChildCells,
const int TreeLevel) {
unsigned int flops = 0;
unsigned int tmpCost = 0;
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex)
if (ChildCells[ChildIndex]) {
tmpCost = countFlopsM2MorL2L() + nnodes;
flops += tmpCost;
ChildCells[ChildIndex]->addCost(flops);
}
flopsL2L += flops;
flopsPerLevelL2L[TreeLevel] += flops;
countL2L++;
}
void L2P(const CellClass* const cell,
ContainerClass* const TargetParticles) {
//// 1.a) apply Sx
//flopsL2P += countFlopsP2MorL2P(TargetParticlesParticles->getNbParticles()) + TargetParticles->getNbParticles();
//// 1.b) apply Px (grad Sx)
//flopsL2P += countFlopsL2PGradient(TargetParticlesParticles->getNbParticles()) + 3 * TargetParticles->getNbParticles();
// or
// 2) apply Sx and Px (grad Sx)
unsigned int tmpCost = 0;
tmpCost = countFlopsL2PTotal(TargetParticles->getNbParticles()) + 4 * TargetParticles->getNbParticles();
flopsL2P += tmpCost;
cell->addCost(tmpCost);
countL2P++;
}
void P2P(const FTreeCoordinate& LeafCellCoordinate, // needed for periodic boundary conditions
ContainerClass* const FRestrict TargetParticles,
const ContainerClass* const FRestrict SourceParticles,
ContainerClass* const NeighborSourceParticles[27],
const int /* size */) {
unsigned int tmpCost = 0;
int srcPartCount = SourceParticles->getNbParticles();
int tgtPartCount = TargetParticles->getNbParticles();
if ( TargetParticles != SourceParticles ) {
tmpCost += countFlopsP2P() * tgtPartCount * srcPartCount;
for ( unsigned int idx = 0; idx < 27; ++idx ) {
if (NeighborSourceParticles[idx]) {
tmpCost +=
countFlopsP2P()
* tgtPartCount
* NeighborSourceParticles[idx]->getNbParticles();
}
}
} else {
tmpCost +=
countFlopsP2Pmutual()
* ((tgtPartCount * tgtPartCount
+ tgtPartCount)
/ 2);
for (unsigned int idx=0; idx<=13; ++idx)
{
if (NeighborSourceParticles[idx]) {
tmpCost +=
countFlopsP2Pmutual()
* tgtPartCount
* NeighborSourceParticles[idx]->getNbParticles();
}
}
}
flopsP2P += tmpCost;
CellClass* cell = _tree->getCell(
LeafCellCoordinate.getMortonIndex(_treeHeight - 1),
_treeHeight - 1);
cell->addCost(tmpCost);
countP2P++;
}
};
/**
* Handler to deal with all symmetries: Stores permutation indices and vectors
* to reduce 343 different interactions to 16 only.
*/
template < class CellClass, class ContainerClass, class MatrixKernelClass,
int ORDER, class OctreeClass>
struct FChebBalanceSymKernel<CellClass, ContainerClass, MatrixKernelClass, ORDER, OctreeClass>
::SymmetryHandler
{
// M2L operators
FReal* K[343];
int LowRank[343];
// permutation vectors and permutated indices
unsigned int pvectors[343][nnodes];
unsigned int pindices[343];
// compute rank
unsigned int getRank(const FReal singular_values[], const double eps)
{
FReal nrm2(0.);
for (unsigned int k=0; k<nnodes; ++k)
nrm2 += singular_values[k] * singular_values[k];
FReal nrm2k(0.);
for (unsigned int k=nnodes; k>0; --k) {
nrm2k += singular_values[k-1] * singular_values[k-1];
if (nrm2k > eps*eps * nrm2) return k;
}
throw std::runtime_error("rank cannot be larger than nnodes");
return 0;
}
/** Constructor */
SymmetryHandler(const MatrixKernelClass *const MatrixKernel, const double Epsilon)
{
// init all 343 item to zero, because effectively only 16 exist
for (unsigned int t=0; t<343; ++t) {
K[t] = nullptr;
LowRank[t] = 0;
}
// set permutation vector and indices
const FInterpSymmetries<ORDER> Symmetries;
for (int i=-3; i<=3; ++i)
for (int j=-3; j<=3; ++j)
for (int k=-3; k<=3; ++k)
if (abs(i)>1 || abs(j)>1 || abs(k)>1) {
const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3);
pindices[idx] = Symmetries.getPermutationArrayAndIndex(i,j,k, pvectors[idx]);
}
// precompute 16 M2L operators
this->precomputeSVD(MatrixKernel, Epsilon);
}