Commit adb8effe authored by messner's avatar messner

Big Chebyshev commit: The Chebyshev FMM works for FFmmAlgorithm and

FFmmAlgorithmThread. Compressed M2L operators are precomputed and stored in
binray files. This features are tested in several test files.


git-svn-id: svn+ssh://scm.gforge.inria.fr/svn/scalfmm/scalfmm/trunk@388 2616d619-271b-44dc-8df4-d4a8f33a7222
parent 855d4cc4
......@@ -60,8 +60,11 @@ if( SCALFMM_USE_CBLAS )
if( SCALFMM_USE_MKL_AS_BLAS )
SET(CBLAS_LIBRARIES "-L$ENV{MKLROOT}/lib -lmkl_intel_lp64 -lmkl_sequential -lmkl_core")
else()
FIND_PACKAGE(BLAS)
SET(BLAS_LIBRARIES "-lcblas")
#FIND_PACKAGE(BLAS)
#FIND_PACKAGE(Lapack)
INCLUDE(FindBLAS)
INCLUDE(FindLAPACK)
#SET(CBLAS_LIBRARIES "-lcblas")
endif()
endif()
......
......@@ -2,7 +2,6 @@
#define FCHEBCELL_HPP
#include "../Extensions/FExtendPosition.hpp"
#include "../Extensions/FExtendMortonIndex.hpp"
#include "../Extensions/FExtendCoordinate.hpp"
......@@ -16,12 +15,11 @@
* This class defines a cell used in the Chebyshev based FMM.
*/
template <int ORDER>
class FChebCell : public FExtendPosition,
public FExtendMortonIndex,
class FChebCell : public FExtendMortonIndex,
public FExtendCoordinate
{
FReal multipole_exp[TensorTraits<ORDER>::nnodes]; //< Multipole expansion
FReal local_exp[TensorTraits<ORDER>::nnodes]; //< Local expansion
FReal multipole_exp[TensorTraits<ORDER>::nnodes * 2]; //< Multipole expansion
FReal local_exp[TensorTraits<ORDER>::nnodes * 2]; //< Local expansion
public:
~FChebCell() {}
......
......@@ -6,6 +6,8 @@
#include "./FChebTensor.hpp"
#include "./FChebRoots.hpp"
#include "../Utils/FBlas.hpp"
/**
......@@ -15,8 +17,9 @@
/**
* @class FChebInterpolator
*
* The class @p FChebInterpolator defines the anterpolation (M2M) and
* interpolation (L2L) operations.
* interpolation (L2L) concerning operations.
*/
template <int ORDER>
class FChebInterpolator : FNoCopyable
......@@ -28,12 +31,43 @@ class FChebInterpolator : FNoCopyable
FReal T_of_roots[ORDER][ORDER];
unsigned int node_ids[nnodes][3];
FReal* ChildParentInterpolator[8];
/**
* Initialize the child - parent - interpolator, it is basically the matrix
* S which is precomputed and reused for all M2M and L2L operations, ie for
* all non leaf inter/anterpolations.
*/
void initChildParentInterpolator()
{
F3DPosition ParentRoots[nnodes], ChildRoots[nnodes];
const FReal ParentWidth(2.);
const F3DPosition ParentCenter(0., 0., 0.);
FChebTensor<ORDER>::setRoots(ParentCenter, ParentWidth, ParentRoots);
F3DPosition ChildCenter;
const FReal ChildWidth(1.);
// loop: child cells
for (unsigned int child=0; child<8; ++child) {
// allocate memory
ChildParentInterpolator[child] = new FReal [nnodes * nnodes];
// set child info
FChebTensor<ORDER>::setRelativeChildCenter(child, ChildCenter);
FChebTensor<ORDER>::setRoots(ChildCenter, ChildWidth, ChildRoots);
// assemble child - parent - interpolator
assembleInterpolator(nnodes, ChildRoots, ChildParentInterpolator[child]);
}
}
public:
/**
* Constructor: It initializes the Chebyshev polynomials at the Chebyshev
* Constructor: Initialize the Chebyshev polynomials at the Chebyshev
* roots/interpolation point
*/
explicit FChebInterpolator()
......@@ -45,29 +79,95 @@ public:
// initialize root node ids
TensorType::setNodeIds(node_ids);
// initialize interpolation operator for non M2M and L2L (non leaf
// operations)
this -> initChildParentInterpolator();
}
/**
* Destructor: Delete dynamically allocated memory for M2M and L2L operator
*/
~FChebInterpolator()
{
for (unsigned int child=0; child<8; ++child)
delete [] ChildParentInterpolator[child];
}
/**
* Assembles the interpolator \f$S_\ell\f$ of size \f$N\times
* \ell^3\f$. Here local points is meant as points whose global coordinates
* have already been mapped to the reference interval [-1,1].
*
* @param[in] NumberOfLocalPoints
* @param[in] LocalPoints
* @param[out] Interpolator
*/
void assembleInterpolator(const unsigned int NumberOfLocalPoints,
const F3DPosition *const LocalPoints,
FReal *const Interpolator) const
{
// values of chebyshev polynomials of source particle: T_o(x_i)
FReal T_of_x[ORDER][3];
// loop: local points (mapped in [-1,1])
for (unsigned int m=0; m<NumberOfLocalPoints; ++m) {
// evaluate chebyshev polynomials at local points
for (unsigned int o=1; o<ORDER; ++o) {
T_of_x[o][0] = BasisType::T(o, LocalPoints[m].getX());
T_of_x[o][1] = BasisType::T(o, LocalPoints[m].getY());
T_of_x[o][2] = BasisType::T(o, LocalPoints[m].getZ());
}
// assemble interpolator
for (unsigned int n=0; n<nnodes; ++n) {
Interpolator[n*nnodes + m] = FReal(1.);
for (unsigned int d=0; d<3; ++d) {
const unsigned int j = node_ids[n][d];
FReal S_d = FReal(1.) / ORDER;
for (unsigned int o=1; o<ORDER; ++o)
S_d += FReal(2.) / ORDER * T_of_x[o][d] * T_of_roots[o][j];
Interpolator[n*nnodes + m] *= S_d;
}
}
}
}
/**
* The anterpolation corresponds to the P2M and M2M operation. It is indeed
* the transposed interpolation.
*/
template <class ContainerClass>
void anterpolate(const F3DPosition& center,
const FReal width,
FReal *const multipoleExpansion,
const ContainerClass *const sourceParticles) const
void applyP2M(const F3DPosition& center,
const FReal width,
FReal *const multipoleExpansion,
const ContainerClass *const sourceParticles) const
{
// setup global to local mapping
const map_glob_loc map(center, width);
F3DPosition localPosition;
FReal T_of_x[ORDER][3];
// std::cout << "ClusterCenter = " << center
// << ", width = " << width << std::endl;
// set all multipole expansions to zero
for (unsigned int n=0; n<nnodes; ++n) multipoleExpansion[n] = FReal(0.);
// loop: source particles
typename ContainerClass::ConstBasicIterator iter(*sourceParticles);
while(iter.hasNotFinished()){
// map global position to [-1,1]
map(iter.data().getPosition(), localPosition);
// std::cout << "\tParticlePosition = " << iter.data().getPosition()
// << ", local Position = " << localPosition << std::endl;
// get source value
const FReal sourceValue = iter.data().getPhysicalValue();
......@@ -81,6 +181,7 @@ public:
// anterpolate
for (unsigned int n=0; n<nnodes; ++n) {
//multipoleExpansion[n] = FReal(0.);
FReal S = FReal(1.);
for (unsigned int d=0; d<3; ++d) {
const unsigned int j = node_ids[n][d];
......@@ -93,7 +194,7 @@ public:
}
iter.gotoNext();
}
} // end loop: source particles
}
......@@ -102,11 +203,12 @@ public:
* The interpolation corresponds to the L2L and L2P operation.
*/
template <class ContainerClass>
void interpolate(const F3DPosition& center,
const FReal width,
const FReal *const localExpansion,
ContainerClass *const localParticles) const
void applyL2P(const F3DPosition& center,
const FReal width,
const FReal *const localExpansion,
ContainerClass *const localParticles) const
{
// setup local to global mapping
const map_glob_loc map(center, width);
F3DPosition localPosition;
FReal T_of_x[ORDER][3];
......@@ -118,7 +220,7 @@ public:
map(iter.data().getPosition(), localPosition);
// get target value
FReal targetValue = iter.data().getPhysicalValue();
FReal targetValue = iter.data().getPotential();
// evaluate chebyshev polynomials of source particle: T_o(x_i)
for (unsigned int o=1; o<ORDER; ++o) {
......@@ -140,92 +242,32 @@ public:
targetValue += S * localExpansion[n];
}
iter.data().setPhysicalValue(targetValue);
iter.data().setPotential(targetValue);
iter.gotoNext();
}
}
};
void applyM2M(const unsigned int ChildIndex,
const FReal *const ChildExpansion,
FReal *const ParentExpansion) const
{
FBlas::gemtva(nnodes, nnodes, FReal(1.),
ChildParentInterpolator[ChildIndex],
const_cast<FReal *const>(ChildExpansion), ParentExpansion);
}
///**
// * Interpolation operator setter of (N)on (L)eaf clusters. Since the grid is
// * regular, all non leaf clusters of the same level have identical
// * interpolation operators. Hence, memory for the interpolation operator S is
// * only allocated here but not freed.
// */
//template <typename cluster_type,
// typename clusterbasis_type>
//class IOsetterNL
// : public std::unary_function<cluster_type, void>
//{
// enum {dim = cluster_type::dim,
// nboxes = cluster_type::nboxes,
// order = clusterbasis_type::order,
// nnodes = clusterbasis_type::nnodes};
//
// typedef typename clusterbasis_type::value_type value_type;
// typedef typename PointTraits<dim>::point_type point_type;
//
// typedef Chebyshev< order> basis_type;
// typedef Tensor<dim,order> tensor_type;
//
// boost::shared_array<value_type> S;
//
// std::vector<clusterbasis_type>& basis;
//
//public:
// explicit IOsetterNL(std::vector<clusterbasis_type>& _basis,
// const double ext,
// const double cext)
// : S(new value_type [nboxes*nnodes * nnodes]), basis(_basis)
// {
// // some factors
// const double frac = cext / ext;
// const double radius = 1. - frac;
// const double cextension = 2. * frac;
//
// // setup interpolation nodes of child clusters
// point_type x [nboxes*nnodes];
// for (unsigned int b=0; b<nboxes; ++b) {
// point_type center;
// for (unsigned int d=0; d<dim; ++d)
// center[d] = radius * DimTraits<dim>::child_pos[b][d];
// const map_loc_glob<dim> map(center, cextension);
//
// for (unsigned int n=0; n<nnodes; ++n)
// for (unsigned int d=0; d<dim; ++d)
// x[b*nnodes + n][d]
// = map(d, basis_type::nodes[tensor_type::node_ids[n][d]]);
// }
//
// // compute S
// FChebInterpolator<dim,order,value_type> cmp;
// value_type S_[nnodes];
// for (unsigned int b=0; b<nboxes; ++b)
// for (unsigned int i=0; i<nnodes; ++i) {
// cmp(x[b*nnodes + i], S_);
// HYENA_ASSERT(check_nan(nnodes, S_));
// for (unsigned int j=0; j<nnodes; ++j)
// S[b * nnodes*nnodes + j*nnodes + i] = S_[j];
// }
// }
//
// void operator()(cluster_type *const cl) const
// {
// HYENA_ASSERT(!cl->level->isleaf());
// basis.at(cl->cidx).assign(S);
// }
//
//};
void applyL2L(const unsigned int ChildIndex,
const FReal *const ParentExpansion,
FReal *const ChildExpansion) const
{
FBlas::gemva(nnodes, nnodes, FReal(1.),
ChildParentInterpolator[ChildIndex],
const_cast<FReal *const>(ParentExpansion), ChildExpansion);
}
};
......
#ifndef FCHEBKERNELS_HPP
#define FCHEBKERNELS_HPP
// [--License--]
#include "../Utils/FGlobal.hpp"
#include "../Utils/FTrace.hpp"
#include "../Utils/FSmartPointer.hpp"
#include "./FChebInterpolator.hpp"
#include "./FChebM2LHandler.hpp"
class FTreeCoordinate;
/**
* @author Matthias Messner(matthias.messner@inria.fr)
* @class FChebKernels
* @brief
* Please read the license
*
* This kernels implement the Chebyshev interpolation based FMM operators. It
* implements all interfaces (P2P, P2M, M2M, M2L, L2L, L2P) which are required by
* the FFmmAlgorithm and FFmmAlgorithmThread.
*
* @tparam ParticleClass Type of particle
* @tparam CellClass Type of cell
* @tparam ContainerClass Type of container to store particles
* @tparam MatrixKernelClass Type of matrix kernel function
* @tparam ORDER Chebyshev interpolation order
*/
template<class ParticleClass,
class CellClass,
class ContainerClass,
class MatrixKernelClass,
int ORDER>
class FChebKernels
{
enum {nnodes = TensorTraits<ORDER>::nnodes};
typedef FChebInterpolator<ORDER> InterpolatorClass;
typedef FChebM2LHandler<ORDER,MatrixKernelClass> M2LHandlerClass;
/// Needed for P2M, M2M, L2L and L2P operators
FSmartPointer<InterpolatorClass,FSmartPointerMemory> Interpolator;
/// Needed for M2L operator
FSmartPointer< M2LHandlerClass,FSmartPointerMemory> M2LHandler;
/// Needed for P2P operator
FSmartPointer<MatrixKernelClass,FSmartPointerMemory> MatrixKernel;
/// Height of the entire oct-tree
const unsigned int TreeHeight;
/// Corner of oct-tree box
const F3DPosition BoxCorner;
/// Width of oct-tree box
const FReal BoxWidth;
/// Width of a leaf cell box
const FReal BoxWidthLeaf;
/// Prescribed accuracy of the compressed M2L operators
const FReal Epsilon;
/**
* Compute center of leaf cell from its tree coordinate.
* @param[in] Coordinate tree coordinate
* @return center of leaf cell
*/
const F3DPosition getLeafCellCenter(const FTreeCoordinate& Coordinate) const
{
return F3DPosition(BoxCorner.getX() + (FReal(Coordinate.getX()) + FReal(.5)) * BoxWidthLeaf,
BoxCorner.getY() + (FReal(Coordinate.getY()) + FReal(.5)) * BoxWidthLeaf,
BoxCorner.getZ() + (FReal(Coordinate.getZ()) + FReal(.5)) * BoxWidthLeaf);
}
public:
/**
* The constructor initializes all constant attributes and it reads the
* precomputed and compressed M2L operators from a binary file (an
* runtime_error is thrown if the required file is not valid).
*/
FChebKernels(const int inTreeHeight,
const F3DPosition& inBoxCenter,
const FReal inBoxWidth,
const FReal inEpsilon)
: Interpolator(new InterpolatorClass()),
M2LHandler(new M2LHandlerClass(inEpsilon)),
MatrixKernel(new MatrixKernelClass()),
TreeHeight(inTreeHeight),
BoxCorner(inBoxCenter - inBoxWidth / FReal(2.)),
BoxWidth(inBoxWidth),
BoxWidthLeaf(BoxWidth / FReal(FMath::pow(2, inTreeHeight - 1))),
Epsilon(inEpsilon)
{
// read precomputed compressed m2l operators from binary file
M2LHandler->ReadFromBinaryFileAndSet();
}
// /** Default destructor */
// ~FChebKernels() {}
void P2M(CellClass* const LeafCell,
const ContainerClass* const SourceParticles) const
{
// 1) apply Sy
const F3DPosition LeafCellCenter(getLeafCellCenter(LeafCell->getCoordinate()));
Interpolator->applyP2M(LeafCellCenter,
BoxWidthLeaf,
LeafCell->getMultipole(),
SourceParticles);
// 2) apply B
M2LHandler->applyB(LeafCell->getMultipole(),
LeafCell->getMultipole() + nnodes);
}
void M2M(CellClass* const FRestrict ParentCell,
const CellClass*const FRestrict *const FRestrict ChildCells,
const int TreeLevel) const
{
// 1) apply Sy
FBlas::scal(nnodes*2, FReal(0.), ParentCell->getMultipole());
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex)
if (ChildCells[ChildIndex])
Interpolator->applyM2M(ChildIndex,
ChildCells[ChildIndex]->getMultipole(),
ParentCell->getMultipole());
// 2) apply B
M2LHandler->applyB(ParentCell->getMultipole(),
ParentCell->getMultipole() + nnodes);
}
// void M2L(CellClass* const FRestrict TargetCell,
// const CellClass* SourceCells[343],
// const int NumSourceCells,
// const int TreeLevel) const
// {
// const FReal CellWidth(BoxWidth / FReal(FMath::pow(2, TreeLevel)));
// const FTreeCoordinate& cx = TargetCell->getCoordinate();
// for (int idx=0; idx<NumSourceCells; ++idx) {
// const FTreeCoordinate& cy = SourceCells[idx]->getCoordinate();
// const int transfer[3] = {cy.getX()-cx.getX(),
// cy.getY()-cx.getY(),
// cy.getZ()-cx.getZ()};
// M2LHandler->applyC(transfer, CellWidth,
// SourceCells[idx]->getMultipole() + nnodes,
// TargetCell->getLocal() + nnodes);
// }
// }
void M2L(CellClass* const FRestrict TargetCell,
const CellClass* SourceCells[343],
const int NumSourceCells,
const int TreeLevel) const
{
FReal *const CompressedLocalExpansion = TargetCell->getLocal() + nnodes;
const FReal CellWidth(BoxWidth / FReal(FMath::pow(2, TreeLevel)));
for (int idx=0; idx<343; ++idx)
if (SourceCells[idx])
M2LHandler->applyC(idx, CellWidth,
SourceCells[idx]->getMultipole() + nnodes,
CompressedLocalExpansion);
}
// void M2L(CellClass* const FRestrict TargetCell,
// const CellClass* SourceCells[343],
// const int NumSourceCells,
// const int TreeLevel) const
// {
// const unsigned int rank = M2LHandler.getRank();
// FBlas::scal(343*rank, FReal(0.), MultipoleExpansion);
// const FReal CellWidth(BoxWidth / FReal(FMath::pow(2, TreeLevel)));
// for (int idx=0; idx<343; ++idx)
// if (SourceCells[idx])
// FBlas::copy(rank, const_cast<FReal *const>(SourceCells[idx]->getMultipole())+nnodes,
// MultipoleExpansion+idx*rank);
//
// M2LHandler->applyC(CellWidth, MultipoleExpansion, TargetCell->getLocal() + nnodes);
// }
void L2L(const CellClass* const FRestrict ParentCell,
CellClass* FRestrict *const FRestrict ChildCells,
const int TreeLevel) const
{
// 1) apply U
M2LHandler->applyU(ParentCell->getLocal() + nnodes,
const_cast<CellClass *const>(ParentCell)->getLocal());
// 2) apply Sx
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex)
if (ChildCells[ChildIndex])
Interpolator->applyL2L(ChildIndex,
ParentCell->getLocal(),
ChildCells[ChildIndex]->getLocal());
}
void L2P(const CellClass* const LeafCell,
ContainerClass* const TargetParticles) const
{
// 1) apply U
M2LHandler->applyU(LeafCell->getLocal() + nnodes,
const_cast<CellClass *const>(LeafCell)->getLocal());
// 2) apply Sx
const F3DPosition LeafCellCenter(getLeafCellCenter(LeafCell->getCoordinate()));
Interpolator->applyL2P(LeafCellCenter,
BoxWidthLeaf,
LeafCell->getLocal(),
TargetParticles);
}
void P2P(const FTreeCoordinate& LeafCellCoordinate,
ContainerClass* const FRestrict TargetParticles,
const ContainerClass* const FRestrict SourceParticles,
const ContainerClass* const NeighborSourceParticles[27],
const int ) const
{
// loop: target particles
typename ContainerClass::BasicIterator iTargets(*TargetParticles);
while (iTargets.hasNotFinished()) {
ParticleClass& Target = iTargets.data();
{ // loop: source particles (target leaf cell == source leaf cell)
typename ContainerClass::ConstBasicIterator iSources(*SourceParticles);
while (iSources.hasNotFinished()) {
const ParticleClass& Source = iSources.data();
// only if target and source are not identical
if (&Target != &Source)
Target.incPotential(MatrixKernel->evaluate(Target.getPosition(), Source.getPosition())
* Source.getPhysicalValue());
// progress sources
iSources.gotoNext();
}
}
{ // loop: source particles (target leaf cell != source leaf cell)
for (unsigned int idx=0; idx<27; ++idx) {
if (NeighborSourceParticles[idx]) {
typename ContainerClass::ConstBasicIterator iSources(*NeighborSourceParticles[idx]);
while (iSources.hasNotFinished()) {
const ParticleClass& Source = iSources.data();
// target and source cannot be identical
Target.incPotential(MatrixKernel->evaluate(Target.getPosition(), Source.getPosition())
* Source.getPhysicalValue());
// progress sources
iSources.gotoNext();
}
}
}
}
// progress targets
iTargets.gotoNext();
}
}
};
#endif //FCHEBKERNELS_HPP
// [--END--]
This diff is collapsed.
#ifndef FCHEBMATRIXKERNEL_HPP
#define FCHEBMATRIXKERNEL_HPP
// [--License--]
#include "../Utils/F3DPosition.hpp"
#include "../Utils/FNoCopyable.hpp"
#include "../Utils/FMath.hpp"
enum {ONE_OVER_R = 1,
ONE_OVER_R_SQUARED = 2};
/**
* @author Matthias Messner (matthias.messner@inria.fr)
* @class FChebMatrixKernels
* Please read the license
*/
struct FChebAbstractMatrixKernel : FNoCopyable
{
virtual FReal evaluate(const F3DPosition&, const F3DPosition&) const = 0;
virtual FReal getScaleFactor(FReal) const = 0;
};
/// One over r
struct FChebMatrixKernelR : FChebAbstractMatrixKernel
{
enum {Identifier = ONE_OVER_R};
FReal evaluate(const F3DPosition& x, const F3DPosition& y) const
{
const F3DPosition xy(x-y);
return FReal(1.) / FMath::Sqrt(xy.getX()*xy.getX() +
xy.getY()*xy.getY() +
xy.getZ()*xy.getZ());
}
FReal getScaleFactor(FReal CellWidth) const
{ return FReal(2.) / CellWidth; }
};
/// One over r^2
struct FChebMatrixKernelRR : FChebAbstractMatrixKernel
{
enum {Identifier = ONE_OVER_R_SQUARED};
FReal evaluate(const F3DPosition& x, const F3DPosition& y) const
{
const F3DPosition xy(x-y);
return FReal(1.) / FReal(xy.getX()*xy.getX() +
xy.getY()*xy.getY() +
xy.getZ()*xy.getZ());
}
FReal getScaleFactor(FReal CellWidth) const
{ return FReal(4.) / (CellWidth*CellWidth); }
};
#endif // FCHEBMATRIXKERNEL_HPP
// [--END--]
......@@ -6,6 +6,7 @@
#include "../Extensions/FExtendPosition.hpp"
#include "../Extensions/FExtendPhysicalValue.hpp"
#include "../Extensions/FExtendPotential.hpp"
/**
* @author Matthias Messner (matthias.matthias@inria.fr)
......@@ -16,7 +17,8 @@
* approach.
*/
class FChebParticle : public FExtendPosition,
public FExtendPhysicalValue
public FExtendPhysicalValue,
public FExtendPotential
{
public:
~FChebParticle() {}
......
......@@ -3,6 +3,9 @@
#include <cmath>
#include <limits>
#include <cassert>
#include "../Utils/FNoCopyable.hpp"
/**
......@@ -40,6 +43,7 @@ struct FChebRoots : FNoCopyable
*/
const static FReal T(const unsigned int n, FReal x)
{
//std::cout << x << std::endl;
assert(std::fabs(x)-1.<10.*std::numeric_limits<FReal>::epsilon());
if (std::fabs(x)>1.) {
x = (x > FReal( 1.) ? FReal( 1.) : x);
......
......@@ -79,6 +79,29 @@ public:
map(localPosition, rootPositions[n]);
}
}
/**
* Set the relative child (width = 1) center according to the Morton index.
*
* @param[in] ChildIndex index of child according to Morton index
* @param[out] center
*/
static
void setRelativeChildCenter(const unsigned int ChildIndex,
F3DPosition& ChildCenter)
{
const int RelativeChildPositions[][3] = { {-1, -1, -1},
{-1, -1, 1},