Commit ef6fd9f8 authored by Matthias Messner's avatar Matthias Messner

FChebSymKernels works now also with FFmmAlgorithmThread

parent 59ddf450
......@@ -37,11 +37,11 @@ class FChebKernels : public FAbstractKernels<ParticleClass, CellClass, Container
typedef FChebM2LHandler<ORDER,MatrixKernelClass> M2LHandlerClass;
/// Needed for P2M, M2M, L2L and L2P operators
FSmartPointer<InterpolatorClass,FSmartPointerMemory> Interpolator;
const FSmartPointer<InterpolatorClass,FSmartPointerMemory> Interpolator;
/// Needed for M2L operator
FSmartPointer< M2LHandlerClass,FSmartPointerMemory> M2LHandler;
/// Needed for P2P operator
FSmartPointer<MatrixKernelClass,FSmartPointerMemory> MatrixKernel;
const FSmartPointer<MatrixKernelClass,FSmartPointerMemory> MatrixKernel;
/// Height of the entire oct-tree
const unsigned int TreeHeight;
/// Corner of oct-tree box
......
......@@ -13,6 +13,8 @@
class FTreeCoordinate;
/**
* @author Matthias Messner(matthias.messner@inria.fr)
* @class FChebSymKernels
......@@ -35,14 +37,17 @@ class FChebSymKernels : public FAbstractKernels<ParticleClass, CellClass, Contai
{
enum {nnodes = TensorTraits<ORDER>::nnodes};
typedef FChebInterpolator<ORDER> InterpolatorClass;
typedef FChebSymmetries<ORDER> SymmetriesClass;
/// Handler to deal with all symmetries: Stores permutation indices and
/// vectors to reduce 343 different interactions to 16 only.
struct SymmetryHandler;
/// Needed for P2M, M2M, L2L and L2P operators
FSmartPointer<InterpolatorClass,FSmartPointerMemory> Interpolator;
/// Needed for M2L operator
FSmartPointer< SymmetriesClass,FSmartPointerMemory> Symmetries;
const FSmartPointer<InterpolatorClass,FSmartPointerMemory> Interpolator;
/// Needed for P2P operator
FSmartPointer<MatrixKernelClass,FSmartPointerMemory> MatrixKernel;
const FSmartPointer<MatrixKernelClass,FSmartPointerMemory> MatrixKernel;
/// Needed for handling all symmetries
const FSmartPointer<SymmetryHandler,FSmartPointerMemory> SymHandler;
/// Height of the entire oct-tree
const unsigned int TreeHeight;
/// Corner of oct-tree box
......@@ -51,21 +56,14 @@ class FChebSymKernels : public FAbstractKernels<ParticleClass, CellClass, Contai
const FReal BoxWidth;
/// Width of a leaf cell box
const FReal BoxWidthLeaf;
/// Prescribed accuracy of the compressed M2L operators
const FReal Epsilon;
// M2L operators
FReal* K[343];
int LowRank[343];
// permuted local and multipole expansions
FReal* Loc[343];
FReal* Mul[343];
unsigned int countExp[343];
// permutation vectors and permutated indices
unsigned int pvectors[343][nnodes];
unsigned int pindices[343];
/**
* Compute center of leaf cell from its tree coordinate.
......@@ -80,81 +78,24 @@ class FChebSymKernels : public FAbstractKernels<ParticleClass, CellClass, Contai
}
void precomputeSVD()
/**
* Allocate memory for storing locally permuted mulipole and local expansions
*/
void allocateMemoryForPermutedExpansions()
{
// interpolation points of source (Y) and target (X) cell
F3DPosition X[nnodes], Y[nnodes];
// set roots of target cell (X)
FChebTensor<ORDER>::setRoots(F3DPosition(0.,0.,0.), FReal(2.), X);
// temporary matrix
FReal* U = new FReal [nnodes*nnodes];
unsigned int counter = 0;
for (int i=2; i<=3; ++i) {
for (int j=0; j<=i; ++j) {
for (int i=2; i<=3; ++i)
for (int j=0; j<=i; ++j)
for (int k=0; k<=j; ++k) {
// assemble matrix
const F3DPosition cy(FReal(2.*i), FReal(2.*j), FReal(2.*k));
FChebTensor<ORDER>::setRoots(cy, FReal(2.), Y);
for (unsigned int n=0; n<nnodes; ++n)
for (unsigned int m=0; m<nnodes; ++m)
U[n*nnodes + m] = MatrixKernel->evaluate(X[m], Y[n]);
// applying weights ////////////////////////////////////////
FReal weights[nnodes];
FChebTensor<ORDER>::setRootOfWeights(weights);
for (unsigned int n=0; n<nnodes; ++n) {
FBlas::scal(nnodes, weights[n], U + n, nnodes); // scale rows
FBlas::scal(nnodes, weights[n], U + n * nnodes); // scale cols
}
//////////////////////////////////////////////////////////
// truncated singular value decomposition of matrix
const unsigned int LWORK = 2 * (3*nnodes + nnodes);
FReal *const WORK = new FReal [LWORK];
FReal *const VT = new FReal [nnodes*nnodes];
FReal *const S = new FReal [nnodes];
const unsigned int info = FBlas::gesvd(nnodes, nnodes, U, S, VT, nnodes, LWORK, WORK);
if (info!=0) throw std::runtime_error("SVD did not converge with " + info);
const unsigned int rank = getRank<ORDER>(S, Epsilon);
// store
const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
assert(K[idx]==NULL);
K[idx] = new FReal [2*rank*nnodes];
LowRank[idx] = rank;
for (unsigned int r=0; r<rank; ++r)
FBlas::scal(nnodes, S[r], U + r*nnodes);
FBlas::copy(rank*nnodes, U, K[idx]);
for (unsigned int r=0; r<rank; ++r)
FBlas::copy(nnodes, VT + r, nnodes, K[idx] + rank*nnodes + r*nnodes, 1);
// un-weighting ////////////////////////////////////////////
for (unsigned int n=0; n<nnodes; ++n) {
FBlas::scal(rank, FReal(1.) / weights[n], K[idx] + n, nnodes); // scale rows
FBlas::scal(rank, FReal(1.) / weights[n], K[idx] + rank*nnodes + n, nnodes); // scale rows
}
//////////////////////////////////////////////////////////
std::cout << "(" << i << "," << j << "," << k << ") " << idx <<
", low rank = " << rank << std::endl;
counter++;
assert(Mul[idx]!=NULL || Loc[idx]!=NULL);
Mul[idx] = new FReal [30 * nnodes];
Loc[idx] = new FReal [30 * nnodes];
}
}
}
std::cout << "num interactions = " << counter << std::endl;
delete [] U;
}
public:
/**
* The constructor initializes all constant attributes and it reads the
......@@ -164,52 +105,39 @@ public:
FChebSymKernels(const int inTreeHeight,
const F3DPosition& inBoxCenter,
const FReal inBoxWidth,
const FReal inEpsilon)
const FReal Epsilon)
: Interpolator(new InterpolatorClass()),
Symmetries(new SymmetriesClass()),
MatrixKernel(new MatrixKernelClass()),
SymHandler(new SymmetryHandler(MatrixKernel.getPtr(), Epsilon)),
TreeHeight(inTreeHeight),
BoxCorner(inBoxCenter - inBoxWidth / FReal(2.)),
BoxWidth(inBoxWidth),
BoxWidthLeaf(BoxWidth / FReal(FMath::pow(2, inTreeHeight - 1))),
Epsilon(inEpsilon)
BoxWidthLeaf(BoxWidth / FReal(FMath::pow(2, inTreeHeight - 1)))
{
// init all 343 item to zero, because effectively only 16 exist
for (unsigned int t=0; t<343; ++t) {
K[t] = NULL;
LowRank[t] = 0;
Loc[t] = NULL;
Mul[t] = NULL;
}
this->allocateMemoryForPermutedExpansions();
}
// precompute M2L operators and set rank
this -> precomputeSVD();
// set permutation vector and indices
for (int i=-3; i<=3; ++i)
for (int j=-3; j<=3; ++j)
for (int k=-3; k<=3; ++k)
if (abs(i)>1 || abs(j)>1 || abs(k)>1) {
const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3);
pindices[idx] = Symmetries->getPermutationArrayAndIndex(i,j,k, pvectors[idx]);
}
for (int i=2; i<=3; ++i)
for (int j=0; j<=i; ++j)
for (int k=0; k<=j; ++k) {
const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
Mul[idx] = new FReal [30 * nnodes];
Loc[idx] = new FReal [30 * nnodes];
}
/** Copy constructor */
FChebSymKernels(const FChebSymKernels& other)
: Interpolator(other.Interpolator),
MatrixKernel(other.MatrixKernel),
SymHandler(other.SymHandler),
TreeHeight(other.TreeHeight),
BoxCorner(other.BoxCorner),
BoxWidth(other.BoxWidth),
BoxWidthLeaf(other.BoxWidthLeaf)
{
this->allocateMemoryForPermutedExpansions();
}
/** Destructor */
~FChebSymKernels()
{
for (unsigned int t=0; t<343; ++t) {
if (K[ t]!=NULL) delete [] K[ t];
if (Loc[t]!=NULL) delete [] Loc[t];
if (Mul[t]!=NULL) delete [] Mul[t];
}
......@@ -255,11 +183,11 @@ public:
memset(countExp, 0, sizeof(int) * 343);
for (unsigned int idx=0; idx<343; ++idx) {
if (SourceCells[idx]) {
const unsigned int pidx = pindices[idx];
const unsigned int pidx = SymHandler->pindices[idx];
const unsigned int count = (countExp[pidx])++;
const FReal *const MultiExp = SourceCells[idx]->getMultipole();
for (unsigned int n=0; n<nnodes; ++n)
Mul[pidx][count*nnodes + pvectors[idx][n]] = MultiExp[n];
Mul[pidx][count*nnodes + SymHandler->pvectors[idx][n]] = MultiExp[n];
}
}
......@@ -270,10 +198,11 @@ public:
for (unsigned int pidx=0; pidx<343; ++pidx) {
const unsigned int count = countExp[pidx];
if (count) {
const unsigned int rank = LowRank[pidx];
const unsigned int rank = SymHandler->LowRank[pidx];
FBlas::gemtm(nnodes, rank, count, FReal(1.),
K[pidx]+rank*nnodes, nnodes, Mul[pidx], nnodes, Compressed, rank);
FBlas::gemm( nnodes, rank, count, scale, K[pidx], nnodes, Compressed, rank, Loc[pidx], nnodes);
SymHandler->K[pidx]+rank*nnodes, nnodes, Mul[pidx], nnodes, Compressed, rank);
FBlas::gemm( nnodes, rank, count, scale,
SymHandler->K[pidx], nnodes, Compressed, rank, Loc[pidx], nnodes);
}
}
......@@ -282,10 +211,10 @@ public:
memset(countExp, 0, sizeof(int) * 343);
for (unsigned int idx=0; idx<343; ++idx) {
if (SourceCells[idx]) {
const unsigned int pidx = pindices[idx];
const unsigned int pidx = SymHandler->pindices[idx];
const unsigned int count = (countExp[pidx])++;
for (unsigned int n=0; n<nnodes; ++n)
LocalExpansion[n] += Loc[pidx][count*nnodes + pvectors[idx][n]];
LocalExpansion[n] += Loc[pidx][count*nnodes + SymHandler->pvectors[idx][n]];
}
}
......@@ -348,6 +277,8 @@ public:
ChildCells[ChildIndex]->getLocal());
}
void L2P(const CellClass* const LeafCell,
ContainerClass* const TargetParticles)
{
......@@ -365,6 +296,7 @@ public:
}
void P2P(const FTreeCoordinate& LeafCellCoordinate,
ContainerClass* const FRestrict TargetParticles,
const ContainerClass* const FRestrict SourceParticles,
......@@ -404,7 +336,8 @@ public:
while (iSources.hasNotFinished()) {
const ParticleClass& Source = iSources.data();
// target and source cannot be identical
const FReal one_over_r = MatrixKernel->evaluate(Target.getPosition(), Source.getPosition());
const FReal one_over_r = MatrixKernel->evaluate(Target.getPosition(),
Source.getPosition());
const FReal ws = Source.getPhysicalValue();
// potential
Target.incPotential(one_over_r * ws);
......@@ -427,6 +360,151 @@ public:
};
/**
* Handler to deal with all symmetries: Stores permutation indices and vectors
* to reduce 343 different interactions to 16 only.
*/
template <class ParticleClass,
class CellClass,
class ContainerClass,
class MatrixKernelClass,
int ORDER>
struct FChebSymKernels<ParticleClass, CellClass, ContainerClass, MatrixKernelClass, ORDER>
::SymmetryHandler
{
// M2L operators
FReal* K[343];
int LowRank[343];
// permutation vectors and permutated indices
unsigned int pvectors[343][nnodes];
unsigned int pindices[343];
/** Constructor */
SymmetryHandler(const MatrixKernelClass *const MatrixKernel, const double Epsilon)
{
// init all 343 item to zero, because effectively only 16 exist
for (unsigned int t=0; t<343; ++t) {
K[t] = NULL;
LowRank[t] = 0;
}
// set permutation vector and indices
const FChebSymmetries<ORDER> Symmetries;
for (int i=-3; i<=3; ++i)
for (int j=-3; j<=3; ++j)
for (int k=-3; k<=3; ++k)
if (abs(i)>1 || abs(j)>1 || abs(k)>1) {
const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3);
pindices[idx] = Symmetries.getPermutationArrayAndIndex(i,j,k, pvectors[idx]);
}
// precompute 16 M2L operators
this->precomputeSVD(MatrixKernel, Epsilon);
}
/** Destructor */
~SymmetryHandler()
{
for (unsigned int t=0; t<343; ++t)
if (K[ t]!=NULL) delete [] K[ t];
}
private:
void precomputeSVD(const MatrixKernelClass *const MatrixKernel, const double Epsilon)
{
// interpolation points of source (Y) and target (X) cell
F3DPosition X[nnodes], Y[nnodes];
// set roots of target cell (X)
FChebTensor<ORDER>::setRoots(F3DPosition(0.,0.,0.), FReal(2.), X);
// temporary matrix
FReal* U = new FReal [nnodes*nnodes];
unsigned int counter = 0;
for (int i=2; i<=3; ++i) {
for (int j=0; j<=i; ++j) {
for (int k=0; k<=j; ++k) {
// assemble matrix
const F3DPosition cy(FReal(2.*i), FReal(2.*j), FReal(2.*k));
FChebTensor<ORDER>::setRoots(cy, FReal(2.), Y);
for (unsigned int n=0; n<nnodes; ++n)
for (unsigned int m=0; m<nnodes; ++m)
U[n*nnodes + m] = MatrixKernel->evaluate(X[m], Y[n]);
// applying weights ////////////////////////////////////////
FReal weights[nnodes];
FChebTensor<ORDER>::setRootOfWeights(weights);
for (unsigned int n=0; n<nnodes; ++n) {
FBlas::scal(nnodes, weights[n], U + n, nnodes); // scale rows
FBlas::scal(nnodes, weights[n], U + n * nnodes); // scale cols
}
//////////////////////////////////////////////////////////
// truncated singular value decomposition of matrix
const unsigned int LWORK = 2 * (3*nnodes + nnodes);
FReal *const WORK = new FReal [LWORK];
FReal *const VT = new FReal [nnodes*nnodes];
FReal *const S = new FReal [nnodes];
const unsigned int info = FBlas::gesvd(nnodes, nnodes, U, S, VT, nnodes, LWORK, WORK);
if (info!=0) throw std::runtime_error("SVD did not converge with " + info);
const unsigned int rank = getRank<ORDER>(S, Epsilon);
// store
const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
assert(K[idx]==NULL);
K[idx] = new FReal [2*rank*nnodes];
LowRank[idx] = rank;
for (unsigned int r=0; r<rank; ++r)
FBlas::scal(nnodes, S[r], U + r*nnodes);
FBlas::copy(rank*nnodes, U, K[idx]);
for (unsigned int r=0; r<rank; ++r)
FBlas::copy(nnodes, VT + r, nnodes, K[idx] + rank*nnodes + r*nnodes, 1);
// un-weighting ////////////////////////////////////////////
for (unsigned int n=0; n<nnodes; ++n) {
FBlas::scal(rank, FReal(1.) / weights[n], K[idx] + n, nnodes); // scale rows
FBlas::scal(rank, FReal(1.) / weights[n], K[idx] + rank*nnodes + n, nnodes); // scale rows
}
//////////////////////////////////////////////////////////
std::cout << "(" << i << "," << j << "," << k << ") " << idx <<
", low rank = " << rank << std::endl;
counter++;
}
}
}
std::cout << "num interactions = " << counter << std::endl;
delete [] U;
}
};
#endif //FCHEBSYMKERNELS_HPP
// [--END--]
......
......@@ -101,8 +101,8 @@ int main(int argc, char* argv[])
typedef FOctree<ParticleClass,CellClass,ContainerClass,LeafClass> OctreeClass;
//typedef FChebKernels<ParticleClass,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
typedef FChebSymKernels<ParticleClass,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
typedef FFmmAlgorithm<OctreeClass,ParticleClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
//typedef FFmmAlgorithmThread<OctreeClass,ParticleClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
//typedef FFmmAlgorithm<OctreeClass,ParticleClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
typedef FFmmAlgorithmThread<OctreeClass,ParticleClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
// What we do //////////////////////////////////////////////////////
std::cout << ">> Testing the Chebyshev interpolation base FMM algorithm.\n";
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment