Commit 085ecc19 authored by Matthias Messner's avatar Matthias Messner
Browse files

working FChebSymKernels: uses only 16 blas3 operations instead of max 189

blas2 operations [WIP]
parent 5c9efe5d
......@@ -85,8 +85,8 @@ public:
Epsilon(inEpsilon)
{
// read precomputed compressed m2l operators from binary file
//M2LHandler->ReadFromBinaryFileAndSet();
M2LHandler->ComputeAndCompressAndSet();
M2LHandler->ReadFromBinaryFileAndSet();
//M2LHandler->ComputeAndCompressAndSet();
}
......
......@@ -58,6 +58,15 @@ class FChebSymKernels : public FAbstractKernels<ParticleClass, CellClass, Contai
FReal* K[343];
int LowRank[343];
// permuted local and multipole expansions
FReal* Loc[343];
FReal* Mul[343];
unsigned int countExp[343];
// permutation vectors and permutated indices
unsigned int pvectors[343][nnodes];
unsigned int pindices[343];
/**
* Compute center of leaf cell from its tree coordinate.
* @param[in] Coordinate tree coordinate
......@@ -70,29 +79,10 @@ class FChebSymKernels : public FAbstractKernels<ParticleClass, CellClass, Contai
BoxCorner.getZ() + (FReal(Coordinate.getZ()) + FReal(.5)) * BoxWidthLeaf);
}
void precomputeDense()
{
// interpolation points of source (Y) and target (X) cell
F3DPosition X[nnodes], Y[nnodes];
// set roots of target cell (X)
FChebTensor<ORDER>::setRoots(F3DPosition(0.,0.,0.), FReal(2.), X);
unsigned int counter = 0;
for (int i=2; i<=3; ++i) {
for (int j=0; j<=i; ++j) {
for (int k=0; k<=j; ++k) {
const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
K[idx] = new FReal [nnodes*nnodes];
const F3DPosition cy(FReal(2.*i), FReal(2.*j), FReal(2.*k));
FChebTensor<ORDER>::setRoots(cy, FReal(2.), Y);
for (unsigned int n=0; n<nnodes; ++n)
for (unsigned int m=0; m<nnodes; ++m)
K[idx][n*nnodes + m] = MatrixKernel->evaluate(X[m], Y[n]);
counter++;
}
}
}
std::cout << "num interactions = " << counter << std::endl;
}
void precomputeSVD()
{
......@@ -115,6 +105,15 @@ class FChebSymKernels : public FAbstractKernels<ParticleClass, CellClass, Contai
for (unsigned int m=0; m<nnodes; ++m)
U[n*nnodes + m] = MatrixKernel->evaluate(X[m], Y[n]);
// applying weights ////////////////////////////////////////
FReal weights[nnodes];
FChebTensor<ORDER>::setRootOfWeights(weights);
for (unsigned int n=0; n<nnodes; ++n) {
FBlas::scal(nnodes, weights[n], U + n, nnodes); // scale rows
FBlas::scal(nnodes, weights[n], U + n * nnodes); // scale cols
}
//////////////////////////////////////////////////////////
// truncated singular value decomposition of matrix
const unsigned int LWORK = 2 * (3*nnodes + nnodes);
FReal *const WORK = new FReal [LWORK];
......@@ -126,6 +125,7 @@ class FChebSymKernels : public FAbstractKernels<ParticleClass, CellClass, Contai
// store
const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
assert(K[idx]==NULL);
K[idx] = new FReal [2*rank*nnodes];
LowRank[idx] = rank;
for (unsigned int r=0; r<rank; ++r)
......@@ -134,7 +134,15 @@ class FChebSymKernels : public FAbstractKernels<ParticleClass, CellClass, Contai
for (unsigned int r=0; r<rank; ++r)
FBlas::copy(nnodes, VT + r, nnodes, K[idx] + rank*nnodes + r*nnodes, 1);
std::cout << "(" << i << "," << j << "," << k << ") low rank = " << rank << std::endl;
// un-weighting ////////////////////////////////////////////
for (unsigned int n=0; n<nnodes; ++n) {
FBlas::scal(rank, FReal(1.) / weights[n], K[idx] + n, nnodes); // scale rows
FBlas::scal(rank, FReal(1.) / weights[n], K[idx] + rank*nnodes + n, nnodes); // scale rows
}
//////////////////////////////////////////////////////////
std::cout << "(" << i << "," << j << "," << k << ") " << idx <<
", low rank = " << rank << std::endl;
counter++;
}
......@@ -144,6 +152,9 @@ class FChebSymKernels : public FAbstractKernels<ParticleClass, CellClass, Contai
delete [] U;
}
public:
/**
* The constructor initializes all constant attributes and it reads the
......@@ -163,20 +174,49 @@ public:
BoxWidthLeaf(BoxWidth / FReal(FMath::pow(2, inTreeHeight - 1))),
Epsilon(inEpsilon)
{
// precompute M2L operators
for (unsigned int t=0; t<343; ++t) {K[t] = NULL; LowRank[t] = -1;}
//this -> precomputeDense();
// init all 343 item to zero, because effectively only 16 exist
for (unsigned int t=0; t<343; ++t) {
K[t] = NULL;
LowRank[t] = 0;
Loc[t] = NULL;
Mul[t] = NULL;
}
// precompute M2L operators and set rank
this -> precomputeSVD();
// set permutation vector and indices
for (int i=-3; i<=3; ++i)
for (int j=-3; j<=3; ++j)
for (int k=-3; k<=3; ++k)
if (abs(i)>1 || abs(j)>1 || abs(k)>1) {
const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3);
pindices[idx] = Symmetries->getPermutationArrayAndIndex(i,j,k, pvectors[idx]);
}
for (int i=2; i<=3; ++i)
for (int j=0; j<=i; ++j)
for (int k=0; k<=j; ++k) {
const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
Mul[idx] = new FReal [30 * nnodes];
Loc[idx] = new FReal [30 * nnodes];
}
}
~FChebSymKernels()
{
for (unsigned int i=0; i<343; ++i) if (K[i]!=NULL) delete [] K[i];
for (unsigned int t=0; t<343; ++t) {
if (K[ t]!=NULL) delete [] K[ t];
if (Loc[t]!=NULL) delete [] Loc[t];
if (Mul[t]!=NULL) delete [] Mul[t];
}
}
void P2M(CellClass* const LeafCell,
const ContainerClass* const SourceParticles)
{
......@@ -189,6 +229,7 @@ public:
}
void M2M(CellClass* const FRestrict ParentCell,
const CellClass*const FRestrict *const FRestrict ChildCells,
const int TreeLevel)
......@@ -204,6 +245,54 @@ public:
void M2L(CellClass* const FRestrict TargetCell,
const CellClass* SourceCells[343],
const int NumSourceCells,
const int TreeLevel)
{
// permute and copy multipole expansion
memset(countExp, 0, sizeof(int) * 343);
for (unsigned int idx=0; idx<343; ++idx) {
if (SourceCells[idx]) {
const unsigned int pidx = pindices[idx];
const unsigned int count = (countExp[pidx])++;
const FReal *const MultiExp = SourceCells[idx]->getMultipole();
for (unsigned int n=0; n<nnodes; ++n)
Mul[pidx][count*nnodes + pvectors[idx][n]] = MultiExp[n];
}
}
// multiply (mat-mat-mul)
FReal Compressed [nnodes * 30];
const FReal CellWidth(BoxWidth / FReal(FMath::pow(2, TreeLevel)));
const FReal scale(MatrixKernel->getScaleFactor(CellWidth));
for (unsigned int pidx=0; pidx<343; ++pidx) {
const unsigned int count = countExp[pidx];
if (count) {
const unsigned int rank = LowRank[pidx];
FBlas::gemtm(nnodes, rank, count, FReal(1.),
K[pidx]+rank*nnodes, nnodes, Mul[pidx], nnodes, Compressed, rank);
FBlas::gemm( nnodes, rank, count, scale, K[pidx], nnodes, Compressed, rank, Loc[pidx], nnodes);
}
}
// permute and add contribution to local expansions
FReal *const LocalExpansion = TargetCell->getLocal();
memset(countExp, 0, sizeof(int) * 343);
for (unsigned int idx=0; idx<343; ++idx) {
if (SourceCells[idx]) {
const unsigned int pidx = pindices[idx];
const unsigned int count = (countExp[pidx])++;
for (unsigned int n=0; n<nnodes; ++n)
LocalExpansion[n] += Loc[pidx][count*nnodes + pvectors[idx][n]];
}
}
}
/*
void M2L(CellClass* const FRestrict TargetCell,
const CellClass* SourceCells[343],
const int NumSourceCells,
......@@ -213,35 +302,37 @@ public:
const FReal CellWidth(BoxWidth / FReal(FMath::pow(2, TreeLevel)));
const FReal scale(MatrixKernel->getScaleFactor(CellWidth));
unsigned int permutations[nnodes];
FReal PermLocalExp[nnodes];
FReal PermMultiExp[nnodes];
FReal Compressed[nnodes];
for (int i=-3; i<=3; ++i) {
for (int j=-3; j<=3; ++j) {
for (int k=-3; k<=3; ++k) {
const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3);
if (SourceCells[idx]) {
const FReal *const MultiExp = SourceCells[idx]->getMultipole();
const unsigned int pidx = Symmetries->getPermutationArrayAndIndex(i,j,k, permutations);
if (K[pidx]==NULL) std::cout << pidx << "does not exist" << std::endl;
for (unsigned int n=0; n<nnodes; ++n) PermMultiExp[permutations[n]] = MultiExp[n];
//// dense
//FBlas::gemv(nnodes, nnodes, scale, K[pidx], PermMultiExp, PermLocalExp);
// svd
const int rank = LowRank[pidx];
FBlas::gemtv(nnodes, rank, FReal(1.), K[pidx]+rank*nnodes, PermMultiExp, Compressed);
FBlas::gemv( nnodes, rank, scale, K[pidx], Compressed, PermLocalExp);
for (unsigned int n=0; n<nnodes; ++n) LocalExpansion[n] += PermLocalExp[permutations[n]];
// permute
for (unsigned int n=0; n<nnodes; ++n) PermMultiExp[pvectors[idx][n]] = MultiExp[n];
// mat-vec-mult (svd)
assert(K[pindices[idx]]!=NULL);
const int rank = LowRank[pindices[idx]];
FBlas::gemtv(nnodes, rank, FReal(1.), K[pindices[idx]]+rank*nnodes, PermMultiExp, Compressed);
FBlas::gemv( nnodes, rank, scale, K[pindices[idx]], Compressed, PermLocalExp);
// permute
for (unsigned int n=0; n<nnodes; ++n) LocalExpansion[n] += PermLocalExp[pvectors[idx][n]];
}
}
}
}
}
*/
......@@ -368,3 +459,69 @@ public:
// if (SourceCells[idx])
// FBlas::gemva(nnodes, nnodes, scale, K[idx],
// const_cast<CellClass*>(SourceCells[idx])->getMultipole(), LocalExpansion);
// void precomputeDense()
// {
// // interpolation points of source (Y) and target (X) cell
// F3DPosition X[nnodes], Y[nnodes];
// // set roots of target cell (X)
// FChebTensor<ORDER>::setRoots(F3DPosition(0.,0.,0.), FReal(2.), X);
// unsigned int counter = 0;
// for (int i=2; i<=3; ++i) {
// for (int j=0; j<=i; ++j) {
// for (int k=0; k<=j; ++k) {
// const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3);
// assert(K[idx]==NULL);
// K[idx] = new FReal [nnodes*nnodes];
// const F3DPosition cy(FReal(2.*i), FReal(2.*j), FReal(2.*k));
// FChebTensor<ORDER>::setRoots(cy, FReal(2.), Y);
// for (unsigned int n=0; n<nnodes; ++n)
// for (unsigned int m=0; m<nnodes; ++m)
// K[idx][n*nnodes + m] = MatrixKernel->evaluate(X[m], Y[n]);
// counter++;
// }
// }
// }
// std::cout << "num interactions = " << counter << std::endl;
// }
//// dense
//FBlas::gemv(nnodes, nnodes, scale, K[pidx], PermMultiExp, PermLocalExp);
//for (unsigned int idx=0; idx<343; ++idx) {
// if (SourceCells[idx]) {
// const unsigned int pidx = pindices[idx];
// const unsigned int *const pvec = pvectors[idx];
// const unsigned int count = (countExp[pidx])++;
// // permute
// const FReal *const MultiExp = SourceCells[idx]->getMultipole();
// for (unsigned int n=0; n<nnodes; ++n) Mul[pidx][count*nnodes + pvec[n]] = MultiExp[n];
// // mat-vec-mult (svd)
// assert(K[pindices[idx]]!=NULL);
// const int rank = LowRank[pindices[idx]];
// FBlas::gemtv(nnodes, rank, FReal(1.), K[pidx] + rank*nnodes, Mul[pidx] + count*nnodes, Compressed);
// FBlas::gemv( nnodes, rank, scale, K[pidx], Compressed, Loc[pidx] + count*nnodes);
// // permute
// for (unsigned int n=0; n<nnodes; ++n) LocalExpansion[n] += Loc[pidx][count*nnodes + pvec[n]];
// }
// }
//// multiply (mat-mat-mul)
//for (unsigned int idx=0; idx<343; ++idx) countExp[idx] = 0;
//for (unsigned int idx=0; idx<343; ++idx) {
// if (SourceCells[idx]) {
// const unsigned int pidx = pindices[idx];
// const unsigned int count = (countExp[pidx])++;
// // mat-vec-mult (svd)
// assert(K[pidx]!=NULL);
// const int rank = LowRank[pidx];
// FBlas::gemtv(nnodes, rank, FReal(1.), K[pidx] + rank*nnodes, Mul[pidx] + count*nnodes, Compressed);
// FBlas::gemv( nnodes, rank, scale, K[pidx], Compressed, Loc[pidx] + count*nnodes);
// }
// }
......@@ -77,12 +77,7 @@ public:
unsigned int node_ids[nnodes][3];
setNodeIds(node_ids);
for (unsigned int n=0; n<nnodes; ++n) {
FReal weight = FReal(1.); // no weighting
weight *= FMath::Sqrt(weights_1d[node_ids[n][0]]*weights_1d[node_ids[n][1]]*weights_1d[node_ids[n][2]]); // 1/2
//weight *= weight; // 1
//weight *= weight; // 2
//weight *= weight; // 4
weights[n] = weight;
weights[n] = FMath::Sqrt(weights_1d[node_ids[n][0]]*weights_1d[node_ids[n][1]]*weights_1d[node_ids[n][2]]);
}
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment