Commit c1455fed authored by BLANCHARD Pierre's avatar BLANCHARD Pierre
Browse files

Implemented dense variant (i.e. no FFT) of P2M, M2M, M2L, L2L and L2P routines...

Implemented dense variant (i.e. no FFT) of P2M, M2M, M2L, L2L and L2P routines for Adaptive Uniform kernel (TODO M2P,P2L).
parent 3122f195
......@@ -58,10 +58,12 @@ class FAdaptiveUnifKernel : FUnifKernel<CellClass, ContainerClass, MatrixKernelC
, public FAbstractAdaptiveKernel<CellClass, ContainerClass> {
//
typedef FUnifKernel<CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS> KernelBaseClass;
enum {order = ORDER,
nnodes = KernelBaseClass::nnodes};
/// Needed for M2L operator
typedef FUnifM2LHandler<ORDER,MatrixKernelClass::Type> M2LHandlerClass;
const M2LHandlerClass M2LHandler;
const MatrixKernelClass *const MatrixKernel;
public:
......@@ -79,11 +81,11 @@ public:
// * runtime_error is thrown if the required file is not valid).
// */
FAdaptiveUnifKernel(const int inTreeHeight, const FReal inBoxWidth,
const FPoint& inBoxCenter, const MatrixKernelClass *const inMatrixKernel) : KernelBaseClass(inTreeHeight, inBoxWidth, inBoxCenter, inMatrixKernel), M2LHandler(inMatrixKernel, inTreeHeight, inBoxWidth)
const FPoint& inBoxCenter, const MatrixKernelClass *const inMatrixKernel) : KernelBaseClass(inTreeHeight, inBoxWidth, inBoxCenter, inMatrixKernel), M2LHandler(inMatrixKernel, inTreeHeight, inBoxWidth), MatrixKernel(inMatrixKernel)
{}
// /** Copy constructor */
FAdaptiveUnifKernel(const FAdaptiveUnifKernel& other)
: KernelBaseClass(other), M2LHandler(other.M2LHandler)
: KernelBaseClass(other), M2LHandler(other.M2LHandler), MatrixKernel(other.MatrixKernel)
{ }
//
......@@ -95,32 +97,116 @@ public:
void P2M(CellClass* const pole, const int cellLevel, const ContainerClass* const particles) override {
//pole->setDataUp(pole->getDataUp() + particles->getNbParticles());
//
const FPoint LeafCellCenter(KernelBaseClass::getLeafCellCenter(pole->getCoordinate()));
const FPoint CellCenter(KernelBaseClass::getLeafCellCenter(pole->getCoordinate()));
const FReal BoxWidth = KernelBaseClass::BoxWidthLeaf*FMath::pow(2.0,KernelBaseClass::TreeHeight-cellLevel);
//
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply Sy
KernelBaseClass::Interpolator->applyP2M(LeafCellCenter, BoxWidth,
KernelBaseClass::Interpolator->applyP2M(CellCenter, BoxWidth,
pole->getMultipole(idxRhs), particles);
// 2) apply Discrete Fourier Transform
M2LHandler.applyZeroPaddingAndDFT(pole->getMultipole(idxRhs),
pole->getTransformedMultipole(idxRhs));
// // 2) apply Discrete Fourier Transform
// M2LHandler.applyZeroPaddingAndDFT(pole->getMultipole(idxRhs),
// pole->getTransformedMultipole(idxRhs));
}
}
void M2M(CellClass* const pole, const int /*poleLevel*/, const CellClass* const subCell, const int /*subCellLevel*/) override {
void M2M(CellClass* const pole, const int poleLevel, const CellClass* const subCell, const int subCellLevel) override {
// pole->setDataUp(pole->getDataUp() + subCell->getDataUp());
const FPoint subCellCenter(KernelBaseClass::getLeafCellCenter(subCell->getCoordinate()));
const FReal subCellWidth = KernelBaseClass::BoxWidthLeaf*FMath::pow(2.0,KernelBaseClass::TreeHeight-subCellLevel);
const FPoint poleCellCenter(KernelBaseClass::getLeafCellCenter(pole->getCoordinate()));
const FReal poleCellWidth = KernelBaseClass::BoxWidthLeaf*FMath::pow(2.0,KernelBaseClass::TreeHeight-poleLevel);
// Set sub-child coords
FReal globalChildCoords[3][ORDER];
FUnifTensor<order>::setPolynomialsRoots(subCellCenter, subCellWidth, globalChildCoords);
// Map global position of sub-child nodes to [-1,1]
FReal localChildCoords[3][ORDER];
const map_glob_loc map(poleCellCenter, poleCellWidth);
FPoint localChildPoints;
for (unsigned int n=0; n<ORDER; ++n) {
map(FPoint(globalChildCoords[0][n],globalChildCoords[1][n],globalChildCoords[2][n]), localChildPoints);
localChildCoords[0][n] = localChildPoints.getX();
localChildCoords[1][n] = localChildPoints.getY();
localChildCoords[2][n] = localChildPoints.getZ();
}
// assemble interpolator
FReal* subChildParentInterpolator = new FReal [3 * ORDER*ORDER];
KernelBaseClass::Interpolator->assembleInterpolator(ORDER, localChildCoords[0], subChildParentInterpolator);
KernelBaseClass::Interpolator->assembleInterpolator(ORDER, localChildCoords[1], subChildParentInterpolator + 1 * ORDER*ORDER);
KernelBaseClass::Interpolator->assembleInterpolator(ORDER, localChildCoords[2], subChildParentInterpolator + 2 * ORDER*ORDER);
// get permutation operators
unsigned int perm[3][nnodes];
for (unsigned int i=0;i<3; ++i)
for (unsigned int n=0; n<nnodes; ++n)
perm[i][n] = KernelBaseClass::Interpolator->getPermutationsM2ML2L(i)[n];
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply Sy (using tensor product M2M with an interpolator computed on the fly)
FBlas::scal(nnodes, FReal(0.), pole->getMultipole(idxRhs));
FReal Exp[nnodes], PermExp[nnodes];
// ORDER*ORDER*ORDER * (2*ORDER-1)
FBlas::gemtm(ORDER, ORDER, ORDER*ORDER, FReal(1.),
subChildParentInterpolator, ORDER,
const_cast<FReal*>(subCell->getMultipole(idxRhs)), ORDER, PermExp, ORDER);
for (unsigned int n=0; n<nnodes; ++n) Exp[n] = PermExp[perm[1][n]];
// ORDER*ORDER*ORDER * (2*ORDER-1)
FBlas::gemtm(ORDER, ORDER, ORDER*ORDER, FReal(1.),
subChildParentInterpolator + 2 * ORDER*ORDER, ORDER,
Exp, ORDER, PermExp, ORDER);
for (unsigned int n=0; n<nnodes; ++n) Exp[perm[1][n]] = PermExp[perm[2][n]];
// ORDER*ORDER*ORDER * (2*ORDER-1)
FBlas::gemtm(ORDER, ORDER, ORDER*ORDER, FReal(1.),
subChildParentInterpolator + 1 * ORDER*ORDER, ORDER,
Exp, ORDER, PermExp, ORDER);
for (unsigned int n=0; n<nnodes; ++n) pole->getMultipole(idxRhs)[perm[2][n]] += PermExp[n];
// // 2) Apply Discete Fourier Transform
// M2LHandler.applyZeroPaddingAndDFT(ParentCell->getMultipole(idxRhs),
// ParentCell->getTransformedMultipole(idxRhs));
}
}
void P2L(CellClass* const local, const int /*localLevel*/, const ContainerClass* const particles) override {
// local->setDataDown(local->getDataDown() + particles->getNbParticles());
}
void M2L(CellClass* const local, const int /*localLevel*/, const CellClass* const aNeighbor, const int /*neighborLevel*/) override {
// local->setDataDown(local->getDataDown() + aNeighbor->getDataUp());
void M2L(CellClass* const local, const int localLevel, const CellClass* const pole, const int poleLevel) override {
// local->setDataDown(local->getDataDown() + pole->getDataUp());
// Source cell: pole
const FReal poleCellWidth(KernelBaseClass::BoxWidth / FReal(FMath::pow(2, poleLevel)));
const FPoint poleCellCenter(KernelBaseClass::getLeafCellCenter(pole->getCoordinate()));
// Target cell: local
const FReal localCellWidth(KernelBaseClass::BoxWidth / FReal(FMath::pow(2, localLevel)));
const FPoint localCellCenter(KernelBaseClass::getLeafCellCenter(local->getCoordinate()));
// interpolation points of source (Y) and target (X) cell
FPoint X[nnodes], Y[nnodes];
FUnifTensor<order>::setRoots(poleCellCenter, poleCellWidth, Y);
FUnifTensor<order>::setRoots(localCellCenter, localCellWidth, X);
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// Dense M2L
const FReal *const MultipoleExpansion = pole->getMultipole(idxRhs);
for (unsigned int m=0; m<nnodes; ++m)
for (unsigned int n=0; n<nnodes; ++n)
local->getLocal(idxRhs)[m]=MatrixKernel->evaluate(X[m], Y[n]) * MultipoleExpansion[n];
}
}
void M2P(const CellClass* const pole, const int /*poleLevel*/, ContainerClass* const particles) override {
......@@ -130,15 +216,94 @@ public:
// }
}
void L2L(const CellClass* const local, const int /*localLevel*/, CellClass* const subCell, const int /*subCellLevel*/) override {
void L2L(const CellClass* const local, const int localLevel, CellClass* const subCell, const int subCellLevel) override {
// subCell->setDataDown(local->getDataDown() + subCell->getDataDown());
const FPoint subCellCenter(KernelBaseClass::getLeafCellCenter(subCell->getCoordinate()));
const FReal subCellWidth = KernelBaseClass::BoxWidthLeaf*FMath::pow(2.0,KernelBaseClass::TreeHeight-subCellLevel);
const FPoint localCenter(KernelBaseClass::getLeafCellCenter(local->getCoordinate()));
const FReal localWidth = KernelBaseClass::BoxWidthLeaf*FMath::pow(2.0,KernelBaseClass::TreeHeight-localLevel);
// Set sub-child coords
FReal globalChildCoords[3][ORDER];
FUnifTensor<order>::setPolynomialsRoots(subCellCenter, subCellWidth, globalChildCoords);
// Map global position of sub-child nodes to [-1,1]
FReal localChildCoords[3][ORDER];
const map_glob_loc map(localCenter, localWidth);
FPoint localChildPoints;
for (unsigned int n=0; n<ORDER; ++n) {
map(FPoint(globalChildCoords[0][n],globalChildCoords[1][n],globalChildCoords[2][n]), localChildPoints);
localChildCoords[0][n] = localChildPoints.getX();
localChildCoords[1][n] = localChildPoints.getY();
localChildCoords[2][n] = localChildPoints.getZ();
}
// assemble interpolator
FReal* subChildParentInterpolator = new FReal [3 * ORDER*ORDER];
KernelBaseClass::Interpolator->assembleInterpolator(ORDER, localChildCoords[0], subChildParentInterpolator);
KernelBaseClass::Interpolator->assembleInterpolator(ORDER, localChildCoords[1], subChildParentInterpolator + 1 * ORDER*ORDER);
KernelBaseClass::Interpolator->assembleInterpolator(ORDER, localChildCoords[2], subChildParentInterpolator + 2 * ORDER*ORDER);
// get permutation operators
unsigned int perm[3][nnodes];
for (unsigned int i=0;i<3; ++i)
for (unsigned int n=0; n<nnodes; ++n)
perm[i][n] = KernelBaseClass::Interpolator->getPermutationsM2ML2L(i)[n];
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// // 1) Apply Inverse Discete Fourier Transform
// M2LHandler.unapplyZeroPaddingAndDFT(local->getTransformedLocal(idxRhs),
// const_cast<CellClass*>(local)->getLocal(idxRhs));
// 2) apply Sx
FReal Exp[nnodes], PermExp[nnodes];
// ORDER*ORDER*ORDER * (2*ORDER-1)
FBlas::gemm(ORDER, ORDER, ORDER*ORDER, FReal(1.),
subChildParentInterpolator, ORDER,
const_cast<FReal*>(local->getLocal(idxRhs)), ORDER, PermExp, ORDER);
for (unsigned int n=0; n<nnodes; ++n) Exp[n] = PermExp[perm[1][n]];
// ORDER*ORDER*ORDER * (2*ORDER-1)
FBlas::gemm(ORDER, ORDER, ORDER*ORDER, FReal(1.),
subChildParentInterpolator + 2 * ORDER*ORDER, ORDER,
Exp, ORDER, PermExp, ORDER);
for (unsigned int n=0; n<nnodes; ++n) Exp[perm[1][n]] = PermExp[perm[2][n]];
// ORDER*ORDER*ORDER * (2*ORDER-1)
FBlas::gemm(ORDER, ORDER, ORDER*ORDER, FReal(1.),
subChildParentInterpolator + 1 * ORDER*ORDER, ORDER,
Exp, ORDER, PermExp, ORDER);
for (unsigned int n=0; n<nnodes; ++n) subCell->getLocal(idxRhs)[perm[2][n]] += PermExp[n];
// total flops count: 3 * ORDER*ORDER*ORDER * (2*ORDER-1)
}
}
void L2P(const CellClass* const local, const int /*cellLevel*/, ContainerClass* const particles) override {
void L2P(const CellClass* const local, const int cellLevel, ContainerClass* const particles) override {
// long long int*const particlesAttributes = particles->getDataDown();
// for(int idxPart = 0 ; idxPart < particles->getNbParticles() ; ++idxPart){
// particlesAttributes[idxPart] += local->getDataDown();
// }
const FPoint CellCenter(KernelBaseClass::getLeafCellCenter(local->getCoordinate()));
const FReal BoxWidth = KernelBaseClass::BoxWidthLeaf*FMath::pow(2.0,KernelBaseClass::TreeHeight-cellLevel);
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// // 1) Apply Inverse Discete Fourier Transform
// M2LHandler.unapplyZeroPaddingAndDFT(local->getTransformedLocal(idxRhs),
// const_cast<CellClass*>(local)->getLocal(idxRhs));
// 2.a) apply Sx
KernelBaseClass::Interpolator->applyL2P(CellCenter, BoxWidth,
local->getLocal(idxRhs), particles);
// 2.b) apply Px (grad Sx)
KernelBaseClass::Interpolator->applyL2PGradient(CellCenter, BoxWidth,
local->getLocal(idxRhs), particles);
}
}
void P2P(ContainerClass* target, const ContainerClass* sources) override {
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment