Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 97641e38 authored by Quentin Khan's avatar Quentin Khan
Browse files
parents 3723194a 32a8f61c
Branches
Tags
No related merge requests found
......@@ -45,7 +45,7 @@ class FAbstractChebKernel : public FAbstractKernels< CellClass, ContainerClass>
{
protected:
enum {nnodes = TensorTraits<ORDER>::nnodes};
typedef FChebInterpolator<ORDER,MatrixKernelClass> InterpolatorClass;
typedef FChebInterpolator<ORDER,MatrixKernelClass,NVALS> InterpolatorClass;
/// Needed for P2M, M2M, L2L and L2P operators
const FSmartPointer<InterpolatorClass,FSmartPointerMemory> Interpolator;
......
......@@ -41,14 +41,15 @@
* of the size of the vectorial interpolators. Default is the scalar
* matrix kernel class of type ONE_OVER_R (NRHS=NLHS=1).
*/
template <int ORDER, class MatrixKernelClass = struct FInterpMatrixKernelR>
template <int ORDER, class MatrixKernelClass = struct FInterpMatrixKernelR, int NVALS = 1>
class FChebInterpolator : FNoCopyable
{
// compile time constants and types
enum {nnodes = TensorTraits<ORDER>::nnodes,
nRhs = MatrixKernelClass::NRHS,
nLhs = MatrixKernelClass::NLHS,
nPV = MatrixKernelClass::NPV};
nPV = MatrixKernelClass::NPV,
nVals = NVALS};
typedef FChebRoots< ORDER> BasisType;
typedef FChebTensor<ORDER> TensorType;
......@@ -635,9 +636,9 @@ public:
* Particle to moment: application of \f$S_\ell(y,\bar y_n)\f$
* (anterpolation, it is the transposed interpolation)
*/
template <int ORDER, class MatrixKernelClass>
template <int ORDER, class MatrixKernelClass, int NVALS>
template <class ContainerClass>
inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyP2M(const FPoint& center,
inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyP2M(const FPoint& center,
const FReal width,
FReal *const multipoleExpansion,
const ContainerClass *const inParticles) const
......@@ -648,20 +649,19 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyP2M(const FPoint& c
FPoint localPosition;
// Init W
FReal W1[nRhs];
FReal W2[nRhs][3][ ORDER-1];
FReal W4[nRhs][3][(ORDER-1)*(ORDER-1)];
FReal W8[nRhs][ (ORDER-1)*(ORDER-1)*(ORDER-1)];
for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
W1[idxRhs] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1); ++i) W2[idxRhs][0][i] = W2[idxRhs][1][i] = W2[idxRhs][2][i] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1)*(ORDER-1); ++i) W4[idxRhs][0][i] = W4[idxRhs][1][i] = W4[idxRhs][2][i] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1)*(ORDER-1)*(ORDER-1); ++i) W8[idxRhs][i] = FReal(0.);
FReal W1[nVals*nRhs];
FReal W2[nVals*nRhs][3][ ORDER-1];
FReal W4[nVals*nRhs][3][(ORDER-1)*(ORDER-1)];
FReal W8[nVals*nRhs][ (ORDER-1)*(ORDER-1)*(ORDER-1)];
for(int idxMul = 0 ; idxMul < nVals*nRhs ; ++idxMul){
W1[idxMul] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1); ++i) W2[idxMul][0][i] = W2[idxMul][1][i] = W2[idxMul][2][i] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1)*(ORDER-1); ++i) W4[idxMul][0][i] = W4[idxMul][1][i] = W4[idxMul][2][i] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1)*(ORDER-1)*(ORDER-1); ++i) W8[idxMul][i] = FReal(0.);
}
// loop over source particles
// const FReal*const physicalValues = inParticles->getPhysicalValues();
const FReal*const positionsX = inParticles->getPositions()[0];
const FReal*const positionsY = inParticles->getPositions()[1];
const FReal*const positionsZ = inParticles->getPositions()[2];
......@@ -682,86 +682,93 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyP2M(const FPoint& c
T_of_x[1][j] = y2 * T_of_x[1][j-1] - T_of_x[1][j-2]; // 2 flops
T_of_x[2][j] = z2 * T_of_x[2][j-1] - T_of_x[2][j-2]; // 2 flops
}
for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
const FReal*const physicalValues = inParticles->getPhysicalValues(idxRhs);
const FReal weight = physicalValues[idxPart];
W1[idxRhs] += weight; // 1 flop
for (unsigned int i=1; i<ORDER; ++i) {
const FReal wx = weight * T_of_x[0][i]; // 1 flop
const FReal wy = weight * T_of_x[1][i]; // 1 flop
const FReal wz = weight * T_of_x[2][i]; // 1 flop
W2[idxRhs][0][i-1] += wx; // 1 flop
W2[idxRhs][1][i-1] += wy; // 1 flop
W2[idxRhs][2][i-1] += wz; // 1 flop
for (unsigned int j=1; j<ORDER; ++j) {
const FReal wxy = wx * T_of_x[1][j]; // 1 flop
const FReal wxz = wx * T_of_x[2][j]; // 1 flop
const FReal wyz = wy * T_of_x[2][j]; // 1 flop
W4[idxRhs][0][(j-1)*(ORDER-1) + (i-1)] += wxy; // 1 flop
W4[idxRhs][1][(j-1)*(ORDER-1) + (i-1)] += wxz; // 1 flop
W4[idxRhs][2][(j-1)*(ORDER-1) + (i-1)] += wyz; // 1 flop
for (unsigned int k=1; k<ORDER; ++k) {
const FReal wxyz = wxy * T_of_x[2][k]; // 1 flop
W8[idxRhs][(k-1)*(ORDER-1)*(ORDER-1) + (j-1)*(ORDER-1) + (i-1)] += wxyz; // 1 flop
} // flops: (ORDER-1) * 2
} // flops: (ORDER-1) * (6 + (ORDER-1) * 2)
} // flops: (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1) * 2))
} // flops: ... * NRHS
for(int idxVals = 0 ; idxVals < nVals ; ++idxVals){
for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
const int idxMul = idxVals*nRhs+idxRhs;
const FReal*const physicalValues = inParticles->getPhysicalValues(idxVals,idxRhs);
const FReal weight = physicalValues[idxPart];
W1[idxMul] += weight; // 1 flop
for (unsigned int i=1; i<ORDER; ++i) {
const FReal wx = weight * T_of_x[0][i]; // 1 flop
const FReal wy = weight * T_of_x[1][i]; // 1 flop
const FReal wz = weight * T_of_x[2][i]; // 1 flop
W2[idxMul][0][i-1] += wx; // 1 flop
W2[idxMul][1][i-1] += wy; // 1 flop
W2[idxMul][2][i-1] += wz; // 1 flop
for (unsigned int j=1; j<ORDER; ++j) {
const FReal wxy = wx * T_of_x[1][j]; // 1 flop
const FReal wxz = wx * T_of_x[2][j]; // 1 flop
const FReal wyz = wy * T_of_x[2][j]; // 1 flop
W4[idxMul][0][(j-1)*(ORDER-1) + (i-1)] += wxy; // 1 flop
W4[idxMul][1][(j-1)*(ORDER-1) + (i-1)] += wxz; // 1 flop
W4[idxMul][2][(j-1)*(ORDER-1) + (i-1)] += wyz; // 1 flop
for (unsigned int k=1; k<ORDER; ++k) {
const FReal wxyz = wxy * T_of_x[2][k]; // 1 flop
W8[idxMul][(k-1)*(ORDER-1)*(ORDER-1) + (j-1)*(ORDER-1) + (i-1)] += wxyz; // 1 flop
} // flops: (ORDER-1) * 2
} // flops: (ORDER-1) * (6 + (ORDER-1) * 2)
} // flops: (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1) * 2))
} // flops: ... * NRHS
} // flops: ... * NVALS
} // flops: N * (18 + (ORDER-2) * 6 + (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1) * 2)))
////////////////////////////////////////////////////////////////////
for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
for(int idxMul = 0 ; idxMul < nVals*nRhs ; ++idxMul){
// loop over interpolation points
FReal F2[3][ORDER];
FReal F4[3][ORDER*ORDER];
FReal F8[ ORDER*ORDER*ORDER];
{
// compute W2: 3 * ORDER*(2*(ORDER-1)-1) flops
FBlas::gemv(ORDER, ORDER-1, FReal(1.), const_cast<FReal*>(T), W2[idxRhs][0], F2[0]);
FBlas::gemv(ORDER, ORDER-1, FReal(1.), const_cast<FReal*>(T), W2[idxRhs][1], F2[1]);
FBlas::gemv(ORDER, ORDER-1, FReal(1.), const_cast<FReal*>(T), W2[idxRhs][2], F2[2]);
// compute W4: 3 * [ORDER*(ORDER-1)*(2*(ORDER-1)-1) + ORDER*ORDER*(2*(ORDER-1)-1)]
FReal C[ORDER * (ORDER-1)];
FBlas::gemmt(ORDER, ORDER-1, ORDER-1, FReal(1.), const_cast<FReal*>(T), ORDER, W4[idxRhs][0], ORDER-1, C, ORDER);
FBlas::gemmt(ORDER, ORDER-1, ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, C, ORDER, F4[0], ORDER);
FBlas::gemmt(ORDER, ORDER-1, ORDER-1, FReal(1.), const_cast<FReal*>(T), ORDER, W4[idxRhs][1], ORDER-1, C, ORDER);
FBlas::gemmt(ORDER, ORDER-1, ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, C, ORDER, F4[1], ORDER);
FBlas::gemmt(ORDER, ORDER-1, ORDER-1, FReal(1.), const_cast<FReal*>(T), ORDER, W4[idxRhs][2], ORDER-1, C, ORDER);
FBlas::gemmt(ORDER, ORDER-1, ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, C, ORDER, F4[2], ORDER);
// compute W8: 3 * (2*(ORDER-1)-1) * [ORDER*(ORDER-1)*(ORDER-1) + ORDER*ORDER*(ORDER-1) + ORDER*ORDER*ORDER]
FReal D[ORDER * (ORDER-1) * (ORDER-1)];
FBlas::gemm(ORDER, ORDER-1, (ORDER-1)*(ORDER-1), FReal(1.), const_cast<FReal*>(T), ORDER, W8[idxRhs], ORDER-1, D, ORDER);
FReal E[(ORDER-1) * (ORDER-1) * ORDER];
for (unsigned int s=0; s<perm0.size; ++s) E[perm0.mni[s]] = D[perm0.imn[s]];
FReal F[ORDER * (ORDER-1) * ORDER];
FBlas::gemm(ORDER, ORDER-1, ORDER*(ORDER-1), FReal(1.), const_cast<FReal*>(T), ORDER, E, ORDER-1, F, ORDER);
FReal G[(ORDER-1) * ORDER * ORDER];
for (unsigned int s=0; s<perm1.size; ++s) G[perm1.nij[s]] = F[perm1.jni[s]];
FReal H[ORDER * ORDER * ORDER];
FBlas::gemm(ORDER, ORDER-1, ORDER*ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, G, ORDER-1, H, ORDER);
for (unsigned int s=0; s<perm2.size; ++s) F8[perm2.ijk[s]] = H[perm2.kij[s]];
}
// loop over interpolation points
FReal F2[3][ORDER];
FReal F4[3][ORDER*ORDER];
FReal F8[ ORDER*ORDER*ORDER];
{
// compute W2: 3 * ORDER*(2*(ORDER-1)-1) flops
FBlas::gemv(ORDER, ORDER-1, FReal(1.), const_cast<FReal*>(T), W2[idxMul][0], F2[0]);
FBlas::gemv(ORDER, ORDER-1, FReal(1.), const_cast<FReal*>(T), W2[idxMul][1], F2[1]);
FBlas::gemv(ORDER, ORDER-1, FReal(1.), const_cast<FReal*>(T), W2[idxMul][2], F2[2]);
// compute W4: 3 * [ORDER*(ORDER-1)*(2*(ORDER-1)-1) + ORDER*ORDER*(2*(ORDER-1)-1)]
FReal C[ORDER * (ORDER-1)];
FBlas::gemmt(ORDER, ORDER-1, ORDER-1, FReal(1.), const_cast<FReal*>(T), ORDER, W4[idxMul][0], ORDER-1, C, ORDER);
FBlas::gemmt(ORDER, ORDER-1, ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, C, ORDER, F4[0], ORDER);
FBlas::gemmt(ORDER, ORDER-1, ORDER-1, FReal(1.), const_cast<FReal*>(T), ORDER, W4[idxMul][1], ORDER-1, C, ORDER);
FBlas::gemmt(ORDER, ORDER-1, ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, C, ORDER, F4[1], ORDER);
FBlas::gemmt(ORDER, ORDER-1, ORDER-1, FReal(1.), const_cast<FReal*>(T), ORDER, W4[idxMul][2], ORDER-1, C, ORDER);
FBlas::gemmt(ORDER, ORDER-1, ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, C, ORDER, F4[2], ORDER);
// compute W8: 3 * (2*(ORDER-1)-1) * [ORDER*(ORDER-1)*(ORDER-1) + ORDER*ORDER*(ORDER-1) + ORDER*ORDER*ORDER]
FReal D[ORDER * (ORDER-1) * (ORDER-1)];
FBlas::gemm(ORDER, ORDER-1, (ORDER-1)*(ORDER-1), FReal(1.), const_cast<FReal*>(T), ORDER, W8[idxMul], ORDER-1, D, ORDER);
FReal E[(ORDER-1) * (ORDER-1) * ORDER];
for (unsigned int s=0; s<perm0.size; ++s) E[perm0.mni[s]] = D[perm0.imn[s]];
FReal F[ORDER * (ORDER-1) * ORDER];
FBlas::gemm(ORDER, ORDER-1, ORDER*(ORDER-1), FReal(1.), const_cast<FReal*>(T), ORDER, E, ORDER-1, F, ORDER);
FReal G[(ORDER-1) * ORDER * ORDER];
for (unsigned int s=0; s<perm1.size; ++s) G[perm1.nij[s]] = F[perm1.jni[s]];
FReal H[ORDER * ORDER * ORDER];
FBlas::gemm(ORDER, ORDER-1, ORDER*ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, G, ORDER-1, H, ORDER);
for (unsigned int s=0; s<perm2.size; ++s) F8[perm2.ijk[s]] = H[perm2.kij[s]];
}
// assemble multipole expansions
for (unsigned int i=0; i<ORDER; ++i) {
for (unsigned int j=0; j<ORDER; ++j) {
for (unsigned int k=0; k<ORDER; ++k) {
const unsigned int idx = k*ORDER*ORDER + j*ORDER + i;
multipoleExpansion[2*idxRhs*nnodes + idx] += (W1[idxRhs] +
FReal(2.) * (F2[0][i] + F2[1][j] + F2[2][k]) +
FReal(4.) * (F4[0][j*ORDER+i] + F4[1][k*ORDER+i] + F4[2][k*ORDER+j]) +
FReal(8.) * F8[idx]) / nnodes; // 11 * ORDER*ORDER*ORDER flops
// assemble multipole expansions
for (unsigned int i=0; i<ORDER; ++i) {
for (unsigned int j=0; j<ORDER; ++j) {
for (unsigned int k=0; k<ORDER; ++k) {
const unsigned int idx = k*ORDER*ORDER + j*ORDER + i;
multipoleExpansion[2*idxMul*nnodes + idx] += (W1[idxMul] +
FReal(2.) * (F2[0][i] + F2[1][j] + F2[2][k]) +
FReal(4.) * (F4[0][j*ORDER+i] + F4[1][k*ORDER+i] + F4[2][k*ORDER+j]) +
FReal(8.) * F8[idx]) / nnodes; // 11 * ORDER*ORDER*ORDER flops
}
}
}
}
} // NRHS
} // NVALS*NRHS
}
......@@ -836,55 +843,62 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyP2M(const FPoint& c
/**
* Local to particle operation: application of \f$S_\ell(x,\bar x_m)\f$ (interpolation)
*/
template <int ORDER, class MatrixKernelClass>
template <int ORDER, class MatrixKernelClass, int NVALS>
template <class ContainerClass>
inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& center,
inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2P(const FPoint& center,
const FReal width,
const FReal *const localExpansion,
ContainerClass *const inParticles) const
{
FReal f1[nLhs];
FReal W2[nLhs][3][ ORDER-1];
FReal W4[nLhs][3][(ORDER-1)*(ORDER-1)];
FReal W8[nLhs][ (ORDER-1)*(ORDER-1)*(ORDER-1)];
{
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
// sum over interpolation points
f1[idxLhs] = FReal(0.);
for(unsigned int i=0; i<ORDER-1; ++i) W2[idxLhs][0][i] = W2[idxLhs][1][i] = W2[idxLhs][2][i] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1)*(ORDER-1); ++i) W4[idxLhs][0][i] = W4[idxLhs][1][i] = W4[idxLhs][2][i] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1)*(ORDER-1)*(ORDER-1); ++i) W8[idxLhs][i] = FReal(0.);
FReal f1[nVals*nLhs];
FReal W2[nVals*nLhs][3][ ORDER-1];
FReal W4[nVals*nLhs][3][(ORDER-1)*(ORDER-1)];
FReal W8[nVals*nLhs][ (ORDER-1)*(ORDER-1)*(ORDER-1)];
{
for (unsigned int idx=0; idx<nnodes; ++idx) {
const unsigned int i = node_ids[idx][0];
const unsigned int j = node_ids[idx][1];
const unsigned int k = node_ids[idx][2];
for(int idxVals = 0 ; idxVals < nVals ; ++idxVals){
f1[idxLhs] += localExpansion[2*idxLhs*nnodes + idx]; // 1 flop
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
for (unsigned int l=0; l<ORDER-1; ++l) {
const FReal wx = T[l*ORDER+i] * localExpansion[2*idxLhs*nnodes + idx]; // 1 flops
const FReal wy = T[l*ORDER+j] * localExpansion[2*idxLhs*nnodes + idx]; // 1 flops
const FReal wz = T[l*ORDER+k] * localExpansion[2*idxLhs*nnodes + idx]; // 1 flops
W2[idxLhs][0][l] += wx; // 1 flops
W2[idxLhs][1][l] += wy; // 1 flops
W2[idxLhs][2][l] += wz; // 1 flops
for (unsigned int m=0; m<ORDER-1; ++m) {
const FReal wxy = wx * T[m*ORDER + j]; // 1 flops
const FReal wxz = wx * T[m*ORDER + k]; // 1 flops
const FReal wyz = wy * T[m*ORDER + k]; // 1 flops
W4[idxLhs][0][m*(ORDER-1)+l] += wxy; // 1 flops
W4[idxLhs][1][m*(ORDER-1)+l] += wxz; // 1 flops
W4[idxLhs][2][m*(ORDER-1)+l] += wyz; // 1 flops
for (unsigned int n=0; n<ORDER-1; ++n) {
const FReal wxyz = wxy * T[n*ORDER + k]; // 1 flops
W8[idxLhs][n*(ORDER-1)*(ORDER-1) + m*(ORDER-1) + l] += wxyz; // 1 flops
} // (ORDER-1) * 2 flops
} // (ORDER-1) * (6 + (ORDER-1)*2) flops
} // (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1)*2)) flops
} // ORDER*ORDER*ORDER * (1 + (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1)*2))) flops
} // NLHS
const int idxLoc = idxVals*nLhs+idxLhs;
// sum over interpolation points
f1[idxLoc] = FReal(0.);
for(unsigned int i=0; i<ORDER-1; ++i) W2[idxLoc][0][i] = W2[idxLoc][1][i] = W2[idxLoc][2][i] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1)*(ORDER-1); ++i) W4[idxLoc][0][i] = W4[idxLoc][1][i] = W4[idxLoc][2][i] = FReal(0.);
for(unsigned int i=0; i<(ORDER-1)*(ORDER-1)*(ORDER-1); ++i) W8[idxLoc][i] = FReal(0.);
for (unsigned int idx=0; idx<nnodes; ++idx) {
const unsigned int i = node_ids[idx][0];
const unsigned int j = node_ids[idx][1];
const unsigned int k = node_ids[idx][2];
f1[idxLoc] += localExpansion[2*idxLoc*nnodes + idx]; // 1 flop
for (unsigned int l=0; l<ORDER-1; ++l) {
const FReal wx = T[l*ORDER+i] * localExpansion[2*idxLoc*nnodes + idx]; // 1 flops
const FReal wy = T[l*ORDER+j] * localExpansion[2*idxLoc*nnodes + idx]; // 1 flops
const FReal wz = T[l*ORDER+k] * localExpansion[2*idxLoc*nnodes + idx]; // 1 flops
W2[idxLoc][0][l] += wx; // 1 flops
W2[idxLoc][1][l] += wy; // 1 flops
W2[idxLoc][2][l] += wz; // 1 flops
for (unsigned int m=0; m<ORDER-1; ++m) {
const FReal wxy = wx * T[m*ORDER + j]; // 1 flops
const FReal wxz = wx * T[m*ORDER + k]; // 1 flops
const FReal wyz = wy * T[m*ORDER + k]; // 1 flops
W4[idxLoc][0][m*(ORDER-1)+l] += wxy; // 1 flops
W4[idxLoc][1][m*(ORDER-1)+l] += wxz; // 1 flops
W4[idxLoc][2][m*(ORDER-1)+l] += wyz; // 1 flops
for (unsigned int n=0; n<ORDER-1; ++n) {
const FReal wxyz = wxy * T[n*ORDER + k]; // 1 flops
W8[idxLoc][n*(ORDER-1)*(ORDER-1) + m*(ORDER-1) + l] += wxyz; // 1 flops
} // (ORDER-1) * 2 flops
} // (ORDER-1) * (6 + (ORDER-1)*2) flops
} // (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1)*2)) flops
} // ORDER*ORDER*ORDER * (1 + (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1)*2))) flops
} // NLHS
} // NVALS
}
......@@ -892,14 +906,10 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& c
const map_glob_loc map(center, width);
FPoint localPosition;
//const FReal*const physicalValues = inParticles->getPhysicalValues();
// get particles position
const FReal*const positionsX = inParticles->getPositions()[0];
const FReal*const positionsY = inParticles->getPositions()[1];
const FReal*const positionsZ = inParticles->getPositions()[2];
//FReal*const forcesX = inParticles->getForcesX();
//FReal*const forcesY = inParticles->getForcesY();
//FReal*const forcesZ = inParticles->getForcesZ();
// FReal*const potentials = inParticles->getPotentials();
for(int idxPart = 0 ; idxPart < inParticles->getNbParticles() ; ++ idxPart){
......@@ -921,47 +931,58 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& c
}
}
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
// distribution over potential components:
// We sum the multidim contribution of PhysValue
// This was originally done at M2L step but moved here
// because their storage is required by the force computation.
// In fact : f_{ik}(x)=w_j(x) \nabla_{x_i} K_{ij}(x,y)w_j(y))
const unsigned int idxPot = idxLhs / nPV;
// loop over multipole expansions
for(int idxVals = 0 ; idxVals < nVals ; ++idxVals){
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
FReal*const potentials = inParticles->getPotentials(idxPot);
const int idxLoc = idxVals*nLhs+idxLhs;
// interpolate and increment target value
FReal targetValue = potentials[idxPart];
{
FReal f2, f4, f8;
// distribution over potential components:
// We sum the multidim contribution of PhysValue
// This was originally done at M2L step but moved here
// because their storage is required by the force computation.
// In fact : f_{ik}(x)=w_j(x) \nabla_{x_i} K_{ij}(x,y)w_j(y))
const unsigned int idxPot = idxLhs / nPV;
FReal*const potentials = inParticles->getPotentials(idxVals,idxPot);
// interpolate and increment target value
FReal targetValue = potentials[idxPart];
{
f2 = f4 = f8 = FReal(0.);
for (unsigned int l=1; l<ORDER; ++l) {
f2 +=
T_of_x[0][l] * W2[idxLhs][0][l-1] +
T_of_x[1][l] * W2[idxLhs][1][l-1] +
T_of_x[2][l] * W2[idxLhs][2][l-1]; // 6 flops
for (unsigned int m=1; m<ORDER; ++m) {
f4 +=
T_of_x[0][l] * T_of_x[1][m] * W4[idxLhs][0][(m-1)*(ORDER-1)+(l-1)] +
T_of_x[0][l] * T_of_x[2][m] * W4[idxLhs][1][(m-1)*(ORDER-1)+(l-1)] +
T_of_x[1][l] * T_of_x[2][m] * W4[idxLhs][2][(m-1)*(ORDER-1)+(l-1)]; // 9 flops
for (unsigned int n=1; n<ORDER; ++n) {
f8 +=
T_of_x[0][l] * T_of_x[1][m] * T_of_x[2][n] *
W8[idxLhs][(n-1)*(ORDER-1)*(ORDER-1) + (m-1)*(ORDER-1) + (l-1)];
} // ORDER * 4 flops
} // ORDER * (9 + ORDER * 4) flops
} // ORDER * (ORDER * (9 + ORDER * 4)) flops
}
targetValue = (f1[idxLhs] + FReal(2.)*f2 + FReal(4.)*f4 + FReal(8.)*f8) / nnodes; // 7 flops
} // 7 + ORDER * (ORDER * (9 + ORDER * 4)) flops
// set potential
potentials[idxPart] += (targetValue);
} // NLHS
FReal f2, f4, f8;
{
f2 = f4 = f8 = FReal(0.);
for (unsigned int l=1; l<ORDER; ++l) {
f2 +=
T_of_x[0][l] * W2[idxLoc][0][l-1] +
T_of_x[1][l] * W2[idxLoc][1][l-1] +
T_of_x[2][l] * W2[idxLoc][2][l-1]; // 6 flops
for (unsigned int m=1; m<ORDER; ++m) {
f4 +=
T_of_x[0][l] * T_of_x[1][m] * W4[idxLoc][0][(m-1)*(ORDER-1)+(l-1)] +
T_of_x[0][l] * T_of_x[2][m] * W4[idxLoc][1][(m-1)*(ORDER-1)+(l-1)] +
T_of_x[1][l] * T_of_x[2][m] * W4[idxLoc][2][(m-1)*(ORDER-1)+(l-1)]; // 9 flops
for (unsigned int n=1; n<ORDER; ++n) {
f8 +=
T_of_x[0][l] * T_of_x[1][m] * T_of_x[2][n] *
W8[idxLoc][(n-1)*(ORDER-1)*(ORDER-1) + (m-1)*(ORDER-1) + (l-1)];
} // ORDER * 4 flops
} // ORDER * (9 + ORDER * 4) flops
} // ORDER * (ORDER * (9 + ORDER * 4)) flops
}
targetValue = (f1[idxLoc] + FReal(2.)*f2 + FReal(4.)*f4 + FReal(8.)*f8) / nnodes; // 7 flops
} // 7 + ORDER * (ORDER * (9 + ORDER * 4)) flops
// set potential
potentials[idxPart] += (targetValue);
} // NLHS
}// NVALS
} // N * (7 + ORDER * (ORDER * (9 + ORDER * 4))) flops
}
......@@ -1057,9 +1078,9 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& c
/**
* Local to particle operation: application of \f$\nabla_x S_\ell(x,\bar x_m)\f$ (interpolation)
*/
template <int ORDER, class MatrixKernelClass>
template <int ORDER, class MatrixKernelClass, int NVALS>
template <class ContainerClass>
inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const FPoint& center,
inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PGradient(const FPoint& center,
const FReal width,
const FReal *const localExpansion,
ContainerClass *const inParticles) const
......@@ -1078,14 +1099,9 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F
FReal U_of_x[ORDER][3];
FReal P[3];
// const FReal*const physicalValues = inParticles->getPhysicalValues();
const FReal*const positionsX = inParticles->getPositions()[0];
const FReal*const positionsY = inParticles->getPositions()[1];
const FReal*const positionsZ = inParticles->getPositions()[2];
// FReal*const forcesX = inParticles->getForcesX();
// FReal*const forcesY = inParticles->getForcesY();
// FReal*const forcesZ = inParticles->getForcesZ();
//FReal*const potentials = inParticles->getPotentials();
for(int idxPart = 0 ; idxPart < inParticles->getNbParticles() ; ++ idxPart){
......@@ -1120,10 +1136,10 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F
}
// apply P and increment forces
FReal forces[nLhs][3];
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs)
for (unsigned int i=0; i<3; ++i)
forces[idxLhs][i] = FReal(0.);
FReal forces[nVals*nLhs][3];
for(int idxLoc = 0 ; idxLoc < nVals*nLhs ; ++idxLoc)
for (unsigned int i=0; i<3; ++i)
forces[idxLoc][i] = FReal(0.);
for (unsigned int n=0; n<nnodes; ++n) {
......@@ -1142,8 +1158,8 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F
P[0] *= FReal(2.);
P[1] *= FReal(2.); P[1] += FReal(1.);
P[2] *= FReal(2.); P[2] += FReal(1.);
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs)
forces[idxLhs][0] += P[0] * P[1] * P[2] * localExpansion[2*idxLhs*nnodes + n];
for(int idxLoc = 0 ; idxLoc < nVals*nLhs ; ++idxLoc)
forces[idxLoc][0] += P[0] * P[1] * P[2] * localExpansion[2*idxLoc*nnodes + n];
// f1 component //////////////////////////////////////
P[0] = T_of_x[1][0] * T_of_roots[1][j[0]];
......@@ -1157,8 +1173,8 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F
P[0] *= FReal(2.); P[0] += FReal(1.);
P[1] *= FReal(2.);
P[2] *= FReal(2.); P[2] += FReal(1.);
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs)
forces[idxLhs][1] += P[0] * P[1] * P[2] * localExpansion[2*idxLhs*nnodes + n];
for(int idxLoc = 0 ; idxLoc < nVals*nLhs ; ++idxLoc)
forces[idxLoc][1] += P[0] * P[1] * P[2] * localExpansion[2*idxLoc*nnodes + n];
// f2 component //////////////////////////////////////
P[0] = T_of_x[1][0] * T_of_roots[1][j[0]];
......@@ -1172,31 +1188,37 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F
P[0] *= FReal(2.); P[0] += FReal(1.);
P[1] *= FReal(2.); P[1] += FReal(1.);
P[2] *= FReal(2.);
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs)
forces[idxLhs][2] += P[0] * P[1] * P[2] * localExpansion[2*idxLhs*nnodes + n];
for(int idxLoc = 0 ; idxLoc < nVals*nLhs ; ++idxLoc)
forces[idxLoc][2] += P[0] * P[1] * P[2] * localExpansion[2*idxLoc*nnodes + n];
}
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
const unsigned int idxPot = idxLhs / nPV;
const unsigned int idxPV = idxLhs % nPV;
// scale forces
forces[idxLhs][0] *= jacobian[0] / nnodes;
forces[idxLhs][1] *= jacobian[1] / nnodes;
forces[idxLhs][2] *= jacobian[2] / nnodes;
// get pointers to PhysValues and force components
const FReal*const physicalValues = inParticles->getPhysicalValues(idxPV);
FReal*const forcesX = inParticles->getForcesX(idxPot);
FReal*const forcesY = inParticles->getForcesY(idxPot);
FReal*const forcesZ = inParticles->getForcesZ(idxPot);
// set computed forces
forcesX[idxPart] += forces[idxLhs][0] * physicalValues[idxPart];
forcesY[idxPart] += forces[idxLhs][1] * physicalValues[idxPart];
forcesZ[idxPart] += forces[idxLhs][2] * physicalValues[idxPart];
}
}
const unsigned int idxPot = idxLhs / nPV;
const unsigned int idxPV = idxLhs % nPV;
for(int idxVals = 0 ; idxVals < nVals ; ++idxVals){
const int idxLoc = idxVals*nLhs+idxLhs;
// scale forces
forces[idxLoc][0] *= jacobian[0] / nnodes;
forces[idxLoc][1] *= jacobian[1] / nnodes;
forces[idxLoc][2] *= jacobian[2] / nnodes;
// get pointers to PhysValues and force components
const FReal*const physicalValues = inParticles->getPhysicalValues(idxVals,idxPV);
FReal*const forcesX = inParticles->getForcesX(idxVals,idxPot);
FReal*const forcesY = inParticles->getForcesY(idxVals,idxPot);
FReal*const forcesZ = inParticles->getForcesZ(idxVals,idxPot);
// set computed forces
forcesX[idxPart] += forces[idxLoc][0] * physicalValues[idxPart];
forcesY[idxPart] += forces[idxLoc][1] * physicalValues[idxPart];
forcesZ[idxPart] += forces[idxLoc][2] * physicalValues[idxPart];
} // NVALS
} // NLHS
} // N
}
......@@ -1204,55 +1226,17 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F
* Local to particle operation: application of \f$S_\ell(x,\bar x_m)\f$ and
* \f$\nabla_x S_\ell(x,\bar x_m)\f$ (interpolation)
*/
template <int ORDER, class MatrixKernelClass>
template <int ORDER, class MatrixKernelClass, int NVALS>
template <class ContainerClass>
inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PTotal(const FPoint& center,
inline void FChebInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PTotal(const FPoint& center,
const FReal width,
const FReal *const localExpansion,
ContainerClass *const inParticles) const
{
FReal f1[nLhs];
FReal W2[nLhs][3][ ORDER-1];
FReal W4[nLhs][3][(ORDER-1)*(ORDER-1)];
FReal W8[nLhs][ (ORDER-1)*(ORDER-1)*(ORDER-1)];
//{ // sum over interpolation points
// f1 = FReal(0.);
// for(unsigned int i=0; i<ORDER-1; ++i) W2[0][i] = W2[1][i] = W2[2][i] = FReal(0.);
// for(unsigned int i=0; i<(ORDER-1)*(ORDER-1); ++i) W4[0][i] = W4[1][i] = W4[2][i] = FReal(0.);
// for(unsigned int i=0; i<(ORDER-1)*(ORDER-1)*(ORDER-1); ++i) W8[i] = FReal(0.);
//
// for (unsigned int idx=0; idx<nnodes; ++idx) {
// const unsigned int i = node_ids[idx][0];
// const unsigned int j = node_ids[idx][1];
// const unsigned int k = node_ids[idx][2];
//
// f1 += localExpansion[idx]; // 1 flop
//
// for (unsigned int l=0; l<ORDER-1; ++l) {
// const FReal wx = T[l*ORDER+i] * localExpansion[idx]; // 1 flops
// const FReal wy = T[l*ORDER+j] * localExpansion[idx]; // 1 flops
// const FReal wz = T[l*ORDER+k] * localExpansion[idx]; // 1 flops
// W2[0][l] += wx; // 1 flops
// W2[1][l] += wy; // 1 flops
// W2[2][l] += wz; // 1 flops
// for (unsigned int m=0; m<ORDER-1; ++m) {
// const FReal wxy = wx * T[m*ORDER + j]; // 1 flops
// const FReal wxz = wx * T[m*ORDER + k]; // 1 flops
// const FReal wyz = wy * T[m*ORDER + k]; // 1 flops
// W4[0][m*(ORDER-1)+l] += wxy; // 1 flops
// W4[1][m*(ORDER-1)+l] += wxz; // 1 flops
// W4[2][m*(ORDER-1)+l] += wyz; // 1 flops
// for (unsigned int n=0; n<ORDER-1; ++n) {
// const FReal wxyz = wxy * T[n*ORDER + k]; // 1 flops
// W8[n*(ORDER-1)*(ORDER-1) + m*(ORDER-1) + l] += wxyz; // 1 flops
// } // (ORDER-1) * 2 flops
// } // (ORDER-1) * (6 + (ORDER-1)*2) flops
// } // (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1)*2)) flops
//
// } // ORDER*ORDER*ORDER * (1 + (ORDER-1) * (6 + (ORDER-1) * (6 + (ORDER-1)*2))) flops
//
//}
FReal f1[nVals*nLhs];
FReal W2[nVals*nLhs][3][ ORDER-1];
FReal W4[nVals*nLhs][3][(ORDER-1)*(ORDER-1)];
FReal W8[nVals*nLhs][ (ORDER-1)*(ORDER-1)*(ORDER-1)];
{
// for W2
......@@ -1265,55 +1249,57 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PTotal(const FPoi
FReal G8[ORDER * (ORDER-1)*(ORDER-1)];
// sum local expansions
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
f1[idxLhs] = FReal(0.);
for (unsigned int idx=0; idx<nnodes; ++idx) f1[idxLhs] += localExpansion[2*idxLhs*nnodes + idx]; // 1 flop
//////////////////////////////////////////////////////////////////
// IMPORTANT: NOT CHANGE ORDER OF COMPUTATIONS!!! ////////////////
//////////////////////////////////////////////////////////////////
// W2[0] ///////////////// (ORDER-1)*ORDER*ORDER * 2*ORDER
FBlas::gemtm(ORDER, ORDER-1, ORDER*ORDER, FReal(1.), const_cast<FReal*>(T), ORDER,
const_cast<FReal*>(localExpansion) + 2*idxLhs*nnodes, ORDER, F2, ORDER-1);
for (unsigned int l=0; l<ORDER-1; ++l) { W2[idxLhs][0][l] = F2[l];
for (unsigned int j=1; j<ORDER*ORDER; ++j) W2[idxLhs][0][l] += F2[j*(ORDER-1) + l]; }
// W4[0] ///////////////// ORDER*(ORDER-1)*(ORDER-1) + 2*ORDER
perm5.permute(F2, F4);
FBlas::gemtm(ORDER, ORDER-1, ORDER*(ORDER-1), FReal(1.), const_cast<FReal*>(T), ORDER, F4, ORDER, G4, ORDER-1);
for (unsigned int l=0; l<ORDER-1; ++l)
for (unsigned int m=0; m<ORDER-1; ++m) { W4[idxLhs][0][m*(ORDER-1)+l] = G4[l*ORDER*(ORDER-1) + m];
for (unsigned int k=1; k<ORDER; ++k) W4[idxLhs][0][m*(ORDER-1)+l] += G4[l*ORDER*(ORDER-1) + k*(ORDER-1) + m]; }
// W8 //////////////////// (ORDER-1)*(ORDER-1)*(ORDER-1) * (2*ORDER-1)
perm8.permute(G4, G8);
FReal F8[(ORDER-1)*(ORDER-1)*(ORDER-1)];
FBlas::gemtm(ORDER, ORDER-1, (ORDER-1)*(ORDER-1), FReal(1.), const_cast<FReal*>(T), ORDER, G8, ORDER, F8, ORDER-1);
perm9.permute(F8, W8[idxLhs]);
// W4[1] ///////////////// ORDER*(ORDER-1)*(ORDER-1) + 2*ORDER
perm6.permute(F2, F4);
FBlas::gemtm(ORDER, ORDER-1, (ORDER-1)*ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, F4, ORDER, G4, ORDER-1);
for (unsigned int l=0; l<ORDER-1; ++l)
for (unsigned int n=0; n<ORDER-1; ++n) { W4[idxLhs][1][n*(ORDER-1)+l] = G4[l*(ORDER-1) + n];
for (unsigned int j=1; j<ORDER; ++j) W4[idxLhs][1][n*(ORDER-1)+l] += G4[j*(ORDER-1)*(ORDER-1) + l*(ORDER-1) + n]; }
// W2[1] ///////////////// (ORDER-1)*ORDER*ORDER * 2*ORDER
perm3.permute(localExpansion + 2*idxLhs*nnodes, lE);
FBlas::gemtm(ORDER, ORDER-1, ORDER*ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, lE, ORDER, F2, ORDER-1);
for (unsigned int i=0; i<ORDER-1; ++i) { W2[idxLhs][1][i] = F2[i];
for (unsigned int j=1; j<ORDER*ORDER; ++j) W2[idxLhs][1][i] += F2[j*(ORDER-1) + i]; }
// W4[2] ///////////////// ORDER*(ORDER-1)*(ORDER-1) + 2*ORDER
perm7.permute(F2, F4);
FBlas::gemtm(ORDER, ORDER-1, (ORDER-1)*ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, F4, ORDER, G4, ORDER-1);
for (unsigned int m=0; m<ORDER-1; ++m)
for (unsigned int n=0; n<ORDER-1; ++n) { W4[idxLhs][2][n*(ORDER-1)+m] = G4[m*ORDER*(ORDER-1) + n];
for (unsigned int i=1; i<ORDER; ++i) W4[idxLhs][2][n*(ORDER-1)+m] += G4[m*ORDER*(ORDER-1) + i*(ORDER-1) + n]; }
// W2[2] ///////////////// (ORDER-1)*ORDER*ORDER * 2*ORDER
perm4.permute(localExpansion + 2*idxLhs*nnodes, lE);
FBlas::gemtm(ORDER, ORDER-1, ORDER*ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, lE, ORDER, F2, ORDER-1);
for (unsigned int i=0; i<ORDER-1; ++i) { W2[idxLhs][2][i] = F2[i];
for (unsigned int j=1; j<ORDER*ORDER; ++j) W2[idxLhs][2][i] += F2[j*(ORDER-1) + i]; }
}
for(int idxLoc = 0 ; idxLoc < nVals*nLhs ; ++idxLoc){
f1[idxLoc] = FReal(0.);
for (unsigned int idx=0; idx<nnodes; ++idx) f1[idxLoc] += localExpansion[2*idxLoc*nnodes + idx]; // 1 flop
//////////////////////////////////////////////////////////////////
// IMPORTANT: NOT CHANGE ORDER OF COMPUTATIONS!!! ////////////////
//////////////////////////////////////////////////////////////////
// W2[0] ///////////////// (ORDER-1)*ORDER*ORDER * 2*ORDER
FBlas::gemtm(ORDER, ORDER-1, ORDER*ORDER, FReal(1.), const_cast<FReal*>(T), ORDER,
const_cast<FReal*>(localExpansion) + 2*idxLoc*nnodes, ORDER, F2, ORDER-1);
for (unsigned int l=0; l<ORDER-1; ++l) { W2[idxLoc][0][l] = F2[l];
for (unsigned int j=1; j<ORDER*ORDER; ++j) W2[idxLoc][0][l] += F2[j*(ORDER-1) + l]; }
// W4[0] ///////////////// ORDER*(ORDER-1)*(ORDER-1) + 2*ORDER
perm5.permute(F2, F4);
FBlas::gemtm(ORDER, ORDER-1, ORDER*(ORDER-1), FReal(1.), const_cast<FReal*>(T), ORDER, F4, ORDER, G4, ORDER-1);
for (unsigned int l=0; l<ORDER-1; ++l)
for (unsigned int m=0; m<ORDER-1; ++m) { W4[idxLoc][0][m*(ORDER-1)+l] = G4[l*ORDER*(ORDER-1) + m];
for (unsigned int k=1; k<ORDER; ++k) W4[idxLoc][0][m*(ORDER-1)+l] += G4[l*ORDER*(ORDER-1) + k*(ORDER-1) + m]; }
// W8 //////////////////// (ORDER-1)*(ORDER-1)*(ORDER-1) * (2*ORDER-1)
perm8.permute(G4, G8);
FReal F8[(ORDER-1)*(ORDER-1)*(ORDER-1)];
FBlas::gemtm(ORDER, ORDER-1, (ORDER-1)*(ORDER-1), FReal(1.), const_cast<FReal*>(T), ORDER, G8, ORDER, F8, ORDER-1);
perm9.permute(F8, W8[idxLoc]);
// W4[1] ///////////////// ORDER*(ORDER-1)*(ORDER-1) + 2*ORDER
perm6.permute(F2, F4);
FBlas::gemtm(ORDER, ORDER-1, (ORDER-1)*ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, F4, ORDER, G4, ORDER-1);
for (unsigned int l=0; l<ORDER-1; ++l)
for (unsigned int n=0; n<ORDER-1; ++n) { W4[idxLoc][1][n*(ORDER-1)+l] = G4[l*(ORDER-1) + n];
for (unsigned int j=1; j<ORDER; ++j) W4[idxLoc][1][n*(ORDER-1)+l] += G4[j*(ORDER-1)*(ORDER-1) + l*(ORDER-1) + n]; }
// W2[1] ///////////////// (ORDER-1)*ORDER*ORDER * 2*ORDER
perm3.permute(localExpansion + 2*idxLoc*nnodes, lE);
FBlas::gemtm(ORDER, ORDER-1, ORDER*ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, lE, ORDER, F2, ORDER-1);
for (unsigned int i=0; i<ORDER-1; ++i) { W2[idxLoc][1][i] = F2[i];
for (unsigned int j=1; j<ORDER*ORDER; ++j) W2[idxLoc][1][i] += F2[j*(ORDER-1) + i]; }
// W4[2] ///////////////// ORDER*(ORDER-1)*(ORDER-1) + 2*ORDER
perm7.permute(F2, F4);
FBlas::gemtm(ORDER, ORDER-1, (ORDER-1)*ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, F4, ORDER, G4, ORDER-1);
for (unsigned int m=0; m<ORDER-1; ++m)
for (unsigned int n=0; n<ORDER-1; ++n) { W4[idxLoc][2][n*(ORDER-1)+m] = G4[m*ORDER*(ORDER-1) + n];
for (unsigned int i=1; i<ORDER; ++i) W4[idxLoc][2][n*(ORDER-1)+m] += G4[m*ORDER*(ORDER-1) + i*(ORDER-1) + n]; }
// W2[2] ///////////////// (ORDER-1)*ORDER*ORDER * 2*ORDER
perm4.permute(localExpansion + 2*idxLoc*nnodes, lE);
FBlas::gemtm(ORDER, ORDER-1, ORDER*ORDER, FReal(1.), const_cast<FReal*>(T), ORDER, lE, ORDER, F2, ORDER-1);
for (unsigned int i=0; i<ORDER-1; ++i) { W2[idxLoc][2][i] = F2[i];
for (unsigned int j=1; j<ORDER*ORDER; ++j) W2[idxLoc][2][i] += F2[j*(ORDER-1) + i]; }
}// NLHS
}// NLHS
}
// loop over particles
const map_glob_loc map(center, width);
......@@ -1322,14 +1308,9 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PTotal(const FPoi
const FReal jacobian[3] = {Jacobian.getX(), Jacobian.getY(), Jacobian.getZ()};
FPoint localPosition;
// const FReal*const physicalValues = inParticles->getPhysicalValues();
const FReal*const positionsX = inParticles->getPositions()[0];
const FReal*const positionsY = inParticles->getPositions()[1];
const FReal*const positionsZ = inParticles->getPositions()[2];
// FReal*const forcesX = inParticles->getForcesX();
// FReal*const forcesY = inParticles->getForcesY();
// FReal*const forcesZ = inParticles->getForcesZ();
// FReal*const potentials = inParticles->getPotentials();
for(int idxPart = 0 ; idxPart < inParticles->getNbParticles() ; ++ idxPart){
......@@ -1365,71 +1346,77 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PTotal(const FPoi
} // 3 + (ORDER-2)*15
// apply P and increment forces
FReal potential[nLhs];
FReal forces[nLhs][3];
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
potential[idxLhs]= FReal(0.);
FReal potential[nVals*nLhs];
FReal forces[nVals*nLhs][3];
for(int idxLoc = 0 ; idxLoc < nVals*nLhs ; ++idxLoc){
potential[idxLoc]= FReal(0.);
for (unsigned int i=0; i<3; ++i)
forces[idxLhs][i] = FReal(0.);
forces[idxLoc][i] = FReal(0.);
}
for( int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
for( int idxVals = 0 ; idxVals < nVals ; ++idxVals){
for( int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
const int idxLoc = idxVals*nLhs+idxLhs;
{
FReal f2[4], f4[4], f8[4];
for (unsigned int i=0; i<4; ++i) f2[i] = f4[i] = f8[i] = FReal(0.);
{
for (unsigned int l=1; l<ORDER; ++l) {
const FReal w2[3] = {W2[idxLoc][0][l-1], W2[idxLoc][1][l-1], W2[idxLoc][2][l-1]};
f2[0] += T_of_x[0][l ] * w2[0] + T_of_x[1][l] * w2[1] + T_of_x[2][l] * w2[2]; // 6 flops
f2[1] += U_of_x[0][l-1] * w2[0]; // 2 flops
f2[2] += U_of_x[1][l-1] * w2[1]; // 2 flops
f2[3] += U_of_x[2][l-1] * w2[2]; // 2 flops
for (unsigned int m=1; m<ORDER; ++m) {
const unsigned int w4idx = (m-1)*(ORDER-1)+(l-1);
const FReal w4[3] = {W4[idxLoc][0][w4idx], W4[idxLoc][1][w4idx], W4[idxLoc][2][w4idx]};
f4[0] +=
T_of_x[0][l] * T_of_x[1][m] * w4[0] +
T_of_x[0][l] * T_of_x[2][m] * w4[1] +
T_of_x[1][l] * T_of_x[2][m] * w4[2]; // 9 flops
f4[1] += U_of_x[0][l-1] * T_of_x[1][m] * w4[0] + U_of_x[0][l-1] * T_of_x[2][m] * w4[1]; // 6 flops
f4[2] += T_of_x[0][l] * U_of_x[1][m-1] * w4[0] + U_of_x[1][l-1] * T_of_x[2][m] * w4[2]; // 6 flops
f4[3] += T_of_x[0][l] * U_of_x[2][m-1] * w4[1] + T_of_x[1][l] * U_of_x[2][m-1] * w4[2]; // 6 flops
for (unsigned int n=1; n<ORDER; ++n) {
const FReal w8 = W8[idxLoc][(n-1)*(ORDER-1)*(ORDER-1) + (m-1)*(ORDER-1) + (l-1)];
f8[0] += T_of_x[0][l] * T_of_x[1][m] * T_of_x[2][n] * w8; // 4 flops
f8[1] += U_of_x[0][l-1] * T_of_x[1][m] * T_of_x[2][n] * w8; // 4 flops
f8[2] += T_of_x[0][l] * U_of_x[1][m-1] * T_of_x[2][n] * w8; // 4 flops
f8[3] += T_of_x[0][l] * T_of_x[1][m] * U_of_x[2][n-1] * w8; // 4 flops
} // (ORDER-1) * 16 flops
} // (ORDER-1) * (27 + (ORDER-1) * 16) flops
} // (ORDER-1) * ((ORDER-1) * (27 + (ORDER-1) * 16)) flops
}
potential[idxLoc] = (f1[idxLoc] + FReal(2.)*f2[0] + FReal(4.)*f4[0] + FReal(8.)*f8[0]) / nnodes; // 7 flops
forces[idxLoc][0] = ( FReal(2.)*f2[1] + FReal(4.)*f4[1] + FReal(8.)*f8[1]) * jacobian[0] / nnodes; // 7 flops
forces[idxLoc][1] = ( FReal(2.)*f2[2] + FReal(4.)*f4[2] + FReal(8.)*f8[2]) * jacobian[1] / nnodes; // 7 flops
forces[idxLoc][2] = ( FReal(2.)*f2[3] + FReal(4.)*f4[3] + FReal(8.)*f8[3]) * jacobian[2] / nnodes; // 7 flops
} // 28 + (ORDER-1) * ((ORDER-1) * (27 + (ORDER-1) * 16)) flops
{
FReal f2[4], f4[4], f8[4];
for (unsigned int i=0; i<4; ++i) f2[i] = f4[i] = f8[i] = FReal(0.);
{
for (unsigned int l=1; l<ORDER; ++l) {
const FReal w2[3] = {W2[idxLhs][0][l-1], W2[idxLhs][1][l-1], W2[idxLhs][2][l-1]};
f2[0] += T_of_x[0][l ] * w2[0] + T_of_x[1][l] * w2[1] + T_of_x[2][l] * w2[2]; // 6 flops
f2[1] += U_of_x[0][l-1] * w2[0]; // 2 flops
f2[2] += U_of_x[1][l-1] * w2[1]; // 2 flops
f2[3] += U_of_x[2][l-1] * w2[2]; // 2 flops
for (unsigned int m=1; m<ORDER; ++m) {
const unsigned int w4idx = (m-1)*(ORDER-1)+(l-1);
const FReal w4[3] = {W4[idxLhs][0][w4idx], W4[idxLhs][1][w4idx], W4[idxLhs][2][w4idx]};
f4[0] +=
T_of_x[0][l] * T_of_x[1][m] * w4[0] +
T_of_x[0][l] * T_of_x[2][m] * w4[1] +
T_of_x[1][l] * T_of_x[2][m] * w4[2]; // 9 flops
f4[1] += U_of_x[0][l-1] * T_of_x[1][m] * w4[0] + U_of_x[0][l-1] * T_of_x[2][m] * w4[1]; // 6 flops
f4[2] += T_of_x[0][l] * U_of_x[1][m-1] * w4[0] + U_of_x[1][l-1] * T_of_x[2][m] * w4[2]; // 6 flops
f4[3] += T_of_x[0][l] * U_of_x[2][m-1] * w4[1] + T_of_x[1][l] * U_of_x[2][m-1] * w4[2]; // 6 flops
for (unsigned int n=1; n<ORDER; ++n) {
const FReal w8 = W8[idxLhs][(n-1)*(ORDER-1)*(ORDER-1) + (m-1)*(ORDER-1) + (l-1)];
f8[0] += T_of_x[0][l] * T_of_x[1][m] * T_of_x[2][n] * w8; // 4 flops
f8[1] += U_of_x[0][l-1] * T_of_x[1][m] * T_of_x[2][n] * w8; // 4 flops
f8[2] += T_of_x[0][l] * U_of_x[1][m-1] * T_of_x[2][n] * w8; // 4 flops
f8[3] += T_of_x[0][l] * T_of_x[1][m] * U_of_x[2][n-1] * w8; // 4 flops
} // (ORDER-1) * 16 flops
} // (ORDER-1) * (27 + (ORDER-1) * 16) flops
} // (ORDER-1) * ((ORDER-1) * (27 + (ORDER-1) * 16)) flops
}
potential[idxLhs] = (f1[idxLhs] + FReal(2.)*f2[0] + FReal(4.)*f4[0] + FReal(8.)*f8[0]) / nnodes; // 7 flops
forces[idxLhs][0] = ( FReal(2.)*f2[1] + FReal(4.)*f4[1] + FReal(8.)*f8[1]) * jacobian[0] / nnodes; // 7 flops
forces[idxLhs][1] = ( FReal(2.)*f2[2] + FReal(4.)*f4[2] + FReal(8.)*f8[2]) * jacobian[1] / nnodes; // 7 flops
forces[idxLhs][2] = ( FReal(2.)*f2[3] + FReal(4.)*f4[3] + FReal(8.)*f8[3]) * jacobian[2] / nnodes; // 7 flops
} // 28 + (ORDER-1) * ((ORDER-1) * (27 + (ORDER-1) * 16)) flops
const int idxPot = idxLhs / nPV;
const int idxPV = idxLhs % nPV;
// get potentials, physValues and forces components
const FReal*const physicalValues = inParticles->getPhysicalValues(idxPV);
FReal*const forcesX = inParticles->getForcesX(idxPot);
FReal*const forcesY = inParticles->getForcesY(idxPot);
FReal*const forcesZ = inParticles->getForcesZ(idxPot);
FReal*const potentials = inParticles->getPotentials(idxPot);
// set computed potential
potentials[idxPart] += (potential[idxLhs]); // 1 flop
// set computed forces
forcesX[idxPart] += forces[idxLhs][0] * physicalValues[idxPart];
forcesY[idxPart] += forces[idxLhs][1] * physicalValues[idxPart];
forcesZ[idxPart] += forces[idxLhs][2] * physicalValues[idxPart]; // 6 flops
const int idxPot = idxLhs / nPV;
const int idxPV = idxLhs % nPV;
}// NLHS
// get potentials, physValues and forces components
const FReal*const physicalValues = inParticles->getPhysicalValues(idxVals,idxPV);
FReal*const forcesX = inParticles->getForcesX(idxVals,idxPot);
FReal*const forcesY = inParticles->getForcesY(idxVals,idxPot);
FReal*const forcesZ = inParticles->getForcesZ(idxVals,idxPot);
FReal*const potentials = inParticles->getPotentials(idxVals,idxPot);
// set computed potential
potentials[idxPart] += (potential[idxLoc]); // 1 flop
// set computed forces
forcesX[idxPart] += forces[idxLoc][0] * physicalValues[idxPart];
forcesY[idxPart] += forces[idxLoc][1] * physicalValues[idxPart];
forcesZ[idxPart] += forces[idxLoc][2] * physicalValues[idxPart]; // 6 flops
}// NLHS
} // NVALS
} // N * (38 + (ORDER-2)*15 + (ORDER-1)*((ORDER-1) * (27 + (ORDER-1) * 16))) + 6 flops
}
......
......@@ -77,19 +77,22 @@ public:
}
FChebKernel(const int inTreeHeight, const FReal inBoxWidth, const FPoint& inBoxCenter, const MatrixKernelClass *const inMatrixKernel)
: FChebKernel(inTreeHeight, inBoxWidth,inBoxCenter,inMatrixKernel,FMath::pow(10.0,static_cast<FReal>(-ORDER)))
: FChebKernel(inTreeHeight, inBoxWidth,inBoxCenter,inMatrixKernel,FReal(0.)/*FMath::pow(10.0,static_cast<FReal>(-ORDER))*/)
{
}
void P2M(CellClass* const LeafCell,
const ContainerClass* const SourceParticles)
{
const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate()));
// 1) apply Sy
AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getMultipole(0), SourceParticles);
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply Sy
AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getMultipole(idxRhs), SourceParticles);
// 2) apply B
M2LHandler->applyB(LeafCell->getMultipole(idxRhs), LeafCell->getMultipole(idxRhs) + AbstractBaseClass::nnodes);
}
......@@ -102,7 +105,6 @@ public:
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply Sy
FBlas::scal(AbstractBaseClass::nnodes*2, FReal(0.), ParentCell->getMultipole(idxRhs));
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
if (ChildCells[ChildIndex]){
AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxRhs),
......@@ -192,22 +194,23 @@ public:
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply U
M2LHandler->applyU(LeafCell->getLocal(idxRhs) + AbstractBaseClass::nnodes, const_cast<CellClass*>(LeafCell)->getLocal(idxRhs));
//// 2.a) apply Sx
//AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter,
// AbstractBaseClass::BoxWidthLeaf,
// LeafCell->getLocal(),
// TargetParticles);
//// 2.b) apply Px (grad Sx)
//AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter,
// AbstractBaseClass::BoxWidthLeaf,
// LeafCell->getLocal(),
// TargetParticles);
// 2.c) apply Sx and Px (grad Sx)
AbstractBaseClass::Interpolator->applyL2PTotal(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getLocal(idxRhs), TargetParticles);
}
//// 2.a) apply Sx
//AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter,
// AbstractBaseClass::BoxWidthLeaf,
// LeafCell->getLocal(0),
// TargetParticles);
//// 2.b) apply Px (grad Sx)
//AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter,
// AbstractBaseClass::BoxWidthLeaf,
// LeafCell->getLocal(0),
// TargetParticles);
// 2.c) apply Sx and Px (grad Sx)
AbstractBaseClass::Interpolator->applyL2PTotal(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getLocal(0), TargetParticles);
}
void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions
......
......@@ -180,10 +180,8 @@ public:
{
// apply Sy
const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate()));
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getMultipole(idxRhs), SourceParticles);
}
AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getMultipole(0), SourceParticles);
}
......@@ -445,7 +443,6 @@ public:
ContainerClass* const TargetParticles)
{
const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate()));
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// // a) apply Sx
// AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter,
// AbstractBaseClass::BoxWidthLeaf,
......@@ -457,10 +454,10 @@ public:
// LeafCell->getLocal(),
// TargetParticles);
// c) apply Sx and Px (grad Sx)
AbstractBaseClass::Interpolator->applyL2PTotal(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getLocal(idxRhs), TargetParticles);
}
// c) apply Sx and Px (grad Sx)
AbstractBaseClass::Interpolator->applyL2PTotal(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getLocal(0), TargetParticles);
}
void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions
......
......@@ -38,6 +38,7 @@
#include "Kernels/Chebyshev/FChebCell.hpp"
#include "Kernels/Interpolation/FInterpMatrixKernel.hpp"
#include "Kernels/Chebyshev/FChebSymKernel.hpp"
#include "Kernels/Chebyshev/FChebKernel.hpp"
#include "Kernels/Uniform/FUnifCell.hpp"
#include "Kernels/Interpolation/FInterpMatrixKernel.hpp"
......@@ -102,35 +103,20 @@ class TestInterpolationKernel : public FUTester<TestInterpolationKernel> {
// Create octree
OctreeClass tree(NbLevels, SizeSubLevels, loader.getBoxWidth(), loader.getCenterOfBox());
// Insert particle in the tree
// For each particles we associate Nvals charge ( q,0,0,0)
for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
double q = particles[idxPart].getPhysicalValue();
if(NVals == 1){
tree.insert(particles[idxPart].getPosition() , idxPart, q);//,0.0,0.0,0.0);
for(int idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
// Convert FReal[NVALS] to std::array<FReal,NVALS>
std::array<FReal, (1+4*1)*NVals> physicalState;
for(int idxVals = 0 ; idxVals < NVals ; ++idxVals){
physicalState[0*NVals+idxVals]= particles[idxPart].getPhysicalValue();
physicalState[1*NVals+idxVals]=0.0;
physicalState[2*NVals+idxVals]=0.0;
physicalState[3*NVals+idxVals]=0.0;
physicalState[4*NVals+idxVals]=0.0;
}
else if(NVals == 2){
tree.insert(particles[idxPart].getPosition() , idxPart, q, q);//,0.0,0.0,0.0);
}
else if(NVals == 3){
tree.insert(particles[idxPart].getPosition() , idxPart, q, q,q);//,0.0,0.0,0.0);
} else{
FAssertLF(0, "NVALS should be <= 3");
}
}
// for(int idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
// // Convert FReal[NVALS] to std::array<FReal,NVALS>
// std::array<FReal, (1+4*1)*NVals> physicalState;
// for(int idxVals = 0 ; idxVals < NVals ; ++idxVals){
// double q = particles[idxPart].getPhysicalValue();
// physicalState[0*NVals+idxVals]= q;
// physicalState[1*NVals+idxVals]=0.0;
// physicalState[2*NVals+idxVals]=0.0;
// physicalState[3*NVals+idxVals]=0.0;
// physicalState[4*NVals+idxVals]=0.0;
// }
// // put in tree
// tree.insert(particles[idxPart].getPosition(), idxPart, physicalState);
// }
// put in tree
tree.insert(particles[idxPart].getPosition(), idxPart, physicalState);
}
// Run FMM
Print("Fmm...");
......@@ -307,7 +293,20 @@ class TestInterpolationKernel : public FUTester<TestInterpolationKernel> {
RunTest<CellClass,ContainerClass,KernelClass,MatrixKernelClass,LeafClass,OctreeClass,FmmClass, NVals>();
}
/** TestChebKernel */
void TestChebKernel(){
const int NVals = 3;
const unsigned int ORDER = 6;
typedef FP2PParticleContainerIndexed<1,1,NVals> ContainerClass;
typedef FSimpleLeaf<ContainerClass> LeafClass;
typedef FInterpMatrixKernelR MatrixKernelClass;
typedef FChebCell<ORDER, 1, 1, NVals> CellClass;
typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass;
typedef FChebKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER, NVals> KernelClass;
typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
// run test
RunTest<CellClass,ContainerClass,KernelClass,MatrixKernelClass,LeafClass,OctreeClass,FmmClass, NVals>();
}
///////////////////////////////////////////////////////////
// Set the tests!
......@@ -318,6 +317,8 @@ class TestInterpolationKernel : public FUTester<TestInterpolationKernel> {
AddTest(&TestInterpolationKernel::TestUnifKernel,"Test Lagrange/Uniform grid FMM");
AddTest(&TestInterpolationKernel::TestChebSymKernel,"Test Symmetric Chebyshev Kernel with 16 small SVDs and symmetries");
AddTest(&TestInterpolationKernel::TestChebKernel,"Test Chebyshev Kernel with 1 large SVD");
}
};
......
......@@ -49,9 +49,10 @@ class TestRotationDirectPeriodic : public FUTester<TestRotationDirectPeriodic> {
const int NbLevels = 4;
const int SizeSubLevels = 2;
const int PeriodicDeep = 2;
const int NbParticles = 100;
const int nbParticles = 100;
FRandomLoader loader(NbParticles);
FRandomLoader loader(nbParticles);
//
OctreeClass tree(NbLevels, SizeSubLevels, loader.getBoxWidth(), loader.getCenterOfBox());
struct TestParticle{
FPoint position;
......@@ -59,13 +60,14 @@ class TestRotationDirectPeriodic : public FUTester<TestRotationDirectPeriodic> {
FReal physicalValue;
FReal potential;
};
FReal coeff = -1.0, value = 0.10, sum = 0.0;
FReal coeff = -1.0, value = 0.10, sum = 0.0, coerr =0.0, a=0.0;
TestParticle* const particles = new TestParticle[loader.getNumberOfParticles()];
for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
FPoint position;
loader.fillParticle(&position);
value *= coeff ;
sum += value ;
coerr += FMath::Abs(value);
// put in tree
tree.insert(position, idxPart, value);
// get copy
......@@ -75,7 +77,10 @@ class TestRotationDirectPeriodic : public FUTester<TestRotationDirectPeriodic> {
particles[idxPart].forces[0] = 0.0;
particles[idxPart].forces[1] = 0.0;
particles[idxPart].forces[2] = 0.0;
a = std::max(a,position.getX()*position.getX()+position.getY()*position.getY()+position.getZ()*position.getZ());
}
double CorErr = coerr/a;
if (FMath::Abs(sum)> 0.00001){
std::cerr << "Sum of charges is not equal zero!!! (sum=<<"<<sum<<" )"<<std::endl;
exit(-1);
......@@ -112,7 +117,7 @@ class TestRotationDirectPeriodic : public FUTester<TestRotationDirectPeriodic> {
loader.getBoxWidth() * FReal(idxY),
loader.getBoxWidth() * FReal(idxZ));
for(int idxSource = 0 ; idxSource < NbParticles ; ++idxSource){
for(int idxSource = 0 ; idxSource < nbParticles ; ++idxSource){
TestParticle source = particles[idxSource];
source.position += offset;
......@@ -186,29 +191,39 @@ class TestRotationDirectPeriodic : public FUTester<TestRotationDirectPeriodic> {
printf(" Energy DIRECT = %.12e\n",FMath::Abs(energyD));
// Assert
const FReal MaximumDiffPotential = FReal(9e-3);
const FReal MaximumDiffForces = FReal(9e-2);
double epsilon = 1.0/FMath::pow2(P);
const FReal MaximumDiffPotential = FReal(CorErr*epsilon);
const FReal MaximumDiffForces = FReal(10*CorErr*epsilon);
printf(" Criteria error - Epsilon %e \n",epsilon);
Print("Test1 - Error Relative L2 norm Potential ");
uassert(potentialDiff.getRelativeL2Norm() < MaximumDiffPotential); //1
Print("Test2 - Error RMS L2 norm Potential ");
uassert(potentialDiff.getRMSError() < MaximumDiffPotential); //2
FReal CoerrRMS = potentialDiff.getl2Dot()/FMath::Sqrt(static_cast<FReal>(nbParticles));
uassert(potentialDiff.getRMSError() < CoerrRMS*MaximumDiffPotential); //2
Print("Test3 - Error Relative L2 norm FX ");
uassert(fx.getRelativeL2Norm() < MaximumDiffForces); //3
uassert(fx.getRelativeL2Norm() < MaximumDiffForces);
CoerrRMS = fx.getl2Dot()/FMath::Sqrt(static_cast<FReal>(nbParticles));
//3
Print("Test4 - Error RMS L2 norm FX ");
uassert(fx.getRMSError() < MaximumDiffForces); //4
uassert(fx.getRMSError() < CoerrRMS*MaximumDiffForces); //4
Print("Test5 - Error Relative L2 norm FY ");
uassert(fy.getRelativeL2Norm() < MaximumDiffForces); //5
Print("Test6 - Error RMS L2 norm FY ");
uassert(fy.getRMSError() < MaximumDiffForces); //6
CoerrRMS = fy.getl2Dot()/FMath::Sqrt(static_cast<FReal>(nbParticles));
uassert(fy.getRMSError() < CoerrRMS*MaximumDiffForces); //6
Print("Test7 - Error Relative L2 norm FZ ");
uassert(fz.getRelativeL2Norm() < MaximumDiffForces); //8
Print("Test8 - Error RMS L2 norm FZ ");
uassert(fz.getRMSError() < MaximumDiffForces); //8
CoerrRMS = fz.getl2Dot()/FMath::Sqrt(static_cast<FReal>(nbParticles));
uassert(fz.getRMSError() < CoerrRMS*MaximumDiffForces); //8
Print("Test9 - Error Relative L2 norm F ");
uassert(L2error < MaximumDiffForces); //9 Total Force
Print("Test10 - Relative error Energy ");
uassert(FMath::Abs(energy-energyD) /energyD< MaximumDiffPotential); //10 Total Energy
uassert(FMath::Abs(energy-energyD)< coerr*MaximumDiffPotential); //10 Total Energy
delete[] particles;
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment