diff --git a/Src/Kernels/Chebyshev/FChebInterpolator.hpp b/Src/Kernels/Chebyshev/FChebInterpolator.hpp index 24b37295c6a572f9332ff8ccdd3103b5c5250b85..a5d38fe11e8a692e6edd211913f494bdc3c28856 100755 --- a/Src/Kernels/Chebyshev/FChebInterpolator.hpp +++ b/Src/Kernels/Chebyshev/FChebInterpolator.hpp @@ -47,7 +47,8 @@ class FChebInterpolator : FNoCopyable // compile time constants and types enum {nnodes = TensorTraits<ORDER>::nnodes, nRhs = MatrixKernelClass::NRHS, - nLhs = MatrixKernelClass::NLHS}; + nLhs = MatrixKernelClass::NLHS, + nPV = MatrixKernelClass::NPV}; typedef FChebRoots< ORDER> BasisType; typedef FChebTensor<ORDER> TensorType; @@ -861,8 +862,15 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& c } } - for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ - FReal*const potentials = inParticles->getPotentials(idxLhs); + for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ + // distribution over potential components: + // We sum the multidim contribution of PhysValue + // This was originally done at M2L step but moved here + // because their storage is required by the force computation. + // In fact : f_{ik}(x)=w_j(x) \nabla_{x_i} K_{ij}(x,y)w_j(y)) + const unsigned int idxPot = idxLhs / nPV; + + FReal*const potentials = inParticles->getPotentials(idxPot); // interpolate and increment target value FReal targetValue = potentials[idxPart]; @@ -1110,6 +1118,8 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F } for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ + const unsigned int idxPot = idxLhs / nPV; + const unsigned int idxPV = idxLhs % nPV; // scale forces forces[idxLhs][0] *= jacobian[0] / nnodes; @@ -1117,10 +1127,10 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F forces[idxLhs][2] *= jacobian[2] / nnodes; // get pointers to PhysValues and force components - const FReal*const physicalValues = inParticles->getPhysicalValues(idxLhs); - FReal*const forcesX = inParticles->getForcesX(idxLhs); - FReal*const forcesY = inParticles->getForcesY(idxLhs); - FReal*const forcesZ = inParticles->getForcesZ(idxLhs); + const FReal*const physicalValues = inParticles->getPhysicalValues(idxPV); + FReal*const forcesX = inParticles->getForcesX(idxPot); + FReal*const forcesY = inParticles->getForcesY(idxPot); + FReal*const forcesZ = inParticles->getForcesZ(idxPot); // set computed forces forcesX[idxPart] += forces[idxLhs][0] * physicalValues[idxPart]; @@ -1342,12 +1352,15 @@ inline void FChebInterpolator<ORDER,MatrixKernelClass>::applyL2PTotal(const FPoi forces[idxLhs][2] = ( FReal(2.)*f2[3] + FReal(4.)*f4[3] + FReal(8.)*f8[3]) * jacobian[2] / nnodes; // 7 flops } // 28 + (ORDER-1) * ((ORDER-1) * (27 + (ORDER-1) * 16)) flops + const unsigned int idxPot = idxLhs / nPV; + const unsigned int idxPV = idxLhs % nPV; + // get potentials, physValues and forces components - const FReal*const physicalValues = inParticles->getPhysicalValues(idxLhs); - FReal*const forcesX = inParticles->getForcesX(idxLhs); - FReal*const forcesY = inParticles->getForcesY(idxLhs); - FReal*const forcesZ = inParticles->getForcesZ(idxLhs); - FReal*const potentials = inParticles->getPotentials(idxLhs); + const FReal*const physicalValues = inParticles->getPhysicalValues(idxPV); + FReal*const forcesX = inParticles->getForcesX(idxPot); + FReal*const forcesY = inParticles->getForcesY(idxPot); + FReal*const forcesZ = inParticles->getForcesZ(idxPot); + FReal*const potentials = inParticles->getPotentials(idxPot); // set computed potential potentials[idxPart] += (potential[idxLhs]); // 1 flop diff --git a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp index e5d0d06532fd58c98e166ba07a19106b23e8f971..35c7833be2ba14ea86323330c6ce6aed74771505 100755 --- a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp @@ -46,7 +46,9 @@ class FChebTensorialKernel : public FAbstractChebKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS> { enum {nRhs = MatrixKernelClass::NRHS, - nLhs = MatrixKernelClass::NLHS}; + nLhs = MatrixKernelClass::NLHS, + nPot = MatrixKernelClass::NPOT, + nPv = MatrixKernelClass::NPV}; protected://PB: for OptiDis @@ -137,19 +139,19 @@ public: for(int idxV = 0 ; idxV < NVALS ; ++idxV){ for (int idxLhs=0; idxLhs < nLhs; ++idxLhs){ - // update local index - int idxLoc = idxV*nLhs + idxLhs; + // update local index + const int idxLoc = idxV*nLhs + idxLhs; - FReal *const CompressedLocalExpansion = TargetCell->getLocal(idxLoc) + AbstractBaseClass::nnodes; + FReal *const CompressedLocalExpansion = TargetCell->getLocal(idxLoc) + AbstractBaseClass::nnodes; - for (int idxRhs=0; idxRhs < nRhs; ++idxRhs){ + // update idxRhs + const int idxRhs = idxLhs % nPv; // update multipole index - int idxMul = idxV*nRhs + idxRhs; - // update kernel index such that: x_i = K_{ij}y_j - int idxK = idxLhs*nRhs + idxRhs; + const int idxMul = idxV*nRhs + idxRhs; + // get index in matrix kernel - unsigned int d - = AbstractBaseClass::MatrixKernel.getPtr()->getPosition(idxK); + const unsigned int d + = AbstractBaseClass::MatrixKernel.getPtr()->getPosition(idxLhs); for (int idx=0; idx<343; ++idx){ if (SourceCells[idx]){ @@ -158,8 +160,7 @@ public: CompressedLocalExpansion); } } - }// NRHS - }// NLHS + }// NLHS=NPOT*NPV }// NVALS } @@ -223,14 +224,14 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles); + DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles,AbstractBaseClass::MatrixKernel.getPtr()); } void P2PRemote(const FTreeCoordinate& /*inPosition*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/){ - DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27); + DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27,AbstractBaseClass::MatrixKernel.getPtr()); } }; diff --git a/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp b/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp index 674b3cdc4c4b144b7ce3d9a500af0bcf3c3258b7..000b63b10bf7f689992c19a115b6ff3f19ccdd3c 100755 --- a/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp +++ b/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp @@ -86,8 +86,7 @@ class FChebTensorialM2LHandler<ORDER,MatrixKernelClass,HOMOGENEOUS> : FNoCopyabl enum {order = ORDER, nnodes = TensorTraits<ORDER>::nnodes, ninteractions = 316,// 7^3 - 3^3 (max num cells in far-field) - dim = MatrixKernelClass::DIM, - nidx = MatrixKernelClass::NIDX}; + dim = MatrixKernelClass::DIM}; // const MatrixKernelClass MatrixKernel; @@ -205,8 +204,7 @@ class FChebTensorialM2LHandler<ORDER,MatrixKernelClass,NON_HOMOGENEOUS> : FNoCop enum {order = ORDER, nnodes = TensorTraits<ORDER>::nnodes, ninteractions = 316,// 7^3 - 3^3 (max num cells in far-field) - dim = MatrixKernelClass::DIM, - nidx = MatrixKernelClass::NIDX}; + dim = MatrixKernelClass::DIM}; // Tensorial MatrixKernel and homogeneity specific FReal **U, **B; diff --git a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp index b5cc77f07ae055240d60a48cecfc3e1514b0fc97..87fe875106a9ef8974921c42283c3a1a6cd096cc 100755 --- a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp +++ b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp @@ -71,7 +71,8 @@ struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel static const KERNEL_FUNCTION_TYPE Type = /*NON_*/HOMOGENEOUS; static const KERNEL_FUNCTION_IDENTIFIER Identifier = ONE_OVER_R; static const unsigned int DIM = 1; //PB: dimension of kernel - static const unsigned int NIDX = 1; //PB: number of indices + static const unsigned int NPV = 1; + static const unsigned int NPOT = 1; static const unsigned int NRHS = 1; static const unsigned int NLHS = 1; @@ -127,7 +128,8 @@ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; static const KERNEL_FUNCTION_IDENTIFIER Identifier = ONE_OVER_R_SQUARED; static const unsigned int DIM = 1; //PB: dimension of kernel - static const unsigned int NIDX = 1; //PB: number of indices + static const unsigned int NPV = 1; + static const unsigned int NPOT = 1; static const unsigned int NRHS = 1; static const unsigned int NLHS = 1; @@ -165,7 +167,8 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; static const KERNEL_FUNCTION_IDENTIFIER Identifier = LENNARD_JONES_POTENTIAL; static const unsigned int DIM = 1; //PB: dimension of kernel - static const unsigned int NIDX = 1; //PB: number of indices + static const unsigned int NPV = 1; + static const unsigned int NPOT = 1; static const unsigned int NRHS = 1; static const unsigned int NLHS = 1; @@ -205,6 +208,26 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel // // Tensorial Matrix Kernels (DIM>1) // +// The definition of the potential p and force f are extended to the case +// of tensorial interaction kernels: +// p_i(x) = K_{ip}(x,y)w_p(y), \forall i=1..NPOT, p=1..NPV +// f_{ik}= w_p(x)K_{ip,k}(x,y)w_p(y) " +// +// Since the interpolation scheme is such that +// p_i(x) \approx S^m(x) L^{m}_{ip} +// f_{ik}= w_p(x) \nabla_k S^m(x) L^{m}_{ip} +// with +// L^{m}_{ip} = K^{mn}_{ip} S^n(y) w_p(y) (local expansion) +// M^{m}_{p} = S^n(y) w_p(y) (multipole expansion) +// then the multipole exp have NPV components and the local exp NPOT*NPV. +// +// NB1: Only the computation of forces requires that the sum over p is +// performed at L2P step. It could be done at M2L step for the potential. +// +// NB2: An efficient application of the matrix kernel is highly kernel +// dependent, we recommand overriding the P2M/M2L/L2P function of the kernel +// you are using in order to have optimal performances + set your own NRHS/NLHS. +// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// @@ -213,11 +236,15 @@ struct FInterpMatrixKernel_IOR : FInterpAbstractMatrixKernel { static const KERNEL_FUNCTION_TYPE Type = /*NON_*/HOMOGENEOUS; static const KERNEL_FUNCTION_IDENTIFIER Identifier = ID_OVER_R; - static const unsigned int DIM = 6; //PB: dimension of kernel - static const unsigned int NIDX = 2; //PB: number of indices - static const unsigned int indexTab[/*DIM*NIDX=12*/]; - static const unsigned int NRHS = 3; - static const unsigned int NLHS = 3; + static const unsigned int NK = 3*3; // dim of kernel + static const unsigned int DIM = 6; // reduced dim + static const unsigned int NPOT = 3; // dim of potentials + static const unsigned int NPV = 3; // dim of physical values + static const unsigned int NRHS = NPV; // dim of mult exp (here = dim physval) + static const unsigned int NLHS = NPOT*NRHS; // dim of loc exp (here = NPOT*NRHS) + + // store indices (i,j) corresponding to sym idx + static const unsigned int indexTab[/*2*DIM=12*/]; // store positions in sym tensor static const unsigned int applyTab[/*9*/]; @@ -271,12 +298,6 @@ struct FInterpMatrixKernel_IOR : FInterpAbstractMatrixKernel return FReal(2.) / CellWidth; } -// FReal getScaleFactor(const FReal, const int) const -// { -// // return 1 because non homogeneous kernel functions cannot be scaled!!! -// return FReal(1.); -// } -// // FReal getScaleFactor(const FReal) const // { // // return 1 because non homogeneous kernel functions cannot be scaled!!! @@ -290,31 +311,45 @@ struct FInterpMatrixKernel_IOR : FInterpAbstractMatrixKernel // required by ChebSym kernel => only suited for Unif interpolation struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel { - static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; + static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; static const KERNEL_FUNCTION_IDENTIFIER Identifier = R_IJ; - static const unsigned int DIM = 6; //PB: dimension of kernel - static const unsigned int NIDX = 2; //PB: number of indices - static const unsigned int indexTab[/*DIM*NIDX=12*/]; - static const unsigned int NRHS = 3; - static const unsigned int NLHS = 3; + static const unsigned int NK = 3*3; // dim of kernel + static const unsigned int DIM = 6; // reduced dim + static const unsigned int NPOT = 3; // dim of potentials + static const unsigned int NPV = 3; // dim of physical values + static const unsigned int NRHS = NPV; // dim of mult exp (here = dim physval) + static const unsigned int NLHS = NPOT*NRHS; // dim of loc exp (here = NPOT*NRHS) + + // store indices (i,j) corresponding to sym idx + static const unsigned int indexTab[/*2*DIM=12*/]; // store positions in sym tensor (when looping over NRHSxNLHS) - static const unsigned int applyTab[/*9*/]; + static const unsigned int applyTab[/*NK=9*/]; + // indices to be set at construction if component-wise evaluation is performed const unsigned int _i,_j; - FInterpMatrixKernel_R_IJ(const double = 0.0, const unsigned int d = 0) - : _i(indexTab[d]), _j(indexTab[d+DIM]) + // Material Parameters + const double _CoreWidth2; // if >0 then kernel is NON homogeneous + + FInterpMatrixKernel_R_IJ(const double CoreWidth = 0.0, const unsigned int d = 0) + : _i(indexTab[d]), _j(indexTab[d+DIM]), _CoreWidth2(CoreWidth*CoreWidth) {} // returns position in reduced storage from position in full 3x3 matrix int getPosition(const unsigned int n) const {return applyTab[n];} + // returns Core Width squared + double getCoreWidth2() const + {return _CoreWidth2;} + FReal evaluate(const FPoint& x, const FPoint& y) const { const FPoint xy(x-y); - const FReal one_over_r = FReal(1.)/xy.norm(); + const FReal one_over_r = FReal(1.)/sqrt(xy.getX()*xy.getX() + + xy.getY()*xy.getY() + + xy.getZ()*xy.getZ() + _CoreWidth2); const FReal one_over_r3 = one_over_r*one_over_r*one_over_r; double ri,rj; @@ -338,7 +373,9 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel void evaluateBlock(const FPoint& x, const FPoint& y, FReal* block) const { const FPoint xy(x-y); - const FReal one_over_r = FReal(1.)/xy.norm(); + const FReal one_over_r = FReal(1.)/sqrt(xy.getX()*xy.getX() + + xy.getY()*xy.getY() + + xy.getZ()*xy.getZ() + _CoreWidth2); const FReal one_over_r3 = one_over_r*one_over_r*one_over_r; const double r[3] = {xy.getX(),xy.getY(),xy.getZ()}; @@ -366,36 +403,57 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel return FReal(2.) / CellWidth; } +// // R_{,ij} is set non-homogeneous +// FReal getScaleFactor(const FReal CellWidth) const +// { +// return FReal(1.); +// } + }; /// R_{,ijk} struct FInterpMatrixKernel_R_IJK : FInterpAbstractMatrixKernel { - static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; + static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; static const KERNEL_FUNCTION_IDENTIFIER Identifier = R_IJK; - static const unsigned int DIM = 10; //PB: dimension of kernel - static const unsigned int NIDX = 3; //PB: number of indices - static const unsigned int indexTab[/*DIM*NIDX=30*/]; - static const unsigned int NRHS = 3; - static const unsigned int NLHS = 3; + static const unsigned int NK = 3*3*3; // dim of kernel + static const unsigned int DIM = 10; // reduced dim + static const unsigned int NPOT = 3; // dim of potentials + static const unsigned int NPV = 3*3; // dim of physical values + static const unsigned int NRHS = NPV; // dim of mult exp (here = dim physval) + static const unsigned int NLHS = NPOT*NRHS; // dim of loc exp (here = NPOT*NRHS) - // store positions in sym tensor (when looping over NRHSxNLHS) - static const unsigned int applyTab[/*27*/]; + // store indices (i,j,k) corresponding to sym idx + static const unsigned int indexTab[/*3*DIM=30*/]; + + // store positions in sym tensor wr to full + static const unsigned int applyTab[/*NK=27*/]; + // indices to be set at construction if component-wise evaluation is performed const unsigned int _i,_j,_k; - FInterpMatrixKernel_R_IJK(const double = 0.0, const unsigned int d = 0) - : _i(indexTab[d]), _j(indexTab[d+DIM]), _k(indexTab[d+2*DIM]) + // Material Parameters + const double _CoreWidth2; // if >0 then kernel is NON homogeneous + + FInterpMatrixKernel_R_IJK(const double CoreWidth = 0.0, const unsigned int d = 0) + : _i(indexTab[d]), _j(indexTab[d+DIM]), _k(indexTab[d+2*DIM]), _CoreWidth2(CoreWidth*CoreWidth) {} // returns position in reduced storage from position in full 3x3x3 matrix int getPosition(const unsigned int n) const {return applyTab[n];} + // returns Core Width squared + double getCoreWidth2() const + {return _CoreWidth2;} + FReal evaluate(const FPoint& x, const FPoint& y) const { - const FPoint xy(x-y); - const FReal one_over_r = FReal(1.)/xy.norm(); +// const FPoint xy(x-y); + const FPoint xy(y-x); + const FReal one_over_r = FReal(1.)/sqrt(xy.getX()*xy.getX() + + xy.getY()*xy.getY() + + xy.getZ()*xy.getZ() + _CoreWidth2); const FReal one_over_r2 = one_over_r*one_over_r; const FReal one_over_r3 = one_over_r2*one_over_r; double ri,rj,rk; @@ -434,6 +492,39 @@ struct FInterpMatrixKernel_R_IJK : FInterpAbstractMatrixKernel } + void evaluateBlock(const FPoint& x, const FPoint& y, FReal* block) const + { +// const FPoint xy(x-y); + const FPoint xy(y-x); + const FReal one_over_r = FReal(1.)/sqrt(xy.getX()*xy.getX() + + xy.getY()*xy.getY() + + xy.getZ()*xy.getZ() + _CoreWidth2); + const FReal one_over_r2 = one_over_r*one_over_r; + const FReal one_over_r3 = one_over_r2*one_over_r; + + const double r[3] = {xy.getX(),xy.getY(),xy.getZ()}; + + for(unsigned int d=0;d<DIM;++d){ + unsigned int i = indexTab[d]; + unsigned int j = indexTab[d+DIM]; + unsigned int k = indexTab[d+2*DIM]; + + if(i==j){ + if(j==k) //i=j=k + block[d] = FReal(3.) * ( FReal(-1.) + r[i]*r[i] * one_over_r2 ) * r[i] * one_over_r3; + else //i=j!=k + block[d] = ( FReal(-1.) + FReal(3.) * r[i]*r[i] * one_over_r2 ) * r[k] * one_over_r3; + } + else //(i!=j) + if(i==k) //i=k!=j + block[d] = ( FReal(-1.) + FReal(3.) * r[i]*r[i] * one_over_r2 ) * r[j] * one_over_r3; + else if(j==k) //i!=k=j + block[d] = ( FReal(-1.) + FReal(3.) * r[j]*r[j] * one_over_r2 ) * r[i] * one_over_r3; + else //i!=k!=j + block[d] = FReal(3.) * r[i]*r[j]*r[k] * one_over_r2 * one_over_r3; + } + } + FReal getScaleFactor(const FReal RootCellWidth, const int TreeLevel) const { const FReal CellWidth(RootCellWidth / FReal(FMath::pow(2, TreeLevel))); @@ -446,6 +537,13 @@ struct FInterpMatrixKernel_R_IJK : FInterpAbstractMatrixKernel { return FReal(4.) / (CellWidth*CellWidth); } + +// // R_{,ijk} is set non-homogeneous +// FReal getScaleFactor(const FReal CellWidth) const +// { +// return FReal(1.); +// } + }; diff --git a/Src/Kernels/Interpolation/FInterpP2PKernels.hpp b/Src/Kernels/Interpolation/FInterpP2PKernels.hpp index 0dad8959c18a7b63b61add8bcf27ea64c2acb8e3..b6186fc51e6b1250ad926e5bb56a7ef1b93e4aec 100644 --- a/Src/Kernels/Interpolation/FInterpP2PKernels.hpp +++ b/Src/Kernels/Interpolation/FInterpP2PKernels.hpp @@ -70,17 +70,39 @@ struct DirectInteractionComputer<ID_OVER_R, 1> template <> struct DirectInteractionComputer<R_IJ, 1> { - template <typename ContainerClass> + template <typename ContainerClass, typename MatrixKernelClass> static void P2P( ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27]){ - FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14); + ContainerClass* const NeighborSourceParticles[27], + const MatrixKernelClass *const MatrixKernel){ + FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14,MatrixKernel); } - template <typename ContainerClass> + template <typename ContainerClass, typename MatrixKernelClass> static void P2PRemote( ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[27], - const int inSize){ - FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize); + const int inSize, + const MatrixKernelClass *const MatrixKernel){ + FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize,MatrixKernel); + } +}; + +/*! Specialization for GradGradGradR potential */ +template <> +struct DirectInteractionComputer<R_IJK, 1> +{ + template <typename ContainerClass, typename MatrixKernelClass> + static void P2P( ContainerClass* const FRestrict TargetParticles, + ContainerClass* const NeighborSourceParticles[27], + const MatrixKernelClass *const MatrixKernel){ + FP2P::FullMutualRIJK(TargetParticles,NeighborSourceParticles,14,MatrixKernel); + } + + template <typename ContainerClass, typename MatrixKernelClass> + static void P2PRemote( ContainerClass* const FRestrict inTargets, + ContainerClass* const inNeighbors[27], + const int inSize, + const MatrixKernelClass *const MatrixKernel){ + FP2P::FullRemoteRIJK(inTargets,inNeighbors,inSize,MatrixKernel); } }; @@ -160,20 +182,46 @@ struct DirectInteractionComputer<ID_OVER_R, NVALS> template <int NVALS> struct DirectInteractionComputer<R_IJ, NVALS> { - template <typename ContainerClass> + template <typename ContainerClass, typename MatrixKernelClass> static void P2P( ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27]){ + ContainerClass* const NeighborSourceParticles[27], + const MatrixKernelClass *const MatrixKernel){ for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14); + FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14,MatrixKernel); } } - template <typename ContainerClass> + template <typename ContainerClass, typename MatrixKernelClass> static void P2PRemote( ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[27], - const int inSize){ + const int inSize, + const MatrixKernelClass *const MatrixKernel){ + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize,MatrixKernel); + } + } +}; + +/*! Specialization for GradGradGradR potential */ +template <int NVALS> +struct DirectInteractionComputer<R_IJK, NVALS> +{ + template <typename ContainerClass, typename MatrixKernelClass> + static void P2P( ContainerClass* const FRestrict TargetParticles, + ContainerClass* const NeighborSourceParticles[27], + const MatrixKernelClass *const MatrixKernel){ + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + FP2P::FullMutualRIJK(TargetParticles,NeighborSourceParticles,14,MatrixKernel); + } + } + + template <typename ContainerClass, typename MatrixKernelClass> + static void P2PRemote( ContainerClass* const FRestrict inTargets, + ContainerClass* const inNeighbors[27], + const int inSize, + const MatrixKernelClass *const MatrixKernel){ for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize); + FP2P::FullRemoteRIJK(inTargets,inNeighbors,inSize,MatrixKernel); } } }; diff --git a/Src/Kernels/P2P/FP2P.hpp b/Src/Kernels/P2P/FP2P.hpp index 863180432935cd9533ac44a1c0d68d048b7a2568..7663a1f51d83ce35f75e9dd571da5e71dac772b8 100644 --- a/Src/Kernels/P2P/FP2P.hpp +++ b/Src/Kernels/P2P/FP2P.hpp @@ -978,6 +978,8 @@ public: ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// // R_IJ + // PB: The following functions take the MatrixKernel as input arguments for more generic implementation + // Besides this MatrixKernel is already updated with any extra parameter (e.g. core width). ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -986,9 +988,11 @@ public: /** * @brief FullMutualRIJ */ - template <class ContainerClass> + template <class ContainerClass, typename MatrixKernelClass> static void FullMutualRIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], - const int limiteNeighbors){ + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + + const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock const int nbParticlesTargets = inTargets->getNbParticles(); const FReal*const targetsX = inTargets->getPositions()[0]; @@ -1008,7 +1012,7 @@ public: FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz); + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); FReal inv_distance = FMath::Sqrt(inv_distance_pow2); FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; @@ -1086,7 +1090,7 @@ public: FReal dy = targetsY[idxSource] - targetsY[idxTarget]; FReal dz = targetsZ[idxSource] - targetsZ[idxTarget]; - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz); + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); FReal inv_distance = FMath::Sqrt(inv_distance_pow2); FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; @@ -1144,7 +1148,6 @@ public: targetsForcesY[idxSource] -= coef[1]; targetsForcesZ[idxSource] -= coef[2]; targetsPotentials[idxSource] += potentialCoef * targetsPhysicalValues[idxTarget]; - }// j }// i @@ -1155,9 +1158,11 @@ public: /** * @brief FullRemoteRIJ */ - template <class ContainerClass> + template <class ContainerClass, typename MatrixKernelClass> static void FullRemoteRIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], - const int limiteNeighbors){ + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + + const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock const int nbParticlesTargets = inTargets->getNbParticles(); const FReal*const targetsX = inTargets->getPositions()[0]; @@ -1178,7 +1183,7 @@ public: FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz); + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); FReal inv_distance = FMath::Sqrt(inv_distance_pow2); FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; @@ -1261,17 +1266,21 @@ public: * @param targetForceZ * @param targetPotential */ + template<typename MatrixKernelClass> static void MutualParticlesRIJ(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal* sourcePhysicalValue, FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue, - FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential - ){ + FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, + const MatrixKernelClass *const MatrixKernel){ + + const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock + // GradGradR potential FReal dx = sourceX - targetX; FReal dy = sourceY - targetY; FReal dz = sourceZ - targetZ; - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz); + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); FReal inv_distance = FMath::Sqrt(inv_distance_pow2); FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; @@ -1328,6 +1337,364 @@ public: } + + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + // R_IJK + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * @brief FullMutualRIJK + */ + template <class ContainerClass, typename MatrixKernelClass> + static void FullMutualRIJK(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + + const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock + + const int nbParticlesTargets = inTargets->getNbParticles(); + const FReal*const targetsX = inTargets->getPositions()[0]; + const FReal*const targetsY = inTargets->getPositions()[1]; + const FReal*const targetsZ = inTargets->getPositions()[2]; + + for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + if( inNeighbors[idxNeighbors] ){ + const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); + const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; + const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; + const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; + + for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ + FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; + FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; + FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal*const sourcesPotentials = inNeighbors[idxNeighbors]->getPotentials(i); + FReal*const sourcesForcesX = inNeighbors[idxNeighbors]->getForcesX(i); + FReal*const sourcesForcesY = inNeighbors[idxNeighbors]->getForcesY(i); + FReal*const sourcesForcesZ = inNeighbors[idxNeighbors]->getForcesZ(i); + + const FReal ri2=r[i]*r[i]; + + for(unsigned int j = 0 ; j < 3 ; ++j){ + FReal rj2=r[j]*r[j]; + + for(unsigned int k = 0 ; k < 3 ; ++k){ + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); + const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j*3+k); + + // potentials + FReal potentialCoef; + if(i==j){ + if(j==k) //i=j=k + potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + + // forces + FReal coef[3]; + for(unsigned int l = 0 ; l < 3 ; ++l){ + coef[l]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); + +// // Grad of RIJK kernel is RIJKL kernel => TODO Implement +// coef[l] *= ; + + }// l + + targetsForcesX[idxTarget] += coef[0]; + targetsForcesY[idxTarget] += coef[1]; + targetsForcesZ[idxTarget] += coef[2]; + targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); + + sourcesForcesX[idxSource] -= coef[0]; + sourcesForcesY[idxSource] -= coef[1]; + sourcesForcesZ[idxSource] -= coef[2]; + sourcesPotentials[idxSource] += -potentialCoef * targetsPhysicalValues[idxTarget]; + + }// k + }// j + }// i + } + } + } + } + + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + for(int idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){ + FReal dx = targetsX[idxSource] - targetsX[idxTarget]; + FReal dy = targetsY[idxSource] - targetsY[idxTarget]; + FReal dz = targetsZ[idxSource] - targetsZ[idxTarget]; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal ri2=r[i]*r[i]; + + for(unsigned int j = 0 ; j < 3 ; ++j){ + FReal rj2=r[j]*r[j]; + + for(unsigned int k = 0 ; k < 3 ; ++k){ + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); + + // potentials + FReal potentialCoef; + if(i==j){ + if(j==k) //i=j=k + potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + + // forces + FReal coef[3]; + for(unsigned int l = 0 ; l < 3 ; ++l){ + coef[l]= -(targetsPhysicalValues[idxTarget] * targetsPhysicalValues[idxSource]); + +// // Grad of RIJK kernel is RIJKL kernel => TODO Implement +// coef[l] *= ; + + }// l + + targetsForcesX[idxTarget] += coef[0]; + targetsForcesY[idxTarget] += coef[1]; + targetsForcesZ[idxTarget] += coef[2]; + targetsPotentials[idxTarget] += ( potentialCoef * targetsPhysicalValues[idxSource] ); + + targetsForcesX[idxSource] -= coef[0]; + targetsForcesY[idxSource] -= coef[1]; + targetsForcesZ[idxSource] -= coef[2]; + targetsPotentials[idxSource] += -potentialCoef * targetsPhysicalValues[idxTarget]; + }// k + }// j + }// i + + } + } + } + + /** + * @brief FullRemoteRIJK + */ + template <class ContainerClass, typename MatrixKernelClass> + static void FullRemoteRIJK(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + + const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock + + const int nbParticlesTargets = inTargets->getNbParticles(); + const FReal*const targetsX = inTargets->getPositions()[0]; + const FReal*const targetsY = inTargets->getPositions()[1]; + const FReal*const targetsZ = inTargets->getPositions()[2]; + + for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + if( inNeighbors[idxNeighbors] ){ + const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); + const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; + const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; + const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; + + for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ + // potential + FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; + FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; + FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal ri2=r[i]*r[i]; + + for(unsigned int j = 0 ; j < 3 ; ++j){ + FReal rj2=r[j]*r[j]; + + for(unsigned int k = 0 ; k < 3 ; ++k){ + + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); + const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j*3+k); + + // potentials + FReal potentialCoef; + if(i==j){ + if(j==k) //i=j=k + potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + + // forces + FReal coef[3]; + for(unsigned int l = 0 ; l < 3 ; ++l){ + coef[l]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); + + // // Grad of RIJK kernel is RIJKL kernel => TODO Implement + // coef[l] *= ; + + }// l + + targetsForcesX[idxTarget] += coef[0]; + targetsForcesY[idxTarget] += coef[1]; + targetsForcesZ[idxTarget] += coef[2]; + targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); + + }// k + }// j + }// i + + } + } + } + } + } + + + /** + * @brief MutualParticlesRIJK + * @param sourceX + * @param sourceY + * @param sourceZ + * @param sourcePhysicalValue + * @param sourceForceX + * @param sourceForceY + * @param sourceForceZ + * @param sourcePotential + * @param targetX + * @param targetY + * @param targetZ + * @param targetPhysicalValue + * @param targetForceX + * @param targetForceY + * @param targetForceZ + * @param targetPotential + */ + template<typename MatrixKernelClass> + static void MutualParticlesRIJK(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal* sourcePhysicalValue, + FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, + const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue, + FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, + const MatrixKernelClass *const MatrixKernel){ + + const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock + + // GradGradR potential + FReal dx = sourceX - targetX; + FReal dy = sourceY - targetY; + FReal dz = sourceZ - targetZ; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal ri2=r[i]*r[i]; + for(unsigned int j = 0 ; j < 3 ; ++j){ + FReal rj2=r[j]*r[j]; + for(unsigned int k = 0 ; k < 3 ; ++k){ + + // potentials + FReal potentialCoef; + if(i==j){ + if(j==k) //i=j=k + potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + + // forces + FReal coef[3]; + for(unsigned int l = 0 ; l < 3 ; ++l){ + coef[l]= -(targetPhysicalValue[j*3+k] * sourcePhysicalValue[j*3+k]); + +// // Grad of RIJK kernel is RIJKL kernel => TODO Implement +// coef[l] *= ; + + }// l + + targetForceX[i] += coef[0]; + targetForceY[i] += coef[1]; + targetForceZ[i] += coef[2]; + targetPotential[i] += ( potentialCoef * sourcePhysicalValue[j*3+k] ); + + sourceForceX[i] -= coef[0]; + sourceForceY[i] -= coef[1]; + sourceForceZ[i] -= coef[2]; + sourcePotential[i] += ( -potentialCoef * targetPhysicalValue[j*3+k] ); + + }// k + }// j + }// i + + } + + ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1336,7 +1703,6 @@ public: // i.e. [[1 1 1] // [1 1 1] * 1/R // [1 1 1]] - // Only use scalar phys val, potential and forces for now. TODO use vectorial ones. ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/Src/Kernels/Uniform/FUnifInterpolator.hpp b/Src/Kernels/Uniform/FUnifInterpolator.hpp index ada5007d582ff83fdf39d53e84068d34aae672d9..48304884035c5fd6e225d81686f446f53287dcbf 100755 --- a/Src/Kernels/Uniform/FUnifInterpolator.hpp +++ b/Src/Kernels/Uniform/FUnifInterpolator.hpp @@ -43,7 +43,8 @@ class FUnifInterpolator : FNoCopyable // compile time constants and types enum {nnodes = TensorTraits<ORDER>::nnodes, nRhs = MatrixKernelClass::NRHS, - nLhs = MatrixKernelClass::NLHS}; + nLhs = MatrixKernelClass::NLHS, + nPV = MatrixKernelClass::NPV}; typedef FUnifRoots< ORDER> BasisType; typedef FUnifTensor<ORDER> TensorType; @@ -441,7 +442,14 @@ inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& c L_of_x[o][2] = BasisType::L(o, localPosition.getZ()); // 3 * ORDER*(ORDER-1) flops } + // loop over dim of local expansions for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ + // distribution over potential components: + // We sum the multidim contribution of PhysValue + // This was originally done at M2L step but moved here + // because their storage is required by the force computation. + // In fact : f_{ik}(x)=w_j(x) \nabla_{x_i} K_{ij}(x,y)w_j(y)) + const unsigned int idxPot = idxLhs / nPV; // interpolate and increment target value FReal targetValue=0.; @@ -458,7 +466,7 @@ inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& c } // get potential - FReal*const potentials = inParticles->getPotentials(idxLhs); + FReal*const potentials = inParticles->getPotentials(idxPot); // add contribution to potential potentials[idxPart] += (targetValue); @@ -514,6 +522,8 @@ inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F } for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ + const unsigned int idxPot = idxLhs / nPV; + const unsigned int idxPV = idxLhs % nPV; // interpolate and increment forces value FReal forces[3] = {FReal(0.), FReal(0.), FReal(0.)}; @@ -533,17 +543,16 @@ inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const F } // ORDER * ORDER * 4 flops } // ORDER * ORDER * ORDER * 4 flops - // scale forces forces[0] *= jacobian[0]; forces[1] *= jacobian[1]; forces[2] *= jacobian[2]; } - const FReal*const physicalValues = inParticles->getPhysicalValues(idxLhs); - FReal*const forcesX = inParticles->getForcesX(idxLhs); - FReal*const forcesY = inParticles->getForcesY(idxLhs); - FReal*const forcesZ = inParticles->getForcesZ(idxLhs); + const FReal*const physicalValues = inParticles->getPhysicalValues(idxPV); + FReal*const forcesX = inParticles->getForcesX(idxPot); + FReal*const forcesY = inParticles->getForcesY(idxPot); + FReal*const forcesZ = inParticles->getForcesZ(idxPot); // set computed forces forcesX[idxPart] += forces[0] * physicalValues[idxPart]; diff --git a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp index 74f1cc760cb78ea1e3ec7c2b3fd58b5d95fea000..d12569d46b524fa4b43529dfdb9f54fe895e65d8 100644 --- a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp @@ -62,7 +62,9 @@ class FUnifTensorialKernel : public FAbstractUnifKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS> { enum {nRhs = MatrixKernelClass::NRHS, - nLhs = MatrixKernelClass::NLHS}; + nLhs = MatrixKernelClass::NLHS, + nPot = MatrixKernelClass::NPOT, + nPV = MatrixKernelClass::NPV}; protected://PB: for OptiDis @@ -152,19 +154,24 @@ public: for(int idxV = 0 ; idxV < NVALS ; ++idxV){ for (int idxLhs=0; idxLhs < nLhs; ++idxLhs){ - // update local index - int idxLoc = idxV*nLhs + idxLhs; - // load transformed local expansion - FComplexe *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxLoc); + // update local index + const int idxLoc = idxV*nLhs + idxLhs; + + // load transformed local expansion + FComplexe *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxLoc); - for (int idxRhs=0; idxRhs < nRhs; ++idxRhs){ + // update idxRhs + const int idxRhs = idxLhs % nPV; // update multipole index - int idxMul = idxV*nRhs + idxRhs; - // update kernel index such that: x_i = K_{ij}y_j - int idxK = idxLhs*nRhs + idxRhs; + const int idxMul = idxV*nRhs + idxRhs; + // get index in matrix kernel - unsigned int d - = AbstractBaseClass::MatrixKernel.getPtr()->getPosition(idxK); + const unsigned int d + = AbstractBaseClass::MatrixKernel.getPtr()->getPosition(idxLhs); + +// std::cout<<"idxLhs="<< idxLhs +// <<" - d="<< d +// <<" - idxRhs="<< idxRhs <<std::endl; for (int idx=0; idx<343; ++idx){ if (SourceCells[idx]){ @@ -172,10 +179,9 @@ public: SourceCells[idx]->getTransformedMultipole(idxMul), TransformedLocalExpansion); - } } - }// NRHS - }// NLHS + } + }// NLHS=NPOT*NPV }// NVALS } @@ -231,14 +237,14 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles); + DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles,AbstractBaseClass::MatrixKernel.getPtr()); } void P2PRemote(const FTreeCoordinate& /*inPosition*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/){ - DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27); + DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27,AbstractBaseClass::MatrixKernel.getPtr()); } }; diff --git a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp index a4c1d62db8ca62ab5873fc2de13ba452ef99fe2b..ef75a1068e51b9caea266c0c225ba0aa21a83511 100644 --- a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp @@ -112,7 +112,6 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, for (unsigned int d=0; d<dim; ++d) _C[d][perm[ido]] = block[d]; - ido++; } @@ -192,8 +191,7 @@ class FUnifTensorialM2LHandler<ORDER,MatrixKernelClass,HOMOGENEOUS> : FNoCopyabl nnodes = TensorTraits<ORDER>::nnodes, ninteractions = 316, // 7^3 - 3^3 (max num cells in far-field) rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1), - dim = MatrixKernelClass::DIM, - nidx = MatrixKernelClass::NIDX}; + dim = MatrixKernelClass::DIM}; // Tensorial MatrixKernel specific FComplexe** FC; @@ -345,7 +343,6 @@ public: // Apply forward Discrete Fourier Transform Dft.applyDFT(Py,FY); - } @@ -360,8 +357,7 @@ class FUnifTensorialM2LHandler<ORDER,MatrixKernelClass,NON_HOMOGENEOUS> : FNoCop nnodes = TensorTraits<ORDER>::nnodes, ninteractions = 316, // 7^3 - 3^3 (max num cells in far-field) rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1), - dim = MatrixKernelClass::DIM, - nidx = MatrixKernelClass::NIDX}; + dim = MatrixKernelClass::DIM}; // Tensorial MatrixKernel and homogeneity specific FComplexe*** FC; @@ -525,7 +521,6 @@ public: // Apply forward Discrete Fourier Transform Dft.applyDFT(Py,FY); - } diff --git a/Tests/Kernels/testChebTensorialAlgorithm.cpp b/Tests/Kernels/testChebTensorialAlgorithm.cpp index 2d522bbac4608331d40121344884815003b81462..6d8f11ef8ed65e900eeb4de751c2e2c5c63d2d40 100644 --- a/Tests/Kernels/testChebTensorialAlgorithm.cpp +++ b/Tests/Kernels/testChebTensorialAlgorithm.cpp @@ -15,7 +15,7 @@ // =================================================================================== // ==== CMAKE ===== -// @FUSE_FFT +// @FUSE_BLAS // ================ #include <iostream> @@ -69,21 +69,30 @@ int main(int argc, char* argv[]) // typedefs // typedef FInterpMatrixKernel_R_IJ MatrixKernelClass; - typedef FInterpMatrixKernel_IOR MatrixKernelClass; +// typedef FInterpMatrixKernel_IOR MatrixKernelClass; // typedef FInterpMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernel_R_IJK MatrixKernelClass; const KERNEL_FUNCTION_IDENTIFIER MK_ID = MatrixKernelClass::Identifier; + const unsigned int NPV = MatrixKernelClass::NPV; + const unsigned int NPOT = MatrixKernelClass::NPOT; const unsigned int NRHS = MatrixKernelClass::NRHS; const unsigned int NLHS = MatrixKernelClass::NLHS; + const double CoreWidth = 0.1; + const MatrixKernelClass DirectMatrixKernel(CoreWidth); + std::cout<< "CoreWidth2 = "<< DirectMatrixKernel.getCoreWidth2()<<std::endl; + // init particles position and physical value struct TestParticle{ FPoint position; - FReal forces[3][NLHS]; - FReal physicalValue[NRHS]; - FReal potential[NLHS]; + FReal forces[3][NPOT]; + FReal physicalValue[NPV]; + FReal potential[NPOT]; }; + const FReal FRandMax = FReal(RAND_MAX); + // open particle file FFmaScanfLoader loader(filename); if(!loader.isOpen()) throw std::runtime_error("Particle file couldn't be opened!"); @@ -95,13 +104,19 @@ int main(int argc, char* argv[]) loader.fillParticle(&position,&physicalValue); // get copy particles[idxPart].position = position; - for(unsigned idxRhs = 0; idxRhs<NRHS;++idxRhs) - particles[idxPart].physicalValue[idxRhs] = physicalValue; // copy same physical value in each component - for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs){ - particles[idxPart].potential[idxLhs] = 0.0; - particles[idxPart].forces[0][idxLhs] = 0.0; - particles[idxPart].forces[1][idxLhs] = 0.0; - particles[idxPart].forces[2][idxLhs] = 0.0; + // Set physical values + for(unsigned idxPV = 0; idxPV<NPV;++idxPV){ + // Either copy same physical value in each component + particles[idxPart].physicalValue[idxPV] = physicalValue; + // ... or set random value +// particles[idxPart].physicalValue[idxPV] = physicalValue*FReal(rand())/FRandMax; + } + + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){ + particles[idxPart].potential[idxPot] = 0.0; + particles[idxPart].forces[0][idxPot] = 0.0; + particles[idxPart].forces[1][idxPot] = 0.0; + particles[idxPart].forces[2][idxPot] = 0.0; } } @@ -121,7 +136,8 @@ int main(int argc, char* argv[]) particles[idxOther].position.getX(), particles[idxOther].position.getY(), particles[idxOther].position.getZ(), particles[idxOther].physicalValue, particles[idxOther].forces[0], particles[idxOther].forces[1], - particles[idxOther].forces[2], particles[idxOther].potential); + particles[idxOther].forces[2], particles[idxOther].potential, + &DirectMatrixKernel); else if(MK_ID == ID_OVER_R) FP2P::MutualParticlesIOR(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(), particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue, @@ -140,6 +156,16 @@ int main(int argc, char* argv[]) particles[idxOther].position.getZ(), particles[idxOther].physicalValue[0], particles[idxOther].forces[0], particles[idxOther].forces[1], particles[idxOther].forces[2], particles[idxOther].potential); + else if(MK_ID == R_IJK) + FP2P::MutualParticlesRIJK(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(), + particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue, + particles[idxTarget].forces[0], particles[idxTarget].forces[1], + particles[idxTarget].forces[2], particles[idxTarget].potential, + particles[idxOther].position.getX(), particles[idxOther].position.getY(), + particles[idxOther].position.getZ(), particles[idxOther].physicalValue, + particles[idxOther].forces[0], particles[idxOther].forces[1], + particles[idxOther].forces[2], particles[idxOther].potential, + &DirectMatrixKernel); else std::runtime_error("Provide a valid matrix kernel!"); } @@ -153,10 +179,10 @@ int main(int argc, char* argv[]) //////////////////////////////////////////////////////////////////// - { // begin Lagrange kernel + { // begin Chebyshev kernel // accuracy - const unsigned int ORDER = 5; //PB: Beware order=9 exceed memory (even 8 since compute _C then store in C) +const unsigned int ORDER = 7 ; const FReal epsilon = FReal(1e-7); typedef FP2PParticleContainerIndexed<NRHS,NLHS> ContainerClass; @@ -180,11 +206,16 @@ int main(int argc, char* argv[]) for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ // put in tree - // PB: here we have to know the number of NRHS... - if(NRHS==1) + // PB: here we have to know NPV... + if(NPV==1) tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue[0]); - else if(NRHS==3) + else if(NPV==3) tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2]); + else if(NPV==9) // R_IJK + tree.insert(particles[idxPart].position, idxPart, + particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2], + particles[idxPart].physicalValue[3], particles[idxPart].physicalValue[4], particles[idxPart].physicalValue[5], + particles[idxPart].physicalValue[6], particles[idxPart].physicalValue[7], particles[idxPart].physicalValue[8]); else std::runtime_error("Insert correct number of physical values!"); @@ -198,7 +229,7 @@ int main(int argc, char* argv[]) { // ----------------------------------------------------- std::cout << "\nChebyshev FMM (ORDER="<< ORDER << ") ... " << std::endl; time.tic(); - KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), epsilon); + KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), epsilon,CoreWidth); FmmClass algorithm(&tree, &kernels); algorithm.execute(); time.tac(); @@ -208,23 +239,21 @@ int main(int argc, char* argv[]) { // ----------------------------------------------------- std::cout << "\nError computation ... " << std::endl; - FMath::FAccurater potentialDiff[NLHS]; - FMath::FAccurater fx[NLHS], fy[NLHS], fz[NLHS]; + FMath::FAccurater potentialDiff[NPOT]; + FMath::FAccurater fx[NPOT], fy[NPOT], fz[NPOT]; - FReal checkPhysVal[20000][NRHS]; - FReal checkPotential[20000][NLHS]; - FReal checkfx[20000][NLHS]; + FReal checkPotential[20000][NPOT]; + FReal checkfx[20000][NPOT]; { // Check that each particle has been summed with all other tree.forEachLeaf([&](LeafClass* leaf){ - for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs){ + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){ - const FReal*const physVals = leaf->getTargets()->getPhysicalValues(idxLhs); - const FReal*const potentials = leaf->getTargets()->getPotentials(idxLhs); - const FReal*const forcesX = leaf->getTargets()->getForcesX(idxLhs); - const FReal*const forcesY = leaf->getTargets()->getForcesY(idxLhs); - const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxLhs); + const FReal*const potentials = leaf->getTargets()->getPotentials(idxPot); + const FReal*const forcesX = leaf->getTargets()->getForcesX(idxPot); + const FReal*const forcesY = leaf->getTargets()->getForcesY(idxPot); + const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxPot); const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); const FVector<int>& indexes = leaf->getTargets()->getIndexes(); @@ -232,46 +261,106 @@ int main(int argc, char* argv[]) const int indexPartOrig = indexes[idxPart]; //PB: store potential in array[nbParticles] - checkPhysVal[indexPartOrig][idxLhs]=physVals[idxPart]; - checkPotential[indexPartOrig][idxLhs]=potentials[idxPart]; - checkfx[indexPartOrig][idxLhs]=forcesX[idxPart]; - - potentialDiff[idxLhs].add(particles[indexPartOrig].potential[idxLhs],potentials[idxPart]); - fx[idxLhs].add(particles[indexPartOrig].forces[0][idxLhs],forcesX[idxPart]); - fy[idxLhs].add(particles[indexPartOrig].forces[1][idxLhs],forcesY[idxPart]); - fz[idxLhs].add(particles[indexPartOrig].forces[2][idxLhs],forcesZ[idxPart]); + checkPotential[indexPartOrig][idxPot]=potentials[idxPart]; + checkfx[indexPartOrig][idxPot]=forcesX[idxPart]; + + // update accuracy + potentialDiff[idxPot].add(particles[indexPartOrig].potential[idxPot],potentials[idxPart]); + fx[idxPot].add(particles[indexPartOrig].forces[0][idxPot],forcesX[idxPart]); + fy[idxPot].add(particles[indexPartOrig].forces[1][idxPot],forcesY[idxPart]); + fz[idxPot].add(particles[indexPartOrig].forces[2][idxPot],forcesZ[idxPart]); } - }// NLHS + }// NPOT }); } // std::cout << "Check Potential, forceX " << std::endl; // for(int idxPart = 0 ; idxPart < 20 ; ++idxPart) -// for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs){ -// std::cout << checkPhysVal[idxPart][idxLhs] << ", "<< particles[idxPart].physicalValue[idxLhs]<< "|| "; -// std::cout << checkPotential[idxPart][idxLhs] << ", "<< particles[idxPart].potential[idxLhs]<< "|| "; -// std::cout << checkfx[idxPart][idxLhs] << ", "<< particles[idxPart].forces[0][idxLhs] << std::endl; +// for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){ +// std::cout << checkPotential[idxPart][idxPot] << ", "<< particles[idxPart].potential[idxPot]<< "|| "; +// std::cout << checkfx[idxPart][idxPot] << ", "<< particles[idxPart].forces[0][idxPot] << std::endl; // } // std::cout << std::endl; // Print for information - std::cout << "\nAbsolute errors: " << std::endl; - std::cout << "Potential: "; - for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << potentialDiff[idxLhs] << ", " ; + std::cout << "\nRelative Inf/L2 errors: " << std::endl; + std::cout << " Potential: " << std::endl; + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) { + std::cout << " " << idxPot << ": " + << potentialDiff[idxPot].getRelativeInfNorm() << ", " + << potentialDiff[idxPot].getRelativeL2Norm() + << std::endl; + } std::cout << std::endl; - std::cout << "Fx: "; - for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << fx[idxLhs] << ", " ; + std::cout << " Fx: " << std::endl; + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) { + std::cout << " " << idxPot << ": " + << fx[idxPot].getRelativeInfNorm() << ", " + << fx[idxPot].getRelativeL2Norm() + << std::endl; + } std::cout << std::endl; - std::cout << "Fy: "; - for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << fy[idxLhs] << ", " ; + std::cout << " Fy: " << std::endl; + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) { + std::cout << " " << idxPot << ": " + << fy[idxPot].getRelativeInfNorm() << ", " + << fy[idxPot].getRelativeL2Norm() + << std::endl; + } std::cout << std::endl; - std::cout << "Fz: "; - for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << fz[idxLhs] << ", " ; + std::cout << " Fz: " << std::endl; + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) { + std::cout << " " << idxPot << ": " + << fz[idxPot].getRelativeInfNorm() << ", " + << fz[idxPot].getRelativeL2Norm() + << std::endl; + } std::cout << std::endl; + { // ----------------------------------------------------- + + + std::cout << "\nStore results in file ... "<<std::endl; + std::ostringstream sstream; + if(MK_ID == R_IJ) + sstream << "testChebRij_h"<< TreeHeight << "_a" << 1000*CoreWidth; + else if(MK_ID == R_IJK) + sstream << "testChebRijk_h"<< TreeHeight << "_a" << 1000*CoreWidth; + + if(TreeHeight>2) sstream <<"_o"<<ORDER; + const std::string para_ext = sstream.str(); + std::string outname = "../tmp/"+para_ext+".dat"; + std::ofstream fout(outname.c_str()); + fout.precision(15); + + for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) + fout << potentialDiff[idxLhs].getL2Norm() << " " + << potentialDiff[idxLhs].getInfNorm() << " " + << potentialDiff[idxLhs].getRelativeL2Norm() << " " + << potentialDiff[idxLhs].getRelativeInfNorm() << std::endl; + for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) + fout << fx[idxLhs].getL2Norm() << " " + << fx[idxLhs].getInfNorm() << " " + << fx[idxLhs].getRelativeL2Norm() << " " + << fx[idxLhs].getRelativeInfNorm() << std::endl; + for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) + fout << fy[idxLhs].getL2Norm() << " " + << fy[idxLhs].getInfNorm() << " " + << fy[idxLhs].getRelativeL2Norm() << " " + << fy[idxLhs].getRelativeInfNorm() << std::endl; + for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) + fout << fz[idxLhs].getL2Norm() << " " + << fz[idxLhs].getInfNorm() << " " + << fz[idxLhs].getRelativeL2Norm() << " " + << fz[idxLhs].getRelativeInfNorm() << std::endl; + fout << std::flush; + fout.close(); + + } // ----------------------------------------------------- + } // ----------------------------------------------------- - } // end Lagrange kernel + } // end Chebyshev kernel return 0; } diff --git a/Tests/Kernels/testUnifTensorialAlgorithm.cpp b/Tests/Kernels/testUnifTensorialAlgorithm.cpp index 5b30af3ee98e060dcb55d93bb67a6ab7194da967..dfdf2d5bdead5a9d8c788b62a4523e000434528c 100644 --- a/Tests/Kernels/testUnifTensorialAlgorithm.cpp +++ b/Tests/Kernels/testUnifTensorialAlgorithm.cpp @@ -69,20 +69,31 @@ int main(int argc, char* argv[]) // typedefs // typedef FInterpMatrixKernel_R_IJ MatrixKernelClass; - typedef FInterpMatrixKernel_IOR MatrixKernelClass; +// typedef FInterpMatrixKernel_IOR MatrixKernelClass; + typedef FInterpMatrixKernel_R_IJK MatrixKernelClass; + // usefull features of matrix kernel const KERNEL_FUNCTION_IDENTIFIER MK_ID = MatrixKernelClass::Identifier; + const unsigned int NPV = MatrixKernelClass::NPV; + const unsigned int NPOT = MatrixKernelClass::NPOT; const unsigned int NRHS = MatrixKernelClass::NRHS; const unsigned int NLHS = MatrixKernelClass::NLHS; + const double CoreWidth = 10; + const MatrixKernelClass DirectMatrixKernel(CoreWidth); + + std::cout<< "CoreWidth2 = "<< DirectMatrixKernel.getCoreWidth2()<<std::endl; + // init particles position and physical value struct TestParticle{ FPoint position; - FReal forces[3][NLHS]; - FReal physicalValue[NRHS]; - FReal potential[NLHS]; + FReal forces[3][NPOT]; + FReal physicalValue[NPV]; + FReal potential[NPOT]; }; + const FReal FRandMax = FReal(RAND_MAX); + // open particle file FFmaScanfLoader loader(filename); if(!loader.isOpen()) throw std::runtime_error("Particle file couldn't be opened!"); @@ -92,15 +103,21 @@ int main(int argc, char* argv[]) FPoint position; FReal physicalValue = 0.0; loader.fillParticle(&position,&physicalValue); + // get copy particles[idxPart].position = position; - for(unsigned idxRhs = 0; idxRhs<NRHS;++idxRhs) - particles[idxPart].physicalValue[idxRhs] = physicalValue; // copy same physical value in each component - for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs){ - particles[idxPart].potential[idxLhs] = 0.0; - particles[idxPart].forces[0][idxLhs] = 0.0; - particles[idxPart].forces[1][idxLhs] = 0.0; - particles[idxPart].forces[2][idxLhs] = 0.0; + // Set physical values + for(unsigned idxPV = 0; idxPV<NPV;++idxPV){ +// // Either copy same physical value in each component + particles[idxPart].physicalValue[idxPV] = physicalValue; + // ... or set random value +// particles[idxPart].physicalValue[idxPV] = physicalValue*FReal(rand())/FRandMax; + } + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){ + particles[idxPart].potential[idxPot] = 0.0; + particles[idxPart].forces[0][idxPot] = 0.0; + particles[idxPart].forces[1][idxPot] = 0.0; + particles[idxPart].forces[2][idxPot] = 0.0; } } @@ -120,7 +137,8 @@ int main(int argc, char* argv[]) particles[idxOther].position.getX(), particles[idxOther].position.getY(), particles[idxOther].position.getZ(), particles[idxOther].physicalValue, particles[idxOther].forces[0], particles[idxOther].forces[1], - particles[idxOther].forces[2], particles[idxOther].potential); + particles[idxOther].forces[2], particles[idxOther].potential, + &DirectMatrixKernel); else if(MK_ID == ID_OVER_R) FP2P::MutualParticlesIOR(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(), particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue, @@ -130,6 +148,16 @@ int main(int argc, char* argv[]) particles[idxOther].position.getZ(), particles[idxOther].physicalValue, particles[idxOther].forces[0], particles[idxOther].forces[1], particles[idxOther].forces[2], particles[idxOther].potential); + else if(MK_ID == R_IJK) + FP2P::MutualParticlesRIJK(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(), + particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue, + particles[idxTarget].forces[0], particles[idxTarget].forces[1], + particles[idxTarget].forces[2], particles[idxTarget].potential, + particles[idxOther].position.getX(), particles[idxOther].position.getY(), + particles[idxOther].position.getZ(), particles[idxOther].physicalValue, + particles[idxOther].forces[0], particles[idxOther].forces[1], + particles[idxOther].forces[2], particles[idxOther].potential, + &DirectMatrixKernel); else std::runtime_error("Provide a valid matrix kernel!"); } @@ -146,7 +174,7 @@ int main(int argc, char* argv[]) { // begin Lagrange kernel // accuracy - const unsigned int ORDER = 7; +const unsigned int ORDER = 12 ; typedef FP2PParticleContainerIndexed<NRHS,NLHS> ContainerClass; @@ -163,14 +191,26 @@ int main(int argc, char* argv[]) { // ----------------------------------------------------- std::cout << "Creating & Inserting " << loader.getNumberOfParticles() - << " particles ..." << std::endl; + << " particles ..." << std::endl; + + + + std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl; time.tic(); for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ // put in tree - // PB: here we have to know the number of NRHS... - tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2]); + if(NPV==3) // R_IJ or IOR + tree.insert(particles[idxPart].position, idxPart, + particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2]); + else if(NPV==9) // R_IJK + tree.insert(particles[idxPart].position, idxPart, + particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2], + particles[idxPart].physicalValue[3], particles[idxPart].physicalValue[4], particles[idxPart].physicalValue[5], + particles[idxPart].physicalValue[6], particles[idxPart].physicalValue[7], particles[idxPart].physicalValue[8]); + else + std::runtime_error("NPV not yet supported in test! Add new case."); } time.tac(); @@ -181,7 +221,7 @@ int main(int argc, char* argv[]) { // ----------------------------------------------------- std::cout << "\nLagrange/Uniform grid FMM (ORDER="<< ORDER << ") ... " << std::endl; time.tic(); - KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox()); + KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(),CoreWidth); FmmClass algorithm(&tree, &kernels); algorithm.execute(); time.tac(); @@ -191,23 +231,21 @@ int main(int argc, char* argv[]) { // ----------------------------------------------------- std::cout << "\nError computation ... " << std::endl; - FMath::FAccurater potentialDiff[NLHS]; - FMath::FAccurater fx[NLHS], fy[NLHS], fz[NLHS]; + FMath::FAccurater potentialDiff[NPOT]; + FMath::FAccurater fx[NPOT], fy[NPOT], fz[NPOT]; - FReal checkPhysVal[20000][NRHS]; - FReal checkPotential[20000][NLHS]; - FReal checkfx[20000][NLHS]; + FReal checkPotential[20000][NPOT]; + FReal checkfx[20000][NPOT]; { // Check that each particle has been summed with all other tree.forEachLeaf([&](LeafClass* leaf){ - for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs){ + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){ - const FReal*const physVals = leaf->getTargets()->getPhysicalValues(idxLhs); - const FReal*const potentials = leaf->getTargets()->getPotentials(idxLhs); - const FReal*const forcesX = leaf->getTargets()->getForcesX(idxLhs); - const FReal*const forcesY = leaf->getTargets()->getForcesY(idxLhs); - const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxLhs); + const FReal*const potentials = leaf->getTargets()->getPotentials(idxPot); + const FReal*const forcesX = leaf->getTargets()->getForcesX(idxPot); + const FReal*const forcesY = leaf->getTargets()->getForcesY(idxPot); + const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxPot); const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); const FVector<int>& indexes = leaf->getTargets()->getIndexes(); @@ -215,43 +253,102 @@ int main(int argc, char* argv[]) const int indexPartOrig = indexes[idxPart]; //PB: store potential in array[nbParticles] - checkPhysVal[indexPartOrig][idxLhs]=physVals[idxPart]; - checkPotential[indexPartOrig][idxLhs]=potentials[idxPart]; - checkfx[indexPartOrig][idxLhs]=forcesX[idxPart]; - - potentialDiff[idxLhs].add(particles[indexPartOrig].potential[idxLhs],potentials[idxPart]); - fx[idxLhs].add(particles[indexPartOrig].forces[0][idxLhs],forcesX[idxPart]); - fy[idxLhs].add(particles[indexPartOrig].forces[1][idxLhs],forcesY[idxPart]); - fz[idxLhs].add(particles[indexPartOrig].forces[2][idxLhs],forcesZ[idxPart]); + checkPotential[indexPartOrig][idxPot]=potentials[idxPart]; + checkfx[indexPartOrig][idxPot]=forcesX[idxPart]; + + potentialDiff[idxPot].add(particles[indexPartOrig].potential[idxPot],potentials[idxPart]); + fx[idxPot].add(particles[indexPartOrig].forces[0][idxPot],forcesX[idxPart]); + fy[idxPot].add(particles[indexPartOrig].forces[1][idxPot],forcesY[idxPart]); + fz[idxPot].add(particles[indexPartOrig].forces[2][idxPot],forcesZ[idxPart]); } - }// NLHS + }// NPOT }); } -// std::cout << "Check Potential, forceX " << std::endl; -// for(int idxPart = 0 ; idxPart < 20 ; ++idxPart) -// for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs){ -// std::cout << checkPhysVal[idxPart][idxLhs] << ", "<< particles[idxPart].physicalValue[idxLhs]<< "|| "; -// std::cout << checkPotential[idxPart][idxLhs] << ", "<< particles[idxPart].potential[idxLhs]<< "|| "; -// std::cout << checkfx[idxPart][idxLhs] << ", "<< particles[idxPart].forces[0][idxLhs] << std::endl; -// } -// std::cout << std::endl; + std::cout << "Check Potential, forceX, ... " << std::endl; + for(int idxPart = 0 ; idxPart < 20 ; ++idxPart) + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot){ + std::cout << checkPotential[idxPart][idxPot] << ", "<< particles[idxPart].potential[idxPot]<< "|| "; + std::cout << checkfx[idxPart][idxPot] << ", "<< particles[idxPart].forces[0][idxPot] << std::endl; + } + std::cout << std::endl; // Print for information - std::cout << "\nAbsolute errors: " << std::endl; - std::cout << "Potential: "; - for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << potentialDiff[idxLhs] << ", " ; + std::cout << "\nRelative Inf/L2 errors: " << std::endl; + std::cout << " Potential: " << std::endl; + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) { + std::cout << " " << idxPot << ": " + << potentialDiff[idxPot].getRelativeInfNorm() << ", " + << potentialDiff[idxPot].getRelativeL2Norm() + << std::endl; + } std::cout << std::endl; - std::cout << "Fx: "; - for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << fx[idxLhs] << ", " ; + std::cout << " Fx: " << std::endl; + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) { + std::cout << " " << idxPot << ": " + << fx[idxPot].getRelativeInfNorm() << ", " + << fx[idxPot].getRelativeL2Norm() + << std::endl; + } std::cout << std::endl; - std::cout << "Fy: "; - for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << fy[idxLhs] << ", " ; + std::cout << " Fy: " << std::endl; + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) { + std::cout << " " << idxPot << ": " + << fy[idxPot].getRelativeInfNorm() << ", " + << fy[idxPot].getRelativeL2Norm() + << std::endl; + } std::cout << std::endl; - std::cout << "Fz: "; - for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) std::cout << fz[idxLhs] << ", " ; + std::cout << " Fz: " << std::endl; + for(unsigned idxPot = 0; idxPot<NPOT;++idxPot) { + std::cout << " " << idxPot << ": " + << fz[idxPot].getRelativeInfNorm() << ", " + << fz[idxPot].getRelativeL2Norm() + << std::endl; + } std::cout << std::endl; + { // ----------------------------------------------------- + + + std::cout << "\nStore results in file ... "<<std::endl; + std::ostringstream sstream; + if(MK_ID == R_IJ) + sstream << "testUnifRij_h"<< TreeHeight << "_a" << 1000*CoreWidth; + else if(MK_ID == R_IJK) + sstream << "testUnifRijk_h"<< TreeHeight << "_a" << 1000*CoreWidth; + + if(TreeHeight>2) sstream <<"_o"<<ORDER; + const std::string para_ext = sstream.str(); + std::string outname = "../tmp/"+para_ext+".dat"; + std::ofstream fout(outname.c_str()); + fout.precision(15); + + for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) + fout << potentialDiff[idxLhs].getL2Norm() << " " + << potentialDiff[idxLhs].getInfNorm() << " " + << potentialDiff[idxLhs].getRelativeL2Norm() << " " + << potentialDiff[idxLhs].getRelativeInfNorm() << std::endl; + for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) + fout << fx[idxLhs].getL2Norm() << " " + << fx[idxLhs].getInfNorm() << " " + << fx[idxLhs].getRelativeL2Norm() << " " + << fx[idxLhs].getRelativeInfNorm() << std::endl; + for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) + fout << fy[idxLhs].getL2Norm() << " " + << fy[idxLhs].getInfNorm() << " " + << fy[idxLhs].getRelativeL2Norm() << " " + << fy[idxLhs].getRelativeInfNorm() << std::endl; + for(unsigned idxLhs = 0; idxLhs<NLHS;++idxLhs) + fout << fz[idxLhs].getL2Norm() << " " + << fz[idxLhs].getInfNorm() << " " + << fz[idxLhs].getRelativeL2Norm() << " " + << fz[idxLhs].getRelativeInfNorm() << std::endl; + fout << std::flush; + fout.close(); + + } // ----------------------------------------------------- + } // ----------------------------------------------------- } // end Lagrange kernel diff --git a/Tests/Utils/testUnifInterpolator.cpp b/Tests/Utils/testUnifInterpolator.cpp index ac1a22838bf63b39f5592931f1d17f9d5bfeaff8..a964d1362c7a8c33b5b20991eba809f837c84290 100755 --- a/Tests/Utils/testUnifInterpolator.cpp +++ b/Tests/Utils/testUnifInterpolator.cpp @@ -111,7 +111,7 @@ int main(int, char **){ //////////////////////////////////////////////////////////////////// // approximative computation - const unsigned int ORDER = 2; + const unsigned int ORDER = 4; const unsigned int nnodes = TensorTraits<ORDER>::nnodes; typedef FUnifInterpolator<ORDER,MatrixKernelClass> InterpolatorClass; InterpolatorClass S; @@ -220,6 +220,8 @@ int main(int, char **){ // } // std::cout<<std::endl; + if(ORDER<4){// display some extra results for low orders + // Check multi-index std::cout<< "node_ids=" <<std::endl; for (unsigned int i=0; i<nnodes; ++i) @@ -228,7 +230,6 @@ int main(int, char **){ << node_ids[i][2] <<", "<<std::endl; std::cout<<std::endl; - // Check multi-index diff std::cout<< "node_ids=" <<std::endl; for (unsigned int i=0; i<nnodes; ++i){ @@ -266,6 +267,8 @@ int main(int, char **){ // } std::cout<<std::endl; // } std::cout<<std::endl; + }// display some extra results for low orders + // In 1D the Zero Padding consists in // inserting ORDER-1 zeros in the multipole exp // in order to apply the (ORDER+ORDER-1)x(ORDER+ORDER-1) @@ -460,6 +463,13 @@ int main(int, char **){ FReal* p = new FReal[M]; FBlas::setzero(M, p); + // null vectors for easy calculation of relative errors + FReal* null_p = new FReal[M]; + FBlas::setzero(M, null_p); + FReal* null_f = new FReal [M * 3]; + FBlas::setzero(M*3, null_f); + + { // start direct computation unsigned int counter = 0; @@ -520,10 +530,12 @@ int main(int, char **){ // std::cout << std::endl; std::cout << "\nPotential error:" << std::endl; - std::cout << "Absolute error = " << FMath::FAccurater( p, approx_p, M) << std::endl; + std::cout << "Relative Inf error = " << FMath::FAccurater( p, approx_p, M).getRelativeInfNorm() << std::endl; + std::cout << "Relative L2 error = " << FMath::FAccurater( p, approx_p, M).getRelativeL2Norm() << std::endl; std::cout << "\nForce error:" << std::endl; - std::cout << "Absolute L2 error = " << FMath::FAccurater( f, approx_f, M*3) << std::endl; + std::cout << "Relative Inf error = " << FMath::FAccurater( f, approx_f, M*3).getRelativeInfNorm() << std::endl; + std::cout << "Relative L2 error = " << FMath::FAccurater( f, approx_f, M*3).getRelativeL2Norm() << std::endl; std::cout << std::endl; // free memory diff --git a/Tests/Utils/testUnifTensorialInterpolator.cpp b/Tests/Utils/testUnifTensorialInterpolator.cpp index 1602b0052ae7423f18d272aa2fbcb06108f40067..f04164986a20e38cbdbba513842e6f0c9f7079e3 100755 --- a/Tests/Utils/testUnifTensorialInterpolator.cpp +++ b/Tests/Utils/testUnifTensorialInterpolator.cpp @@ -59,9 +59,13 @@ int main(int, char **){ // typedef FInterpMatrixKernelR MatrixKernelClass; typedef FInterpMatrixKernel_R_IJ MatrixKernelClass; typedef FInterpMatrixKernel_R_IJK RIJKMatrixKernelClass; // PB: force computation + const double a = 0.0; // core width (Beware! if diff from 0. then Kernel should be NON HOMOGENEOUS !!!) + const unsigned int dim = MatrixKernelClass::DIM; const unsigned int nrhs = MatrixKernelClass::NRHS; const unsigned int nlhs = MatrixKernelClass::NLHS; + const unsigned int npot = MatrixKernelClass::NPOT; + typedef FP2PParticleContainer<nrhs,nlhs> ContainerClass; typedef FSimpleLeaf<ContainerClass> LeafClass; @@ -90,7 +94,7 @@ int main(int, char **){ FReal x = (FReal(rand())/FRandMax - FReal(.5)) * width + cx.getX(); FReal y = (FReal(rand())/FRandMax - FReal(.5)) * width + cx.getY(); FReal z = (FReal(rand())/FRandMax - FReal(.5)) * width + cx.getZ(); - // PB: need to know the actual value of NLHS + // PB: need to know the actual value of NRHS X.push(FPoint(x, y, z), FReal(rand())/FRandMax, FReal(rand())/FRandMax, FReal(rand())/FRandMax); } } @@ -98,8 +102,8 @@ int main(int, char **){ //////////////////////////////////////////////////////////////////// LeafClass Y; - // FPoint cy(FReal(2.)*width, 0., 0.); - FPoint cy(FReal(2.)*width, FReal(2.)*width, 0.); + FPoint cy(FReal(2.)*width, 0., 0.); + //FPoint cy(FReal(2.)*width, FReal(2.)*width, 0.); const unsigned long N = 5000; std::cout << "Fill the leaf Y of width " << width @@ -118,7 +122,7 @@ int main(int, char **){ //////////////////////////////////////////////////////////////////// // approximative computation - const unsigned int ORDER = 5; + const unsigned int ORDER = 9; const unsigned int nnodes = TensorTraits<ORDER>::nnodes; typedef FUnifInterpolator<ORDER,MatrixKernelClass> InterpolatorClass; InterpolatorClass S; @@ -148,14 +152,13 @@ int main(int, char **){ for (unsigned int i=0; i<nnodes; ++i) { for (unsigned int j=0; j<nnodes; ++j){ - for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs) - for (unsigned int idxRhs=0; idxRhs<nrhs; ++idxRhs){ - unsigned idxK = idxLhs*3+idxRhs; // or counter - unsigned int d = MatrixKernel.getPosition(idxK); + for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs){ + unsigned int idxRhs = idxLhs % npot; + unsigned int d = MatrixKernel.getPosition(idxLhs); - F[i+idxLhs*nnodes] += MatrixKernelClass(d).evaluate(rootsX[i], rootsY[j]) * W[j+idxRhs*nnodes]; + F[i+idxLhs*nnodes] += MatrixKernelClass(a,d).evaluate(rootsX[i], rootsY[j]) * W[j+idxRhs*nnodes]; - } + } } } std::cout << "took " << time.tacAndElapsed() << "s" << std::endl; @@ -175,7 +178,7 @@ int main(int, char **){ for (unsigned int j=0; j<nnodes; ++j){ for (unsigned int d=0; d<dim; ++d){ - K[d*nnodes*nnodes+i*nnodes+j] = MatrixKernelClass(d).evaluate(rootsX[i], rootsY[j]); + K[d*nnodes*nnodes+i*nnodes+j] = MatrixKernelClass(a,d).evaluate(rootsX[i], rootsY[j]); } } @@ -187,12 +190,11 @@ int main(int, char **){ for (unsigned int i=0; i<nnodes; ++i) for (unsigned int j=0; j<nnodes; ++j){ - for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs) - for (unsigned int idxRhs=0; idxRhs<nrhs; ++idxRhs){ - unsigned idxK = idxLhs*3+idxRhs; // or counter - unsigned int d = MatrixKernel.getPosition(idxK); + for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs){ + unsigned int idxRhs = idxLhs % npot; + unsigned int d = MatrixKernel.getPosition(idxLhs); - F[i+idxLhs*nnodes] += K[d*nnodes*nnodes+i*nnodes+j] * W[j+idxRhs*nnodes]; + F[i+idxLhs*nnodes] += K[d*nnodes*nnodes+i*nnodes+j] * W[j+idxRhs*nnodes]; } } @@ -229,7 +231,7 @@ int main(int, char **){ for (unsigned int d=0; d<dim; ++d){ C[d*rc + ido] - = MatrixKernelClass(d).evaluate(rootsX[node_ids_pairs[ido][0]], + = MatrixKernelClass(a,d).evaluate(rootsX[node_ids_pairs[ido][0]], rootsY[node_ids_pairs[ido][1]]); } @@ -330,15 +332,13 @@ int main(int, char **){ // Application of M2L in PHYSICAL SPACE for (unsigned int pj=0; pj<rc; ++pj) - for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs) - for (unsigned int idxRhs=0; idxRhs<nrhs; ++idxRhs){ - unsigned idxK = idxLhs*3+idxRhs; // or counter - - unsigned int d = MatrixKernel.getPosition(idxK); + for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs){ + unsigned int idxRhs = idxLhs % npot; + unsigned int d = MatrixKernel.getPosition(idxLhs); - LocalExp[i + idxLhs*nnodes]+=C[pj + d*rc]*PaddedMultExp[pj + idxRhs*rc]; + LocalExp[i + idxLhs*nnodes]+=C[pj + d*rc]*PaddedMultExp[pj + idxRhs*rc]; - } + } }// end i time.tac(); @@ -441,17 +441,15 @@ int main(int, char **){ // reinterpret_cast<FReal*>(FPLocalExp)); // > or perform entrywise product manually FComplexe tmpFX; - for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs) - for (unsigned int idxRhs=0; idxRhs<nrhs; ++idxRhs){ - unsigned int idxK = idxLhs*3+idxRhs; // or counter - - unsigned int d = MatrixKernel.getPosition(idxK); - - for (unsigned int pj=0; pj<rc; ++pj){ - tmpFX=FT[pj + d*rc]; - tmpFX*=FPMultExp[pj+idxRhs*rc]; - FPLocalExp[pj+idxLhs*rc]+=tmpFX; // add new contribution +RijYj - } + for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs){ + unsigned int idxRhs = idxLhs % npot; + unsigned int d = MatrixKernel.getPosition(idxLhs); + + for (unsigned int pj=0; pj<rc; ++pj){ + tmpFX=FT[pj + d*rc]; + tmpFX*=FPMultExp[pj+idxRhs*rc]; + FPLocalExp[pj+idxLhs*rc]+=tmpFX; // add new contribution +RijYj + } } time.tac(); @@ -509,22 +507,20 @@ int main(int, char **){ //////////////////////////////////////////////////////////////////// // direct computation - // Only scalar phys val => only compute first compo and no derivative - // TODO add multidim phys val !!!!!! std::cout << "Compute interactions directly ..." << std::endl; time.tic(); - FReal** approx_f = new FReal* [nlhs]; - FReal** f = new FReal* [nlhs]; - for (unsigned int i=0; i<nrhs; ++i){ + FReal** approx_f = new FReal* [npot]; + FReal** f = new FReal* [npot]; + for (unsigned int i=0; i<npot; ++i){ approx_f[i] = new FReal [M * 3]; f[i] = new FReal [M * 3]; FBlas::setzero(M*3, f[i]); } - FReal** approx_p = new FReal* [nlhs]; - FReal** p = new FReal* [nlhs]; - for (unsigned int i=0; i<nrhs; ++i){ + FReal** approx_p = new FReal* [npot]; + FReal** p = new FReal* [npot]; + for (unsigned int i=0; i<npot; ++i){ approx_p[i] = new FReal [M]; p[i] = new FReal [M]; FBlas::setzero(M, p[i]); @@ -562,19 +558,18 @@ int main(int, char **){ // f[counter*3 + 2] += force.getZ() * wx * wy; // R,ij and (R,ij),k - for (unsigned int i=0; i<nlhs; ++i) // sum all compo + for (unsigned int i=0; i<npot; ++i) // sum all compo for (unsigned int j=0; j<nrhs; ++j){ unsigned int d = MatrixKernel.getPosition(i*nrhs+j); - const FReal rij = MatrixKernelClass(d).evaluate(x, y); + const FReal rij = MatrixKernelClass(a,d).evaluate(x, y); // potential p[i][counter] += rij * wy[j]; // force - FReal force[3]; + FReal force_k; for (unsigned int k=0; k<3; ++k){ - //std::cout << "i,j,k,=" << i << ","<< j << ","<< k << std::endl; - unsigned int dk = RIJKMatrixKernel.getPosition(i*3*3+j*3+k); - force[k] = RIJKMatrixKernelClass(dk).evaluate(x, y); - f[i][counter*3 + k] += force[k] * wx[j] * wy[j]; + unsigned int dk = RIJKMatrixKernel.getPosition((i*nrhs+j)*3+k); + force_k = RIJKMatrixKernelClass(a,dk).evaluate(x, y); + f[i][counter*3 + k] += force_k * wx[j] * wy[j]; } } @@ -591,7 +586,7 @@ int main(int, char **){ //////////////////////////////////////////////////////////////////// unsigned int counter = 0; for(int idxPartX = 0 ; idxPartX < X.getSrc()->getNbParticles() ; ++idxPartX){ - for (unsigned int i=0; i<nlhs; ++i){ + for (unsigned int i=0; i<npot; ++i){ approx_p[i][counter] = X.getSrc()->getPotentials(i)[idxPartX]; const FPoint force = FPoint(X.getSrc()->getForcesX(i)[idxPartX], X.getSrc()->getForcesY(i)[idxPartX], @@ -604,7 +599,7 @@ int main(int, char **){ } // std::cout << "Check Potential, forceX, forceY, forceZ " << std::endl; -// for (unsigned int i=0; i<nlhs; ++i){ +// for (unsigned int i=0; i<npot; ++i){ // std::cout<< "idxLhs="<< i << std::endl; // for(int idxPart = 0 ; idxPart < 20 ; ++idxPart){ // std::cout << approx_p[i][idxPart] << ", "<< p[i][idxPart] << "|| "; @@ -617,15 +612,23 @@ int main(int, char **){ // } // std::cout << std::endl; - std::cout << "\nPotential error:" << std::endl; - std::cout << "Relative error X = " << FMath::FAccurater( p[0], approx_p[0], M) << std::endl; - std::cout << "Relative error Y = " << FMath::FAccurater( p[1], approx_p[1], M) << std::endl; - std::cout << "Relative error Z = " << FMath::FAccurater( p[2], approx_p[2], M) << std::endl; + std::cout << "\nRelative Inf/L2 errors: " << std::endl; + std::cout << " Potential:" << std::endl; + for(unsigned i = 0; i<npot;++i) { + std::cout << " " << i << ": " + << FMath::FAccurater(p[i],approx_p[i],M).getRelativeInfNorm()<<", " + << FMath::FAccurater(p[i],approx_p[i],M).getRelativeL2Norm() + << std::endl; + } + std::cout << std::endl; - std::cout << "\nForce error (Something is wrong here => TOFIX):" << std::endl; - std::cout << "Relative L2 error X = " << FMath::FAccurater( f[0], approx_f[0], M*3) << std::endl; - std::cout << "Relative L2 error Y = " << FMath::FAccurater( f[1], approx_f[1], M*3) << std::endl; - std::cout << "Relative L2 error Z = " << FMath::FAccurater( f[2], approx_f[2], M*3) << std::endl; + std::cout << " Force:" << std::endl; + for(unsigned i = 0; i<npot;++i) { + std::cout << " " << i << ": " + << FMath::FAccurater(f[i],approx_f[i],3*M).getRelativeInfNorm()<<", " + << FMath::FAccurater(f[i],approx_f[i],3*M).getRelativeL2Norm() + << std::endl; + } std::cout << std::endl; // free memory