diff --git a/Src/Kernels/Chebyshev/FChebInterpolator.hpp b/Src/Kernels/Chebyshev/FChebInterpolator.hpp index 24b37295c6a572f9332ff8ccdd3103b5c5250b85..a5d38fe11e8a692e6edd211913f494bdc3c28856 100755 --- a/Src/Kernels/Chebyshev/FChebInterpolator.hpp +++ b/Src/Kernels/Chebyshev/FChebInterpolator.hpp @@ -47,7 +47,8 @@ class FChebInterpolator : FNoCopyable // compile time constants and types enum {nnodes = TensorTraits::nnodes, nRhs = MatrixKernelClass::NRHS, - nLhs = MatrixKernelClass::NLHS}; + nLhs = MatrixKernelClass::NLHS, + nPV = MatrixKernelClass::NPV}; typedef FChebRoots< ORDER> BasisType; typedef FChebTensor TensorType; @@ -861,8 +862,15 @@ inline void FChebInterpolator::applyL2P(const FPoint& c } } - for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ - FReal*const potentials = inParticles->getPotentials(idxLhs); + for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ + // distribution over potential components: + // We sum the multidim contribution of PhysValue + // This was originally done at M2L step but moved here + // because their storage is required by the force computation. + // In fact : f_{ik}(x)=w_j(x) \nabla_{x_i} K_{ij}(x,y)w_j(y)) + const unsigned int idxPot = idxLhs / nPV; + + FReal*const potentials = inParticles->getPotentials(idxPot); // interpolate and increment target value FReal targetValue = potentials[idxPart]; @@ -1110,6 +1118,8 @@ inline void FChebInterpolator::applyL2PGradient(const F } for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ + const unsigned int idxPot = idxLhs / nPV; + const unsigned int idxPV = idxLhs % nPV; // scale forces forces[idxLhs][0] *= jacobian[0] / nnodes; @@ -1117,10 +1127,10 @@ inline void FChebInterpolator::applyL2PGradient(const F forces[idxLhs][2] *= jacobian[2] / nnodes; // get pointers to PhysValues and force components - const FReal*const physicalValues = inParticles->getPhysicalValues(idxLhs); - FReal*const forcesX = inParticles->getForcesX(idxLhs); - FReal*const forcesY = inParticles->getForcesY(idxLhs); - FReal*const forcesZ = inParticles->getForcesZ(idxLhs); + const FReal*const physicalValues = inParticles->getPhysicalValues(idxPV); + FReal*const forcesX = inParticles->getForcesX(idxPot); + FReal*const forcesY = inParticles->getForcesY(idxPot); + FReal*const forcesZ = inParticles->getForcesZ(idxPot); // set computed forces forcesX[idxPart] += forces[idxLhs][0] * physicalValues[idxPart]; @@ -1342,12 +1352,15 @@ inline void FChebInterpolator::applyL2PTotal(const FPoi forces[idxLhs][2] = ( FReal(2.)*f2[3] + FReal(4.)*f4[3] + FReal(8.)*f8[3]) * jacobian[2] / nnodes; // 7 flops } // 28 + (ORDER-1) * ((ORDER-1) * (27 + (ORDER-1) * 16)) flops + const unsigned int idxPot = idxLhs / nPV; + const unsigned int idxPV = idxLhs % nPV; + // get potentials, physValues and forces components - const FReal*const physicalValues = inParticles->getPhysicalValues(idxLhs); - FReal*const forcesX = inParticles->getForcesX(idxLhs); - FReal*const forcesY = inParticles->getForcesY(idxLhs); - FReal*const forcesZ = inParticles->getForcesZ(idxLhs); - FReal*const potentials = inParticles->getPotentials(idxLhs); + const FReal*const physicalValues = inParticles->getPhysicalValues(idxPV); + FReal*const forcesX = inParticles->getForcesX(idxPot); + FReal*const forcesY = inParticles->getForcesY(idxPot); + FReal*const forcesZ = inParticles->getForcesZ(idxPot); + FReal*const potentials = inParticles->getPotentials(idxPot); // set computed potential potentials[idxPart] += (potential[idxLhs]); // 1 flop diff --git a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp index e5d0d06532fd58c98e166ba07a19106b23e8f971..35c7833be2ba14ea86323330c6ce6aed74771505 100755 --- a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp @@ -46,7 +46,9 @@ class FChebTensorialKernel : public FAbstractChebKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS> { enum {nRhs = MatrixKernelClass::NRHS, - nLhs = MatrixKernelClass::NLHS}; + nLhs = MatrixKernelClass::NLHS, + nPot = MatrixKernelClass::NPOT, + nPv = MatrixKernelClass::NPV}; protected://PB: for OptiDis @@ -137,19 +139,19 @@ public: for(int idxV = 0 ; idxV < NVALS ; ++idxV){ for (int idxLhs=0; idxLhs < nLhs; ++idxLhs){ - // update local index - int idxLoc = idxV*nLhs + idxLhs; + // update local index + const int idxLoc = idxV*nLhs + idxLhs; - FReal *const CompressedLocalExpansion = TargetCell->getLocal(idxLoc) + AbstractBaseClass::nnodes; + FReal *const CompressedLocalExpansion = TargetCell->getLocal(idxLoc) + AbstractBaseClass::nnodes; - for (int idxRhs=0; idxRhs < nRhs; ++idxRhs){ + // update idxRhs + const int idxRhs = idxLhs % nPv; // update multipole index - int idxMul = idxV*nRhs + idxRhs; - // update kernel index such that: x_i = K_{ij}y_j - int idxK = idxLhs*nRhs + idxRhs; + const int idxMul = idxV*nRhs + idxRhs; + // get index in matrix kernel - unsigned int d - = AbstractBaseClass::MatrixKernel.getPtr()->getPosition(idxK); + const unsigned int d + = AbstractBaseClass::MatrixKernel.getPtr()->getPosition(idxLhs); for (int idx=0; idx<343; ++idx){ if (SourceCells[idx]){ @@ -158,8 +160,7 @@ public: CompressedLocalExpansion); } } - }// NRHS - }// NLHS + }// NLHS=NPOT*NPV }// NVALS } @@ -223,14 +224,14 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles); + DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,AbstractBaseClass::MatrixKernel.getPtr()); } void P2PRemote(const FTreeCoordinate& /*inPosition*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/){ - DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27); + DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,AbstractBaseClass::MatrixKernel.getPtr()); } }; diff --git a/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp b/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp index 674b3cdc4c4b144b7ce3d9a500af0bcf3c3258b7..000b63b10bf7f689992c19a115b6ff3f19ccdd3c 100755 --- a/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp +++ b/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp @@ -86,8 +86,7 @@ class FChebTensorialM2LHandler : FNoCopyabl enum {order = ORDER, nnodes = TensorTraits::nnodes, ninteractions = 316,// 7^3 - 3^3 (max num cells in far-field) - dim = MatrixKernelClass::DIM, - nidx = MatrixKernelClass::NIDX}; + dim = MatrixKernelClass::DIM}; // const MatrixKernelClass MatrixKernel; @@ -205,8 +204,7 @@ class FChebTensorialM2LHandler : FNoCop enum {order = ORDER, nnodes = TensorTraits::nnodes, ninteractions = 316,// 7^3 - 3^3 (max num cells in far-field) - dim = MatrixKernelClass::DIM, - nidx = MatrixKernelClass::NIDX}; + dim = MatrixKernelClass::DIM}; // Tensorial MatrixKernel and homogeneity specific FReal **U, **B; diff --git a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp index b5cc77f07ae055240d60a48cecfc3e1514b0fc97..87fe875106a9ef8974921c42283c3a1a6cd096cc 100755 --- a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp +++ b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp @@ -71,7 +71,8 @@ struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel static const KERNEL_FUNCTION_TYPE Type = /*NON_*/HOMOGENEOUS; static const KERNEL_FUNCTION_IDENTIFIER Identifier = ONE_OVER_R; static const unsigned int DIM = 1; //PB: dimension of kernel - static const unsigned int NIDX = 1; //PB: number of indices + static const unsigned int NPV = 1; + static const unsigned int NPOT = 1; static const unsigned int NRHS = 1; static const unsigned int NLHS = 1; @@ -127,7 +128,8 @@ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; static const KERNEL_FUNCTION_IDENTIFIER Identifier = ONE_OVER_R_SQUARED; static const unsigned int DIM = 1; //PB: dimension of kernel - static const unsigned int NIDX = 1; //PB: number of indices + static const unsigned int NPV = 1; + static const unsigned int NPOT = 1; static const unsigned int NRHS = 1; static const unsigned int NLHS = 1; @@ -165,7 +167,8 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; static const KERNEL_FUNCTION_IDENTIFIER Identifier = LENNARD_JONES_POTENTIAL; static const unsigned int DIM = 1; //PB: dimension of kernel - static const unsigned int NIDX = 1; //PB: number of indices + static const unsigned int NPV = 1; + static const unsigned int NPOT = 1; static const unsigned int NRHS = 1; static const unsigned int NLHS = 1; @@ -205,6 +208,26 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel // // Tensorial Matrix Kernels (DIM>1) // +// The definition of the potential p and force f are extended to the case +// of tensorial interaction kernels: +// p_i(x) = K_{ip}(x,y)w_p(y), \forall i=1..NPOT, p=1..NPV +// f_{ik}= w_p(x)K_{ip,k}(x,y)w_p(y) " +// +// Since the interpolation scheme is such that +// p_i(x) \approx S^m(x) L^{m}_{ip} +// f_{ik}= w_p(x) \nabla_k S^m(x) L^{m}_{ip} +// with +// L^{m}_{ip} = K^{mn}_{ip} S^n(y) w_p(y) (local expansion) +// M^{m}_{p} = S^n(y) w_p(y) (multipole expansion) +// then the multipole exp have NPV components and the local exp NPOT*NPV. +// +// NB1: Only the computation of forces requires that the sum over p is +// performed at L2P step. It could be done at M2L step for the potential. +// +// NB2: An efficient application of the matrix kernel is highly kernel +// dependent, we recommand overriding the P2M/M2L/L2P function of the kernel +// you are using in order to have optimal performances + set your own NRHS/NLHS. +// //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// @@ -213,11 +236,15 @@ struct FInterpMatrixKernel_IOR : FInterpAbstractMatrixKernel { static const KERNEL_FUNCTION_TYPE Type = /*NON_*/HOMOGENEOUS; static const KERNEL_FUNCTION_IDENTIFIER Identifier = ID_OVER_R; - static const unsigned int DIM = 6; //PB: dimension of kernel - static const unsigned int NIDX = 2; //PB: number of indices - static const unsigned int indexTab[/*DIM*NIDX=12*/]; - static const unsigned int NRHS = 3; - static const unsigned int NLHS = 3; + static const unsigned int NK = 3*3; // dim of kernel + static const unsigned int DIM = 6; // reduced dim + static const unsigned int NPOT = 3; // dim of potentials + static const unsigned int NPV = 3; // dim of physical values + static const unsigned int NRHS = NPV; // dim of mult exp (here = dim physval) + static const unsigned int NLHS = NPOT*NRHS; // dim of loc exp (here = NPOT*NRHS) + + // store indices (i,j) corresponding to sym idx + static const unsigned int indexTab[/*2*DIM=12*/]; // store positions in sym tensor static const unsigned int applyTab[/*9*/]; @@ -271,12 +298,6 @@ struct FInterpMatrixKernel_IOR : FInterpAbstractMatrixKernel return FReal(2.) / CellWidth; } -// FReal getScaleFactor(const FReal, const int) const -// { -// // return 1 because non homogeneous kernel functions cannot be scaled!!! -// return FReal(1.); -// } -// // FReal getScaleFactor(const FReal) const // { // // return 1 because non homogeneous kernel functions cannot be scaled!!! @@ -290,31 +311,45 @@ struct FInterpMatrixKernel_IOR : FInterpAbstractMatrixKernel // required by ChebSym kernel => only suited for Unif interpolation struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel { - static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; + static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; static const KERNEL_FUNCTION_IDENTIFIER Identifier = R_IJ; - static const unsigned int DIM = 6; //PB: dimension of kernel - static const unsigned int NIDX = 2; //PB: number of indices - static const unsigned int indexTab[/*DIM*NIDX=12*/]; - static const unsigned int NRHS = 3; - static const unsigned int NLHS = 3; + static const unsigned int NK = 3*3; // dim of kernel + static const unsigned int DIM = 6; // reduced dim + static const unsigned int NPOT = 3; // dim of potentials + static const unsigned int NPV = 3; // dim of physical values + static const unsigned int NRHS = NPV; // dim of mult exp (here = dim physval) + static const unsigned int NLHS = NPOT*NRHS; // dim of loc exp (here = NPOT*NRHS) + + // store indices (i,j) corresponding to sym idx + static const unsigned int indexTab[/*2*DIM=12*/]; // store positions in sym tensor (when looping over NRHSxNLHS) - static const unsigned int applyTab[/*9*/]; + static const unsigned int applyTab[/*NK=9*/]; + // indices to be set at construction if component-wise evaluation is performed const unsigned int _i,_j; - FInterpMatrixKernel_R_IJ(const double = 0.0, const unsigned int d = 0) - : _i(indexTab[d]), _j(indexTab[d+DIM]) + // Material Parameters + const double _CoreWidth2; // if >0 then kernel is NON homogeneous + + FInterpMatrixKernel_R_IJ(const double CoreWidth = 0.0, const unsigned int d = 0) + : _i(indexTab[d]), _j(indexTab[d+DIM]), _CoreWidth2(CoreWidth*CoreWidth) {} // returns position in reduced storage from position in full 3x3 matrix int getPosition(const unsigned int n) const {return applyTab[n];} + // returns Core Width squared + double getCoreWidth2() const + {return _CoreWidth2;} + FReal evaluate(const FPoint& x, const FPoint& y) const { const FPoint xy(x-y); - const FReal one_over_r = FReal(1.)/xy.norm(); + const FReal one_over_r = FReal(1.)/sqrt(xy.getX()*xy.getX() + + xy.getY()*xy.getY() + + xy.getZ()*xy.getZ() + _CoreWidth2); const FReal one_over_r3 = one_over_r*one_over_r*one_over_r; double ri,rj; @@ -338,7 +373,9 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel void evaluateBlock(const FPoint& x, const FPoint& y, FReal* block) const { const FPoint xy(x-y); - const FReal one_over_r = FReal(1.)/xy.norm(); + const FReal one_over_r = FReal(1.)/sqrt(xy.getX()*xy.getX() + + xy.getY()*xy.getY() + + xy.getZ()*xy.getZ() + _CoreWidth2); const FReal one_over_r3 = one_over_r*one_over_r*one_over_r; const double r[3] = {xy.getX(),xy.getY(),xy.getZ()}; @@ -366,36 +403,57 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel return FReal(2.) / CellWidth; } +// // R_{,ij} is set non-homogeneous +// FReal getScaleFactor(const FReal CellWidth) const +// { +// return FReal(1.); +// } + }; /// R_{,ijk} struct FInterpMatrixKernel_R_IJK : FInterpAbstractMatrixKernel { - static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; + static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; static const KERNEL_FUNCTION_IDENTIFIER Identifier = R_IJK; - static const unsigned int DIM = 10; //PB: dimension of kernel - static const unsigned int NIDX = 3; //PB: number of indices - static const unsigned int indexTab[/*DIM*NIDX=30*/]; - static const unsigned int NRHS = 3; - static const unsigned int NLHS = 3; + static const unsigned int NK = 3*3*3; // dim of kernel + static const unsigned int DIM = 10; // reduced dim + static const unsigned int NPOT = 3; // dim of potentials + static const unsigned int NPV = 3*3; // dim of physical values + static const unsigned int NRHS = NPV; // dim of mult exp (here = dim physval) + static const unsigned int NLHS = NPOT*NRHS; // dim of loc exp (here = NPOT*NRHS) - // store positions in sym tensor (when looping over NRHSxNLHS) - static const unsigned int applyTab[/*27*/]; + // store indices (i,j,k) corresponding to sym idx + static const unsigned int indexTab[/*3*DIM=30*/]; + + // store positions in sym tensor wr to full + static const unsigned int applyTab[/*NK=27*/]; + // indices to be set at construction if component-wise evaluation is performed const unsigned int _i,_j,_k; - FInterpMatrixKernel_R_IJK(const double = 0.0, const unsigned int d = 0) - : _i(indexTab[d]), _j(indexTab[d+DIM]), _k(indexTab[d+2*DIM]) + // Material Parameters + const double _CoreWidth2; // if >0 then kernel is NON homogeneous + + FInterpMatrixKernel_R_IJK(const double CoreWidth = 0.0, const unsigned int d = 0) + : _i(indexTab[d]), _j(indexTab[d+DIM]), _k(indexTab[d+2*DIM]), _CoreWidth2(CoreWidth*CoreWidth) {} // returns position in reduced storage from position in full 3x3x3 matrix int getPosition(const unsigned int n) const {return applyTab[n];} + // returns Core Width squared + double getCoreWidth2() const + {return _CoreWidth2;} + FReal evaluate(const FPoint& x, const FPoint& y) const { - const FPoint xy(x-y); - const FReal one_over_r = FReal(1.)/xy.norm(); +// const FPoint xy(x-y); + const FPoint xy(y-x); + const FReal one_over_r = FReal(1.)/sqrt(xy.getX()*xy.getX() + + xy.getY()*xy.getY() + + xy.getZ()*xy.getZ() + _CoreWidth2); const FReal one_over_r2 = one_over_r*one_over_r; const FReal one_over_r3 = one_over_r2*one_over_r; double ri,rj,rk; @@ -434,6 +492,39 @@ struct FInterpMatrixKernel_R_IJK : FInterpAbstractMatrixKernel } + void evaluateBlock(const FPoint& x, const FPoint& y, FReal* block) const + { +// const FPoint xy(x-y); + const FPoint xy(y-x); + const FReal one_over_r = FReal(1.)/sqrt(xy.getX()*xy.getX() + + xy.getY()*xy.getY() + + xy.getZ()*xy.getZ() + _CoreWidth2); + const FReal one_over_r2 = one_over_r*one_over_r; + const FReal one_over_r3 = one_over_r2*one_over_r; + + const double r[3] = {xy.getX(),xy.getY(),xy.getZ()}; + + for(unsigned int d=0;d template <> struct DirectInteractionComputer { - template + template static void P2P( ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27]){ - FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14); + ContainerClass* const NeighborSourceParticles[27], + const MatrixKernelClass *const MatrixKernel){ + FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14,MatrixKernel); } - template + template static void P2PRemote( ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[27], - const int inSize){ - FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize); + const int inSize, + const MatrixKernelClass *const MatrixKernel){ + FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize,MatrixKernel); + } +}; + +/*! Specialization for GradGradGradR potential */ +template <> +struct DirectInteractionComputer +{ + template + static void P2P( ContainerClass* const FRestrict TargetParticles, + ContainerClass* const NeighborSourceParticles[27], + const MatrixKernelClass *const MatrixKernel){ + FP2P::FullMutualRIJK(TargetParticles,NeighborSourceParticles,14,MatrixKernel); + } + + template + static void P2PRemote( ContainerClass* const FRestrict inTargets, + ContainerClass* const inNeighbors[27], + const int inSize, + const MatrixKernelClass *const MatrixKernel){ + FP2P::FullRemoteRIJK(inTargets,inNeighbors,inSize,MatrixKernel); } }; @@ -160,20 +182,46 @@ struct DirectInteractionComputer template struct DirectInteractionComputer { - template + template static void P2P( ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27]){ + ContainerClass* const NeighborSourceParticles[27], + const MatrixKernelClass *const MatrixKernel){ for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14); + FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14,MatrixKernel); } } - template + template static void P2PRemote( ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[27], - const int inSize){ + const int inSize, + const MatrixKernelClass *const MatrixKernel){ + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize,MatrixKernel); + } + } +}; + +/*! Specialization for GradGradGradR potential */ +template +struct DirectInteractionComputer +{ + template + static void P2P( ContainerClass* const FRestrict TargetParticles, + ContainerClass* const NeighborSourceParticles[27], + const MatrixKernelClass *const MatrixKernel){ + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + FP2P::FullMutualRIJK(TargetParticles,NeighborSourceParticles,14,MatrixKernel); + } + } + + template + static void P2PRemote( ContainerClass* const FRestrict inTargets, + ContainerClass* const inNeighbors[27], + const int inSize, + const MatrixKernelClass *const MatrixKernel){ for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize); + FP2P::FullRemoteRIJK(inTargets,inNeighbors,inSize,MatrixKernel); } } }; diff --git a/Src/Kernels/P2P/FP2P.hpp b/Src/Kernels/P2P/FP2P.hpp index 863180432935cd9533ac44a1c0d68d048b7a2568..7663a1f51d83ce35f75e9dd571da5e71dac772b8 100644 --- a/Src/Kernels/P2P/FP2P.hpp +++ b/Src/Kernels/P2P/FP2P.hpp @@ -978,6 +978,8 @@ public: ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// // R_IJ + // PB: The following functions take the MatrixKernel as input arguments for more generic implementation + // Besides this MatrixKernel is already updated with any extra parameter (e.g. core width). ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -986,9 +988,11 @@ public: /** * @brief FullMutualRIJ */ - template + template static void FullMutualRIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], - const int limiteNeighbors){ + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + + const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock const int nbParticlesTargets = inTargets->getNbParticles(); const FReal*const targetsX = inTargets->getPositions()[0]; @@ -1008,7 +1012,7 @@ public: FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz); + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); FReal inv_distance = FMath::Sqrt(inv_distance_pow2); FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; @@ -1086,7 +1090,7 @@ public: FReal dy = targetsY[idxSource] - targetsY[idxTarget]; FReal dz = targetsZ[idxSource] - targetsZ[idxTarget]; - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz); + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); FReal inv_distance = FMath::Sqrt(inv_distance_pow2); FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; @@ -1144,7 +1148,6 @@ public: targetsForcesY[idxSource] -= coef[1]; targetsForcesZ[idxSource] -= coef[2]; targetsPotentials[idxSource] += potentialCoef * targetsPhysicalValues[idxTarget]; - }// j }// i @@ -1155,9 +1158,11 @@ public: /** * @brief FullRemoteRIJ */ - template + template static void FullRemoteRIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], - const int limiteNeighbors){ + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + + const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock const int nbParticlesTargets = inTargets->getNbParticles(); const FReal*const targetsX = inTargets->getPositions()[0]; @@ -1178,7 +1183,7 @@ public: FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz); + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); FReal inv_distance = FMath::Sqrt(inv_distance_pow2); FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; @@ -1261,17 +1266,21 @@ public: * @param targetForceZ * @param targetPotential */ + template static void MutualParticlesRIJ(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal* sourcePhysicalValue, FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue, - FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential - ){ + FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, + const MatrixKernelClass *const MatrixKernel){ + + const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock + // GradGradR potential FReal dx = sourceX - targetX; FReal dy = sourceY - targetY; FReal dz = sourceZ - targetZ; - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz); + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); FReal inv_distance = FMath::Sqrt(inv_distance_pow2); FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; @@ -1328,6 +1337,364 @@ public: } + + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + // R_IJK + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + ////////////////////////////////////////////////////////////////////////////////////////////////////////// + + /** + * @brief FullMutualRIJK + */ + template + static void FullMutualRIJK(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + + const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock + + const int nbParticlesTargets = inTargets->getNbParticles(); + const FReal*const targetsX = inTargets->getPositions()[0]; + const FReal*const targetsY = inTargets->getPositions()[1]; + const FReal*const targetsZ = inTargets->getPositions()[2]; + + for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + if( inNeighbors[idxNeighbors] ){ + const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); + const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; + const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; + const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; + + for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ + FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; + FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; + FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal*const sourcesPotentials = inNeighbors[idxNeighbors]->getPotentials(i); + FReal*const sourcesForcesX = inNeighbors[idxNeighbors]->getForcesX(i); + FReal*const sourcesForcesY = inNeighbors[idxNeighbors]->getForcesY(i); + FReal*const sourcesForcesZ = inNeighbors[idxNeighbors]->getForcesZ(i); + + const FReal ri2=r[i]*r[i]; + + for(unsigned int j = 0 ; j < 3 ; ++j){ + FReal rj2=r[j]*r[j]; + + for(unsigned int k = 0 ; k < 3 ; ++k){ + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); + const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j*3+k); + + // potentials + FReal potentialCoef; + if(i==j){ + if(j==k) //i=j=k + potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + + // forces + FReal coef[3]; + for(unsigned int l = 0 ; l < 3 ; ++l){ + coef[l]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); + +// // Grad of RIJK kernel is RIJKL kernel => TODO Implement +// coef[l] *= ; + + }// l + + targetsForcesX[idxTarget] += coef[0]; + targetsForcesY[idxTarget] += coef[1]; + targetsForcesZ[idxTarget] += coef[2]; + targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); + + sourcesForcesX[idxSource] -= coef[0]; + sourcesForcesY[idxSource] -= coef[1]; + sourcesForcesZ[idxSource] -= coef[2]; + sourcesPotentials[idxSource] += -potentialCoef * targetsPhysicalValues[idxTarget]; + + }// k + }// j + }// i + } + } + } + } + + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + for(int idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){ + FReal dx = targetsX[idxSource] - targetsX[idxTarget]; + FReal dy = targetsY[idxSource] - targetsY[idxTarget]; + FReal dz = targetsZ[idxSource] - targetsZ[idxTarget]; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal ri2=r[i]*r[i]; + + for(unsigned int j = 0 ; j < 3 ; ++j){ + FReal rj2=r[j]*r[j]; + + for(unsigned int k = 0 ; k < 3 ; ++k){ + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); + + // potentials + FReal potentialCoef; + if(i==j){ + if(j==k) //i=j=k + potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + + // forces + FReal coef[3]; + for(unsigned int l = 0 ; l < 3 ; ++l){ + coef[l]= -(targetsPhysicalValues[idxTarget] * targetsPhysicalValues[idxSource]); + +// // Grad of RIJK kernel is RIJKL kernel => TODO Implement +// coef[l] *= ; + + }// l + + targetsForcesX[idxTarget] += coef[0]; + targetsForcesY[idxTarget] += coef[1]; + targetsForcesZ[idxTarget] += coef[2]; + targetsPotentials[idxTarget] += ( potentialCoef * targetsPhysicalValues[idxSource] ); + + targetsForcesX[idxSource] -= coef[0]; + targetsForcesY[idxSource] -= coef[1]; + targetsForcesZ[idxSource] -= coef[2]; + targetsPotentials[idxSource] += -potentialCoef * targetsPhysicalValues[idxTarget]; + }// k + }// j + }// i + + } + } + } + + /** + * @brief FullRemoteRIJK + */ + template + static void FullRemoteRIJK(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + + const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock + + const int nbParticlesTargets = inTargets->getNbParticles(); + const FReal*const targetsX = inTargets->getPositions()[0]; + const FReal*const targetsY = inTargets->getPositions()[1]; + const FReal*const targetsZ = inTargets->getPositions()[2]; + + for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + if( inNeighbors[idxNeighbors] ){ + const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); + const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; + const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; + const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; + + for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ + // potential + FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; + FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; + FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal ri2=r[i]*r[i]; + + for(unsigned int j = 0 ; j < 3 ; ++j){ + FReal rj2=r[j]*r[j]; + + for(unsigned int k = 0 ; k < 3 ; ++k){ + + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); + const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j*3+k); + + // potentials + FReal potentialCoef; + if(i==j){ + if(j==k) //i=j=k + potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + + // forces + FReal coef[3]; + for(unsigned int l = 0 ; l < 3 ; ++l){ + coef[l]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); + + // // Grad of RIJK kernel is RIJKL kernel => TODO Implement + // coef[l] *= ; + + }// l + + targetsForcesX[idxTarget] += coef[0]; + targetsForcesY[idxTarget] += coef[1]; + targetsForcesZ[idxTarget] += coef[2]; + targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); + + }// k + }// j + }// i + + } + } + } + } + } + + + /** + * @brief MutualParticlesRIJK + * @param sourceX + * @param sourceY + * @param sourceZ + * @param sourcePhysicalValue + * @param sourceForceX + * @param sourceForceY + * @param sourceForceZ + * @param sourcePotential + * @param targetX + * @param targetY + * @param targetZ + * @param targetPhysicalValue + * @param targetForceX + * @param targetForceY + * @param targetForceZ + * @param targetPotential + */ + template + static void MutualParticlesRIJK(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal* sourcePhysicalValue, + FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, + const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue, + FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, + const MatrixKernelClass *const MatrixKernel){ + + const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock + + // GradGradR potential + FReal dx = sourceX - targetX; + FReal dy = sourceY - targetY; + FReal dz = sourceZ - targetZ; + + FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); + FReal inv_distance = FMath::Sqrt(inv_distance_pow2); + FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; + + FReal r[3]={dx,dy,dz}; + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal ri2=r[i]*r[i]; + for(unsigned int j = 0 ; j < 3 ; ++j){ + FReal rj2=r[j]*r[j]; + for(unsigned int k = 0 ; k < 3 ; ++k){ + + // potentials + FReal potentialCoef; + if(i==j){ + if(j==k) //i=j=k + potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i=j!=k + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; + } + else{ //(i!=j) + if(i==k) //i=k!=j + potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; + else if(j==k) //i!=k=j + potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; + else //i!=k!=j + potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; + } + + // forces + FReal coef[3]; + for(unsigned int l = 0 ; l < 3 ; ++l){ + coef[l]= -(targetPhysicalValue[j*3+k] * sourcePhysicalValue[j*3+k]); + +// // Grad of RIJK kernel is RIJKL kernel => TODO Implement +// coef[l] *= ; + + }// l + + targetForceX[i] += coef[0]; + targetForceY[i] += coef[1]; + targetForceZ[i] += coef[2]; + targetPotential[i] += ( potentialCoef * sourcePhysicalValue[j*3+k] ); + + sourceForceX[i] -= coef[0]; + sourceForceY[i] -= coef[1]; + sourceForceZ[i] -= coef[2]; + sourcePotential[i] += ( -potentialCoef * targetPhysicalValue[j*3+k] ); + + }// k + }// j + }// i + + } + + ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// @@ -1336,7 +1703,6 @@ public: // i.e. [[1 1 1] // [1 1 1] * 1/R // [1 1 1]] - // Only use scalar phys val, potential and forces for now. TODO use vectorial ones. ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/Src/Kernels/Uniform/FUnifInterpolator.hpp b/Src/Kernels/Uniform/FUnifInterpolator.hpp index ada5007d582ff83fdf39d53e84068d34aae672d9..48304884035c5fd6e225d81686f446f53287dcbf 100755 --- a/Src/Kernels/Uniform/FUnifInterpolator.hpp +++ b/Src/Kernels/Uniform/FUnifInterpolator.hpp @@ -43,7 +43,8 @@ class FUnifInterpolator : FNoCopyable // compile time constants and types enum {nnodes = TensorTraits::nnodes, nRhs = MatrixKernelClass::NRHS, - nLhs = MatrixKernelClass::NLHS}; + nLhs = MatrixKernelClass::NLHS, + nPV = MatrixKernelClass::NPV}; typedef FUnifRoots< ORDER> BasisType; typedef FUnifTensor TensorType; @@ -441,7 +442,14 @@ inline void FUnifInterpolator::applyL2P(const FPoint& c L_of_x[o][2] = BasisType::L(o, localPosition.getZ()); // 3 * ORDER*(ORDER-1) flops } + // loop over dim of local expansions for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ + // distribution over potential components: + // We sum the multidim contribution of PhysValue + // This was originally done at M2L step but moved here + // because their storage is required by the force computation. + // In fact : f_{ik}(x)=w_j(x) \nabla_{x_i} K_{ij}(x,y)w_j(y)) + const unsigned int idxPot = idxLhs / nPV; // interpolate and increment target value FReal targetValue=0.; @@ -458,7 +466,7 @@ inline void FUnifInterpolator::applyL2P(const FPoint& c } // get potential - FReal*const potentials = inParticles->getPotentials(idxLhs); + FReal*const potentials = inParticles->getPotentials(idxPot); // add contribution to potential potentials[idxPart] += (targetValue); @@ -514,6 +522,8 @@ inline void FUnifInterpolator::applyL2PGradient(const F } for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ + const unsigned int idxPot = idxLhs / nPV; + const unsigned int idxPV = idxLhs % nPV; // interpolate and increment forces value FReal forces[3] = {FReal(0.), FReal(0.), FReal(0.)}; @@ -533,17 +543,16 @@ inline void FUnifInterpolator::applyL2PGradient(const F } // ORDER * ORDER * 4 flops } // ORDER * ORDER * ORDER * 4 flops - // scale forces forces[0] *= jacobian[0]; forces[1] *= jacobian[1]; forces[2] *= jacobian[2]; } - const FReal*const physicalValues = inParticles->getPhysicalValues(idxLhs); - FReal*const forcesX = inParticles->getForcesX(idxLhs); - FReal*const forcesY = inParticles->getForcesY(idxLhs); - FReal*const forcesZ = inParticles->getForcesZ(idxLhs); + const FReal*const physicalValues = inParticles->getPhysicalValues(idxPV); + FReal*const forcesX = inParticles->getForcesX(idxPot); + FReal*const forcesY = inParticles->getForcesY(idxPot); + FReal*const forcesZ = inParticles->getForcesZ(idxPot); // set computed forces forcesX[idxPart] += forces[0] * physicalValues[idxPart]; diff --git a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp index 74f1cc760cb78ea1e3ec7c2b3fd58b5d95fea000..d12569d46b524fa4b43529dfdb9f54fe895e65d8 100644 --- a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp @@ -62,7 +62,9 @@ class FUnifTensorialKernel : public FAbstractUnifKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS> { enum {nRhs = MatrixKernelClass::NRHS, - nLhs = MatrixKernelClass::NLHS}; + nLhs = MatrixKernelClass::NLHS, + nPot = MatrixKernelClass::NPOT, + nPV = MatrixKernelClass::NPV}; protected://PB: for OptiDis @@ -152,19 +154,24 @@ public: for(int idxV = 0 ; idxV < NVALS ; ++idxV){ for (int idxLhs=0; idxLhs < nLhs; ++idxLhs){ - // update local index - int idxLoc = idxV*nLhs + idxLhs; - // load transformed local expansion - FComplexe *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxLoc); + // update local index + const int idxLoc = idxV*nLhs + idxLhs; + + // load transformed local expansion + FComplexe *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxLoc); - for (int idxRhs=0; idxRhs < nRhs; ++idxRhs){ + // update idxRhs + const int idxRhs = idxLhs % nPV; // update multipole index - int idxMul = idxV*nRhs + idxRhs; - // update kernel index such that: x_i = K_{ij}y_j - int idxK = idxLhs*nRhs + idxRhs; + const int idxMul = idxV*nRhs + idxRhs; + // get index in matrix kernel - unsigned int d - = AbstractBaseClass::MatrixKernel.getPtr()->getPosition(idxK); + const unsigned int d + = AbstractBaseClass::MatrixKernel.getPtr()->getPosition(idxLhs); + +// std::cout<<"idxLhs="<< idxLhs +// <<" - d="<< d +// <<" - idxRhs="<< idxRhs <getTransformedMultipole(idxMul), TransformedLocalExpansion); - } } - }// NRHS - }// NLHS + } + }// NLHS=NPOT*NPV }// NVALS } @@ -231,14 +237,14 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles); + DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,AbstractBaseClass::MatrixKernel.getPtr()); } void P2PRemote(const FTreeCoordinate& /*inPosition*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/){ - DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27); + DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,AbstractBaseClass::MatrixKernel.getPtr()); } }; diff --git a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp index a4c1d62db8ca62ab5873fc2de13ba452ef99fe2b..ef75a1068e51b9caea266c0c225ba0aa21a83511 100644 --- a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp @@ -112,7 +112,6 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, for (unsigned int d=0; d : FNoCopyabl nnodes = TensorTraits::nnodes, ninteractions = 316, // 7^3 - 3^3 (max num cells in far-field) rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1), - dim = MatrixKernelClass::DIM, - nidx = MatrixKernelClass::NIDX}; + dim = MatrixKernelClass::DIM}; // Tensorial MatrixKernel specific FComplexe** FC; @@ -345,7 +343,6 @@ public: // Apply forward Discrete Fourier Transform Dft.applyDFT(Py,FY); - } @@ -360,8 +357,7 @@ class FUnifTensorialM2LHandler : FNoCop nnodes = TensorTraits::nnodes, ninteractions = 316, // 7^3 - 3^3 (max num cells in far-field) rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1), - dim = MatrixKernelClass::DIM, - nidx = MatrixKernelClass::NIDX}; + dim = MatrixKernelClass::DIM}; // Tensorial MatrixKernel and homogeneity specific FComplexe*** FC; @@ -525,7 +521,6 @@ public: // Apply forward Discrete Fourier Transform Dft.applyDFT(Py,FY); - } diff --git a/Tests/Kernels/testChebTensorialAlgorithm.cpp b/Tests/Kernels/testChebTensorialAlgorithm.cpp index 2d522bbac4608331d40121344884815003b81462..6d8f11ef8ed65e900eeb4de751c2e2c5c63d2d40 100644 --- a/Tests/Kernels/testChebTensorialAlgorithm.cpp +++ b/Tests/Kernels/testChebTensorialAlgorithm.cpp @@ -15,7 +15,7 @@ // =================================================================================== // ==== CMAKE ===== -// @FUSE_FFT +// @FUSE_BLAS // ================ #include @@ -69,21 +69,30 @@ int main(int argc, char* argv[]) // typedefs // typedef FInterpMatrixKernel_R_IJ MatrixKernelClass; - typedef FInterpMatrixKernel_IOR MatrixKernelClass; +// typedef FInterpMatrixKernel_IOR MatrixKernelClass; // typedef FInterpMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernel_R_IJK MatrixKernelClass; const KERNEL_FUNCTION_IDENTIFIER MK_ID = MatrixKernelClass::Identifier; + const unsigned int NPV = MatrixKernelClass::NPV; + const unsigned int NPOT = MatrixKernelClass::NPOT; const unsigned int NRHS = MatrixKernelClass::NRHS; const unsigned int NLHS = MatrixKernelClass::NLHS; + const double CoreWidth = 0.1; + const MatrixKernelClass DirectMatrixKernel(CoreWidth); + std::cout<< "CoreWidth2 = "<< DirectMatrixKernel.getCoreWidth2()< ContainerClass; @@ -180,11 +206,16 @@ int main(int argc, char* argv[]) for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ // put in tree - // PB: here we have to know the number of NRHS... - if(NRHS==1) + // PB: here we have to know NPV... + if(NPV==1) tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue[0]); - else if(NRHS==3) + else if(NPV==3) tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2]); + else if(NPV==9) // R_IJK + tree.insert(particles[idxPart].position, idxPart, + particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2], + particles[idxPart].physicalValue[3], particles[idxPart].physicalValue[4], particles[idxPart].physicalValue[5], + particles[idxPart].physicalValue[6], particles[idxPart].physicalValue[7], particles[idxPart].physicalValue[8]); else std::runtime_error("Insert correct number of physical values!"); @@ -198,7 +229,7 @@ int main(int argc, char* argv[]) { // ----------------------------------------------------- std::cout << "\nChebyshev FMM (ORDER="<< ORDER << ") ... " << std::endl; time.tic(); - KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), epsilon); + KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), epsilon,CoreWidth); FmmClass algorithm(&tree, &kernels); algorithm.execute(); time.tac(); @@ -208,23 +239,21 @@ int main(int argc, char* argv[]) { // ----------------------------------------------------- std::cout << "\nError computation ... " << std::endl; - FMath::FAccurater potentialDiff[NLHS]; - FMath::FAccurater fx[NLHS], fy[NLHS], fz[NLHS]; + FMath::FAccurater potentialDiff[NPOT]; + FMath::FAccurater fx[NPOT], fy[NPOT], fz[NPOT]; - FReal checkPhysVal[20000][NRHS]; - FReal checkPotential[20000][NLHS]; - FReal checkfx[20000][NLHS]; + FReal checkPotential[20000][NPOT]; + FReal checkfx[20000][NPOT]; { // Check that each particle has been summed with all other tree.forEachLeaf([&](LeafClass* leaf){ - for(unsigned idxLhs = 0; idxLhsgetTargets()->getPhysicalValues(idxLhs); - const FReal*const potentials = leaf->getTargets()->getPotentials(idxLhs); - const FReal*const forcesX = leaf->getTargets()->getForcesX(idxLhs); - const FReal*const forcesY = leaf->getTargets()->getForcesY(idxLhs); - const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxLhs); + const FReal*const potentials = leaf->getTargets()->getPotentials(idxPot); + const FReal*const forcesX = leaf->getTargets()->getForcesX(idxPot); + const FReal*const forcesY = leaf->getTargets()->getForcesY(idxPot); + const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxPot); const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); const FVector& indexes = leaf->getTargets()->getIndexes(); @@ -232,46 +261,106 @@ int main(int argc, char* argv[]) const int indexPartOrig = indexes[idxPart]; //PB: store potential in array[nbParticles] - checkPhysVal[indexPartOrig][idxLhs]=physVals[idxPart]; - checkPotential[indexPartOrig][idxLhs]=potentials[idxPart]; - checkfx[indexPartOrig][idxLhs]=forcesX[idxPart]; - - potentialDiff[idxLhs].add(particles[indexPartOrig].potential[idxLhs],potentials[idxPart]); - fx[idxLhs].add(particles[indexPartOrig].forces[0][idxLhs],forcesX[idxPart]); - fy[idxLhs].add(particles[indexPartOrig].forces[1][idxLhs],forcesY[idxPart]); - fz[idxLhs].add(particles[indexPartOrig].forces[2][idxLhs],forcesZ[idxPart]); + checkPotential[indexPartOrig][idxPot]=potentials[idxPart]; + checkfx[indexPartOrig][idxPot]=forcesX[idxPart]; + + // update accuracy + potentialDiff[idxPot].add(particles[indexPartOrig].potential[idxPot],potentials[idxPart]); + fx[idxPot].add(particles[indexPartOrig].forces[0][idxPot],forcesX[idxPart]); + fy[idxPot].add(particles[indexPartOrig].forces[1][idxPot],forcesY[idxPart]); + fz[idxPot].add(particles[indexPartOrig].forces[2][idxPot],forcesZ[idxPart]); } - }// NLHS + }// NPOT }); } // std::cout << "Check Potential, forceX " << std::endl; // for(int idxPart = 0 ; idxPart < 20 ; ++idxPart) -// for(unsigned idxLhs = 0; idxLhs2) sstream <<"_o"< ContainerClass; @@ -163,14 +191,26 @@ int main(int argc, char* argv[]) { // ----------------------------------------------------- std::cout << "Creating & Inserting " << loader.getNumberOfParticles() - << " particles ..." << std::endl; + << " particles ..." << std::endl; + + + + std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl; time.tic(); for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ // put in tree - // PB: here we have to know the number of NRHS... - tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2]); + if(NPV==3) // R_IJ or IOR + tree.insert(particles[idxPart].position, idxPart, + particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2]); + else if(NPV==9) // R_IJK + tree.insert(particles[idxPart].position, idxPart, + particles[idxPart].physicalValue[0], particles[idxPart].physicalValue[1], particles[idxPart].physicalValue[2], + particles[idxPart].physicalValue[3], particles[idxPart].physicalValue[4], particles[idxPart].physicalValue[5], + particles[idxPart].physicalValue[6], particles[idxPart].physicalValue[7], particles[idxPart].physicalValue[8]); + else + std::runtime_error("NPV not yet supported in test! Add new case."); } time.tac(); @@ -181,7 +221,7 @@ int main(int argc, char* argv[]) { // ----------------------------------------------------- std::cout << "\nLagrange/Uniform grid FMM (ORDER="<< ORDER << ") ... " << std::endl; time.tic(); - KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox()); + KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(),CoreWidth); FmmClass algorithm(&tree, &kernels); algorithm.execute(); time.tac(); @@ -191,23 +231,21 @@ int main(int argc, char* argv[]) { // ----------------------------------------------------- std::cout << "\nError computation ... " << std::endl; - FMath::FAccurater potentialDiff[NLHS]; - FMath::FAccurater fx[NLHS], fy[NLHS], fz[NLHS]; + FMath::FAccurater potentialDiff[NPOT]; + FMath::FAccurater fx[NPOT], fy[NPOT], fz[NPOT]; - FReal checkPhysVal[20000][NRHS]; - FReal checkPotential[20000][NLHS]; - FReal checkfx[20000][NLHS]; + FReal checkPotential[20000][NPOT]; + FReal checkfx[20000][NPOT]; { // Check that each particle has been summed with all other tree.forEachLeaf([&](LeafClass* leaf){ - for(unsigned idxLhs = 0; idxLhsgetTargets()->getPhysicalValues(idxLhs); - const FReal*const potentials = leaf->getTargets()->getPotentials(idxLhs); - const FReal*const forcesX = leaf->getTargets()->getForcesX(idxLhs); - const FReal*const forcesY = leaf->getTargets()->getForcesY(idxLhs); - const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxLhs); + const FReal*const potentials = leaf->getTargets()->getPotentials(idxPot); + const FReal*const forcesX = leaf->getTargets()->getForcesX(idxPot); + const FReal*const forcesY = leaf->getTargets()->getForcesY(idxPot); + const FReal*const forcesZ = leaf->getTargets()->getForcesZ(idxPot); const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); const FVector& indexes = leaf->getTargets()->getIndexes(); @@ -215,43 +253,102 @@ int main(int argc, char* argv[]) const int indexPartOrig = indexes[idxPart]; //PB: store potential in array[nbParticles] - checkPhysVal[indexPartOrig][idxLhs]=physVals[idxPart]; - checkPotential[indexPartOrig][idxLhs]=potentials[idxPart]; - checkfx[indexPartOrig][idxLhs]=forcesX[idxPart]; - - potentialDiff[idxLhs].add(particles[indexPartOrig].potential[idxLhs],potentials[idxPart]); - fx[idxLhs].add(particles[indexPartOrig].forces[0][idxLhs],forcesX[idxPart]); - fy[idxLhs].add(particles[indexPartOrig].forces[1][idxLhs],forcesY[idxPart]); - fz[idxLhs].add(particles[indexPartOrig].forces[2][idxLhs],forcesZ[idxPart]); + checkPotential[indexPartOrig][idxPot]=potentials[idxPart]; + checkfx[indexPartOrig][idxPot]=forcesX[idxPart]; + + potentialDiff[idxPot].add(particles[indexPartOrig].potential[idxPot],potentials[idxPart]); + fx[idxPot].add(particles[indexPartOrig].forces[0][idxPot],forcesX[idxPart]); + fy[idxPot].add(particles[indexPartOrig].forces[1][idxPot],forcesY[idxPart]); + fz[idxPot].add(particles[indexPartOrig].forces[2][idxPot],forcesZ[idxPart]); } - }// NLHS + }// NPOT }); } -// std::cout << "Check Potential, forceX " << std::endl; -// for(int idxPart = 0 ; idxPart < 20 ; ++idxPart) -// for(unsigned idxLhs = 0; idxLhs2) sstream <<"_o"<::nnodes; typedef FUnifInterpolator InterpolatorClass; InterpolatorClass S; @@ -220,6 +220,8 @@ int main(int, char **){ // } // std::cout<::nnodes; typedef FUnifInterpolator InterpolatorClass; InterpolatorClass S; @@ -148,14 +152,13 @@ int main(int, char **){ for (unsigned int i=0; i(FPLocalExp)); // > or perform entrywise product manually FComplexe tmpFX; - for (unsigned int idxLhs=0; idxLhs only compute first compo and no derivative - // TODO add multidim phys val !!!!!! std::cout << "Compute interactions directly ..." << std::endl; time.tic(); - FReal** approx_f = new FReal* [nlhs]; - FReal** f = new FReal* [nlhs]; - for (unsigned int i=0; igetNbParticles() ; ++idxPartX){ - for (unsigned int i=0; igetPotentials(i)[idxPartX]; const FPoint force = FPoint(X.getSrc()->getForcesX(i)[idxPartX], X.getSrc()->getForcesY(i)[idxPartX], @@ -604,7 +599,7 @@ int main(int, char **){ } // std::cout << "Check Potential, forceX, forceY, forceZ " << std::endl; -// for (unsigned int i=0; i TOFIX):" << std::endl; - std::cout << "Relative L2 error X = " << FMath::FAccurater( f[0], approx_f[0], M*3) << std::endl; - std::cout << "Relative L2 error Y = " << FMath::FAccurater( f[1], approx_f[1], M*3) << std::endl; - std::cout << "Relative L2 error Z = " << FMath::FAccurater( f[2], approx_f[2], M*3) << std::endl; + std::cout << " Force:" << std::endl; + for(unsigned i = 0; i