From f89165519ee7fb21beee151fd4a5ce77937a0f3b Mon Sep 17 00:00:00 2001 From: Pierre Blanchard Date: Thu, 4 Sep 2014 17:53:05 +0200 Subject: [PATCH] Redesigned P2P wrapper, Implemented generic tensorial P2P wrapper. --- Src/Kernels/Chebyshev/FChebKernel.hpp | 4 +- Src/Kernels/Chebyshev/FChebM2LHandler.hpp | 2 +- Src/Kernels/Chebyshev/FChebSymKernel.hpp | 4 +- .../Chebyshev/FChebSymTensorialKernel.hpp | 487 ----------- .../Chebyshev/FChebSymTensorialM2LHandler.hpp | 813 ------------------ .../Chebyshev/FChebTensorialKernel.hpp | 4 +- .../Chebyshev/FChebTensorialM2LHandler.hpp | 4 +- .../Interpolation/FInterpMatrixKernel.cpp | 8 - .../Interpolation/FInterpMatrixKernel.hpp | 202 ++--- .../Interpolation/FInterpP2PKernels.hpp | 135 +-- Src/Kernels/P2P/FP2P.hpp | 725 +--------------- Src/Kernels/P2P/FP2PClassic.hpp | 187 ++++ Src/Kernels/Uniform/FUnifDenseKernel.hpp | 4 +- Src/Kernels/Uniform/FUnifKernel.hpp | 4 +- Src/Kernels/Uniform/FUnifM2LHandler.hpp | 4 +- Src/Kernels/Uniform/FUnifSymKernel.hpp | 471 ---------- Src/Kernels/Uniform/FUnifTensorialKernel.hpp | 4 +- .../Uniform/FUnifTensorialM2LHandler.hpp | 4 +- Tests/Kernels/testChebTensorialAlgorithm.cpp | 59 +- Tests/Kernels/testUnifTensorialAlgorithm.cpp | 53 +- Tests/Utils/testUnifTensorialInterpolator.cpp | 12 +- 21 files changed, 333 insertions(+), 2857 deletions(-) delete mode 100755 Src/Kernels/Chebyshev/FChebSymTensorialKernel.hpp delete mode 100755 Src/Kernels/Chebyshev/FChebSymTensorialM2LHandler.hpp delete mode 100755 Src/Kernels/Uniform/FUnifSymKernel.hpp diff --git a/Src/Kernels/Chebyshev/FChebKernel.hpp b/Src/Kernels/Chebyshev/FChebKernel.hpp index ab37a9e2..13aafa77 100755 --- a/Src/Kernels/Chebyshev/FChebKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebKernel.hpp @@ -216,14 +216,14 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); + DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); } void P2PRemote(const FTreeCoordinate& /*inPosition*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/){ - DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); + DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); } }; diff --git a/Src/Kernels/Chebyshev/FChebM2LHandler.hpp b/Src/Kernels/Chebyshev/FChebM2LHandler.hpp index ff2fa37f..bb6e8655 100755 --- a/Src/Kernels/Chebyshev/FChebM2LHandler.hpp +++ b/Src/Kernels/Chebyshev/FChebM2LHandler.hpp @@ -81,7 +81,7 @@ class FChebM2LHandler : FNoCopyable { const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f'); std::stringstream stream; - stream << "m2l_k"<< MatrixKernelClass::Identifier << "_" << precision_type + stream << "m2l_k"<< MatrixKernelClass::getID() << "_" << precision_type << "_o" << order << "_e" << epsilon << ".bin"; return stream.str(); } diff --git a/Src/Kernels/Chebyshev/FChebSymKernel.hpp b/Src/Kernels/Chebyshev/FChebSymKernel.hpp index 52c7500d..4efc91f6 100755 --- a/Src/Kernels/Chebyshev/FChebSymKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebSymKernel.hpp @@ -469,14 +469,14 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); + DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); } void P2PRemote(const FTreeCoordinate& /*inPosition*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/){ - DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); + DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); } }; diff --git a/Src/Kernels/Chebyshev/FChebSymTensorialKernel.hpp b/Src/Kernels/Chebyshev/FChebSymTensorialKernel.hpp deleted file mode 100755 index 34b29b0a..00000000 --- a/Src/Kernels/Chebyshev/FChebSymTensorialKernel.hpp +++ /dev/null @@ -1,487 +0,0 @@ -#ifndef FCHEBSYMKERNEL_HPP -#define FCHEBSYMKERNEL_HPP -// [--License--] - -#include "../../Utils/FGlobal.hpp" - -#include "../../Utils/FSmartPointer.hpp" - -#include "./FAbstractChebKernel.hpp" -#include "./FChebInterpolator.hpp" -// =================================================================================== -// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner -// olivier.coulaud@inria.fr, berenger.bramas@inria.fr -// This software is a computer program whose purpose is to compute the FMM. -// -// This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". -// "http://www.gnu.org/licenses". -// =================================================================================== -//#include "./FChebSymM2LHandler.hpp" -#include "./FChebSymTensorialM2LHandler.hpp" //PB: temporary version - -class FTreeCoordinate; - - -// for verbosity only!!! -//#define COUNT_BLOCKED_INTERACTIONS - -// if timings should be logged -//#define LOG_TIMINGS - -/** - * @author Matthias Messner(matthias.messner@inria.fr) - * @class FChebSymTensorialKernel - * @brief - * Please read the license - * - * This kernels implement the Chebyshev interpolation based FMM operators - * exploiting the symmetries in the far-field. It implements all interfaces - * (P2P, P2M, M2M, M2L, L2L, L2P) which are required by the FFmmAlgorithm and - * FFmmAlgorithmThread. - * - * @tparam CellClass Type of cell - * @tparam ContainerClass Type of container to store particles - * @tparam MatrixKernelClass Type of matrix kernel function - * @tparam ORDER Chebyshev interpolation order - */ -template < class CellClass, class ContainerClass, class MatrixKernelClass, int ORDER, int NVALS = 1> -class FChebSymTensorialKernel - : public FAbstractChebKernel -{ - typedef FAbstractChebKernel AbstractBaseClass; - typedef SymmetryHandler SymmetryHandlerClass; - enum {nnodes = AbstractBaseClass::nnodes, - ncmp = MatrixKernelClass::NCMP, - nRhs = MatrixKernelClass::NRHS, - nLhs = MatrixKernelClass::NLHS}; - - /// Needed for handling all symmetries - const FSmartPointer SymHandler; - - // permuted local and multipole expansions - FReal** Loc; - FReal** Mul; - unsigned int* countExp; - - - - /** - * Allocate memory for storing locally permuted mulipole and local expansions - */ - void allocateMemoryForPermutedExpansions() - { - assert(Loc==NULL && Mul==NULL && countExp==NULL); - Loc = new FReal* [343]; - Mul = new FReal* [343]; - countExp = new unsigned int [343]; - - // set all 343 to NULL - for (unsigned int idx=0; idx<343; ++idx) { - Mul[idx] = Loc[idx] = NULL; - } - - // init only 16 of 343 possible translations due to symmetries - for (int i=2; i<=3; ++i) - for (int j=0; j<=i; ++j) - for (int k=0; k<=j; ++k) { - const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3); - assert(Mul[idx]==NULL || Loc[idx]==NULL); - Mul[idx] = new FReal [24 * nnodes]; - Loc[idx] = new FReal [24 * nnodes]; - } - } - - -#ifdef LOG_TIMINGS - FTic time; - FReal t_m2l_1, t_m2l_2, t_m2l_3; -#endif - -public: - /** - * The constructor initializes all constant attributes and it reads the - * precomputed and compressed M2L operators from a binary file (an - * runtime_error is thrown if the required file is not valid). - */ - FChebSymTensorialKernel(const int inTreeHeight, - const FReal inBoxWidth, - const FPoint& inBoxCenter, - const FReal Epsilon) - : AbstractBaseClass(inTreeHeight, inBoxWidth, inBoxCenter), - SymHandler(new SymmetryHandlerClass(AbstractBaseClass::MatrixKernel.getPtr(), Epsilon, inBoxWidth, inTreeHeight)), - Loc(NULL), Mul(NULL), countExp(NULL) - { - this->allocateMemoryForPermutedExpansions(); - -#ifdef LOG_TIMINGS - t_m2l_1 = FReal(0.); - t_m2l_2 = FReal(0.); - t_m2l_3 = FReal(0.); -#endif - } - - - - /** Copy constructor */ - FChebSymTensorialKernel(const FChebSymTensorialKernel& other) - : AbstractBaseClass(other), - SymHandler(other.SymHandler), - Loc(NULL), Mul(NULL), countExp(NULL) - { - this->allocateMemoryForPermutedExpansions(); - } - - - - /** Destructor */ - ~FChebSymTensorialKernel() - { - for (unsigned int t=0; t<343; ++t) { - if (Loc[t]!=NULL) delete [] Loc[t]; - if (Mul[t]!=NULL) delete [] Mul[t]; - } - if (Loc!=NULL) delete [] Loc; - if (Mul!=NULL) delete [] Mul; - if (countExp!=NULL) delete [] countExp; - -#ifdef LOG_TIMINGS - std::cout << "- Permutation took " << t_m2l_1 << "s" - << "\n- GEMMT and GEMM took " << t_m2l_2 << "s" - << "\n- Unpermutation took " << t_m2l_3 << "s" - << std::endl; -#endif - } - - // PB: Only used in testChebAlgorithm - const SymmetryHandlerClass *const getPtrToSymHandler() const - { return SymHandler.getPtr(); } - - - - void P2M(CellClass* const LeafCell, - const ContainerClass* const SourceParticles) - { - const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate())); - for(int idxV = 0 ; idxV < NVALS ; ++idxV){ - for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){ - // update multipole index - int idxMul = idxV*nRhs + idxRhs; - // apply Sy - AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf, - LeafCell->getMultipole(idxMul), SourceParticles); - }// NRHS - }// NVALS - } - - - - void M2M(CellClass* const FRestrict ParentCell, - const CellClass*const FRestrict *const FRestrict ChildCells, - const int /*TreeLevel*/) - { - for(int idxV = 0 ; idxV < NVALS ; ++idxV){ - for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){ - // update multipole index - int idxMul = idxV*nRhs + idxRhs; - // apply Sy - FBlas::scal(nnodes*2, FReal(0.), ParentCell->getMultipole(idxMul)); - for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){ - if (ChildCells[ChildIndex]){ - AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxMul), ParentCell->getMultipole(idxMul)); - } - } - }// NRHS - }// NVALS - } - - - void M2L(CellClass* const FRestrict TargetCell, - const CellClass* SourceCells[343], - const int /*NumSourceCells*/, - const int TreeLevel) - { -#ifdef LOG_TIMINGS - time.tic(); -#endif - for(int idxV = 0 ; idxV < NVALS ; ++idxV){ - for (int idxLhs=0; idxLhs < nLhs; ++idxLhs){ - // update local index - int idxLoc = idxV*nLhs + idxLhs; - - for (int idxRhs=0; idxRhs < nRhs; ++idxRhs){ - // update multipole index - int idxMul = idxV*nRhs + idxRhs; - // update kernel index such that: x_i = K_{ij}y_j - int idxK = idxLhs*nRhs + idxRhs; - unsigned int d = AbstractBaseClass::MatrixKernel->getPosition(idxK); - - // permute and copy multipole expansion - memset(countExp, 0, sizeof(int) * 343); - for (unsigned int idx=0; idx<343; ++idx) { - if (SourceCells[idx]) { - const unsigned int pidx = SymHandler->pindices[idx]; - const unsigned int count = (countExp[pidx])++; - FReal *const mul = Mul[pidx] + count*nnodes; - const unsigned int *const pvec = SymHandler->pvectors[idx]; - const FReal *const MultiExp = SourceCells[idx]->getMultipole(idxMul); - - /* - // no loop unrolling - for (unsigned int n=0; ngetScaleFactor(AbstractBaseClass::BoxWidth, TreeLevel); - for (unsigned int pidx=0; pidx<343; ++pidx) { - const unsigned int count = countExp[pidx]; - if (count) { - const unsigned int rank = SymHandler->getLowRank(TreeLevel, pidx, d); - // rank * count * (2*nnodes-1) flops - FBlas::gemtm(nnodes, rank, count, FReal(1.), - const_cast(SymHandler->getK(TreeLevel, pidx, d))+rank*nnodes, - nnodes, Mul[pidx], nnodes, Compressed, rank); - // nnodes *count * (2*rank-1) flops - FBlas::gemm( nnodes, rank, count, scale, - const_cast(SymHandler->getK(TreeLevel, pidx, d)), - nnodes, Compressed, rank, Loc[pidx], nnodes); - } - } - -#ifdef LOG_TIMINGS - t_m2l_2 += time.tacAndElapsed(); -#endif - -#ifdef LOG_TIMINGS - time.tic(); -#endif - - // permute and add contribution to local expansions - FReal *const LocalExpansion = TargetCell->getLocal(idxLoc); - memset(countExp, 0, sizeof(int) * 343); - for (unsigned int idx=0; idx<343; ++idx) { - if (SourceCells[idx]) { - const unsigned int pidx = SymHandler->pindices[idx]; - const unsigned int count = (countExp[pidx])++; - const FReal *const loc = Loc[pidx] + count*nnodes; - const unsigned int *const pvec = SymHandler->pvectors[idx]; - - /* - // no loop unrolling - for (unsigned int n=0; ngetPosition(idxK); - - // permute and copy multipole expansion - memset(countExp, 0, sizeof(int) * 343); - for (unsigned int idx=0; idx<343; ++idx) { - if (SourceCells[idx]) { - const unsigned int pidx = SymHandler->pindices[idx]; - const unsigned int count = (countExp[pidx])++; - const FReal *const MultiExp = SourceCells[idx]->getMultipole(idxMul); - for (unsigned int n=0; npvectors[idx][n]] = MultiExp[n]; - } - } - - // multiply (mat-mat-mul) - FReal Compressed [nnodes * 30]; - const FReal CellWidth(AbstractBaseClass::BoxWidth / FReal(FMath::pow(2, TreeLevel))); - const FReal scale(AbstractBaseClass::MatrixKernel->getScaleFactor(CellWidth)); - for (unsigned int pidx=0; pidx<343; ++pidx) { - const unsigned int count = countExp[pidx]; - if (count) { - const unsigned int rank = SymHandler->getLowRank(TreeLevel, pidx, d); - FBlas::gemtm(nnodes, rank, count, FReal(1.), - const_cast(SymHandler->getK(TreeLevel, pidx, d)+rank*nnodes), - nnodes, Mul[pidx], nnodes, Compressed, rank); - FBlas::gemm( nnodes, rank, count, scale, - const_cast(SymHandler->getK(TreeLevel, pidx, d)), - nnodes, Compressed, rank, Loc[pidx], nnodes); - } - } - - // permute and add contribution to local expansions - FReal *const LocalExpansion = TargetCell->getLocal(idxLoc); - memset(countExp, 0, sizeof(int) * 343); - for (unsigned int idx=0; idx<343; ++idx) { - if (SourceCells[idx]) { - const unsigned int pidx = SymHandler->pindices[idx]; - const unsigned int count = (countExp[pidx])++; - for (unsigned int n=0; npvectors[idx][n]]; - } - } - - }// NRHS - }// NLHS - }// NVALS - - } - */ - - void L2L(const CellClass* const FRestrict ParentCell, - CellClass* FRestrict *const FRestrict ChildCells, - const int /*TreeLevel*/) - { - for(int idxV = 0 ; idxV < NVALS ; ++idxV){ - for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ - int idxLoc = idxV*nLhs + idxLhs; - // apply Sx - for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){ - if (ChildCells[ChildIndex]){ - AbstractBaseClass::Interpolator->applyL2L(ChildIndex, ParentCell->getLocal(idxLoc), ChildCells[ChildIndex]->getLocal(idxLoc)); - } - } - }// NLHS - }// NVALS - } - - - void L2P(const CellClass* const LeafCell, - ContainerClass* const TargetParticles) - { - const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate())); - for(int idxV = 0 ; idxV < NVALS ; ++idxV){ - for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){ - int idxLoc = idxV*nLhs + idxLhs; -// // a) apply Sx -// AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter, -// AbstractBaseClass::BoxWidthLeaf, -// LeafCell->getLocal(idxLoc), -// TargetParticles); -// // b) apply Px (grad Sx) -// AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter, -// AbstractBaseClass::BoxWidthLeaf, -// LeafCell->getLocal(idxLoc), -// TargetParticles); - - // c) apply Sx and Px (grad Sx) - AbstractBaseClass::Interpolator->applyL2PTotal(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf, - LeafCell->getLocal(idxLoc), TargetParticles); - }// NLHS - }// NVALS - } - - void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions - ContainerClass* const FRestrict TargetParticles, - const ContainerClass* const FRestrict /*SourceParticles*/, - ContainerClass* const NeighborSourceParticles[27], - const int /* size */) - { - DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles); - } - - - void P2PRemote(const FTreeCoordinate& /*inPosition*/, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, - ContainerClass* const inNeighbors[27], const int /*inSize*/){ - DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27); - } - -}; - - - - - - - - -#endif //FCHEBSYMTENSORIALKERNELS_HPP - -// [--END--] diff --git a/Src/Kernels/Chebyshev/FChebSymTensorialM2LHandler.hpp b/Src/Kernels/Chebyshev/FChebSymTensorialM2LHandler.hpp deleted file mode 100755 index 038686df..00000000 --- a/Src/Kernels/Chebyshev/FChebSymTensorialM2LHandler.hpp +++ /dev/null @@ -1,813 +0,0 @@ -// =================================================================================== -// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner -// olivier.coulaud@inria.fr, berenger.bramas@inria.fr -// This software is a computer program whose purpose is to compute the FMM. -// -// This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". -// "http://www.gnu.org/licenses". -// =================================================================================== -#ifndef FCHEBSYMTENSORIALM2LHANDLER_HPP -#define FCHEBSYMTENSORIALM2LHANDLER_HPP - -#include - -#include "../../Utils/FBlas.hpp" - -#include "./FChebTensor.hpp" -#include "../Interpolation/FInterpSymmetries.hpp" -#include "./FChebM2LHandler.hpp" - -/** - * @author Matthias Messner (matthias.matthias@inria.fr) - * @author Pierre Blanchard (pierre.blanchard@inria.fr) - * Please read the license - */ - - -/*! Choose either \a FULLY_PIVOTED_ACASVD or \a PARTIALLY_PIVOTED_ACASVD or - \a ONLY_SVD. -*/ -//#define ONLY_SVD -//#define FULLY_PIVOTED_ACASVD -#define PARTIALLY_PIVOTED_ACASVD - - - - -/*! The fully pivoted adaptive cross approximation (fACA) compresses a - far-field interaction as \f$K\sim UV^\top\f$. The fACA requires all entries - to be computed beforehand, then the compression follows in - \f$\mathcal{O}(2\ell^3k)\f$ operations based on the required accuracy - \f$\varepsilon\f$. The matrix K will be destroyed as a result. - - @param[in] K far-field to be approximated - @param[in] nx number of rows - @param[in] ny number of cols - @param[in] eps prescribed accuracy - @param[out] U matrix containing \a k column vectors - @param[out] V matrix containing \a k row vectors - @param[out] k final low-rank depends on prescribed accuracy \a eps -*/ -void fACA(FReal *const K, - const unsigned int nx, const unsigned int ny, - const double eps, FReal* &U, FReal* &V, unsigned int &k) -{ - // control vectors (true if not used, false if used) - bool *const r = new bool[nx]; - bool *const c = new bool[ny]; - for (unsigned int i=0; i eps*eps * norm2K); - //////////////////////////////////////////////// - - delete [] r; - delete [] c; -} - - - - - - - - - - - -/*! The partially pivoted adaptive cross approximation (pACA) compresses a - far-field interaction as \f$K\sim UV^\top\f$. The pACA computes the matrix - entries on the fly, as they are needed. The compression follows in - \f$\mathcal{O}(2\ell^3k)\f$ operations based on the required accuracy - \f$\varepsilon\f$. The matrix K will be destroyed as a result. - - @tparam ComputerType the functor type which allows to compute matrix entries - - @param[in] Computer the entry-computer functor - @param[in] eps prescribed accuracy - @param[in] nx number of rows - @param[in] ny number of cols - @param[out] U matrix containing \a k column vectors - @param[out] V matrix containing \a k row vectors - @param[out] k final low-rank depends on prescribed accuracy \a eps -*/ -template -void pACA(const ComputerType& Computer, - const unsigned int nx, const unsigned int ny, - const FReal eps, FReal* &U, FReal* &V, unsigned int &k) -{ - // control vectors (true if not used, false if used) - bool *const r = new bool[nx]; - bool *const c = new bool[ny]; - for (unsigned int i=0; i eps2 * norm2S); - - delete [] r; - delete [] c; -} - - - -/*! Precomputes the 16 far-field interactions (due to symmetries in their - arrangement all 316 far-field interactions can be represented by - permutations of the 16 we compute in this function). Depending on whether - FACASVD is defined or not, either ACA+SVD or only SVD is used to compress - them. */ -template -static void precompute(const int idxK, const FReal CellWidth, - const FReal Epsilon, FReal* K[343], int LowRank[343]) -{ - // std::cout << "\nComputing 16 far-field interactions (l=" << ORDER << ", eps=" << Epsilon - // << ") for cells of width w = " << CellWidth << std::endl; - - static const unsigned int nnodes = ORDER*ORDER*ORDER; - - // interpolation points of source (Y) and target (X) cell - FPoint X[nnodes], Y[nnodes]; - // set roots of target cell (X) - FChebTensor::setRoots(FPoint(0.,0.,0.), CellWidth, X); - // temporary matrix - FReal* U = new FReal [nnodes*nnodes]; - - // needed for the SVD - unsigned int INFO; - const unsigned int LWORK = 2 * (3*nnodes + nnodes); - FReal *const WORK = new FReal [LWORK]; - FReal *const VT = new FReal [nnodes*nnodes]; - FReal *const S = new FReal [nnodes]; - - - // initialize timer - FTic time; - double overall_time(0.); - double elapsed_time(0.); - - // initialize rank counter - unsigned int overall_rank = 0; - - unsigned int counter = 0; - for (int i=2; i<=3; ++i) { - for (int j=0; j<=i; ++j) { - for (int k=0; k<=j; ++k) { - - // assemble matrix and apply weighting matrices - const FPoint cy(CellWidth*FReal(i), CellWidth*FReal(j), CellWidth*FReal(k)); - FChebTensor::setRoots(cy, CellWidth, Y); - FReal weights[nnodes]; - FChebTensor::setRootOfWeights(weights); - - // now the entry-computer is responsible for weighting the matrix entries - EntryComputer Computer(nnodes, X, nnodes, Y, weights, idxK); - - // start timer - time.tic(); - -#if (defined ONLY_SVD || defined FULLY_PIVOTED_ACASVD) - Computer(0, nnodes, 0, nnodes, U); -#endif - /* - // applying weights //////////////////////////////////////// - FReal weights[nnodes]; - FChebTensor::setRootOfWeights(weights); - for (unsigned int n=0; n V and then Qu V - FReal *const V = new FReal [aca_rank * rank]; - for (unsigned int r=0; r(S, Epsilon); - - // store - const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3); - assert(K[idx]==nullptr); - K[idx] = new FReal [2*rank*nnodes]; - LowRank[idx] = rank; - for (unsigned int r=0; r class SymmetryHandler; - -/*! Specialization for homogeneous kernel functions */ -template -class SymmetryHandler -{ - static const unsigned int nnodes = ORDER*ORDER*ORDER; - - // M2L operators - FReal*** K; - int** LowRank; - -public: - - // permutation vectors and permutated indices - unsigned int pvectors[343][nnodes]; - unsigned int pindices[343]; - - - /** Constructor: with 16 small SVDs */ - template - SymmetryHandler(const MatrixKernelClass *const MatrixKernel, - const FReal Epsilon, - const FReal, const unsigned int) - { - // For each component of kernel matrix - // init all 343 item to zero, because effectively only 16 exist - K = new FReal** [NCMP]; - LowRank = new int* [NCMP]; - - for (unsigned int d=0; d Symmetries; - for (int i=-3; i<=3; ++i) - for (int j=-3; j<=3; ++j) - for (int k=-3; k<=3; ++k) { - const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3); - pindices[idx] = 0; - if (abs(i)>1 || abs(j)>1 || abs(k)>1) - pindices[idx] = Symmetries.getPermutationArrayAndIndex(i,j,k, pvectors[idx]); - } - - // precompute 16 M2L operators - const FReal ReferenceCellWidth = FReal(2.); - for (unsigned int d=0; d(d, ReferenceCellWidth, Epsilon, K[d], LowRank[d]); - - } - - - - /** Destructor */ - ~SymmetryHandler() - { - for (unsigned int d=0; d -class SymmetryHandler -{ - static const unsigned int nnodes = ORDER*ORDER*ORDER; - - // Height of octree; needed only in the case of non-homogeneous kernel functions - const unsigned int TreeHeight; - - // M2L operators for all levels in the octree - FReal**** K; - int*** LowRank; - -public: - - // permutation vectors and permutated indices - unsigned int pvectors[343][nnodes]; - unsigned int pindices[343]; - - - /** Constructor: with 16 small SVDs */ - template - SymmetryHandler(const MatrixKernelClass *const MatrixKernel, - const double Epsilon, - const FReal RootCellWidth, const unsigned int inTreeHeight) - : TreeHeight(inTreeHeight) - { - // init all 343 item to zero, because effectively only 16 exist - K = new FReal*** [NCMP]; - LowRank = new int** [NCMP]; - for (unsigned int d=0; d Symmetries; - for (int i=-3; i<=3; ++i) - for (int j=-3; j<=3; ++j) - for (int k=-3; k<=3; ++k) { - const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3); - pindices[idx] = 0; - if (abs(i)>1 || abs(j)>1 || abs(k)>1) - pindices[idx] = Symmetries.getPermutationArrayAndIndex(i,j,k, pvectors[idx]); - } - - // precompute 16 M2L operators at all levels having far-field interactions - for (unsigned int d=0; d(d, CellWidth, Epsilon, K[d][l], LowRank[d][l]); - CellWidth /= FReal(2.); // at level l+1 - } - } - } - - - - /** Destructor */ - ~SymmetryHandler() - { - for (unsigned int d=0; d -//#include -// -// -///** -// * Computes, compresses and stores the 16 M2L kernels in a binary file. -// */ -//template -//static void ComputeAndCompressAndStoreInBinaryFile(const FReal Epsilon) -//{ -// static const unsigned int nnodes = ORDER*ORDER*ORDER; -// -// // compute and compress //////////// -// FReal* K[343]; -// int LowRank[343]; -// for (unsigned int idx=0; idx<343; ++idx) { K[idx] = nullptr; LowRank[idx] = 0; } -// precompute(FReal(2.), Epsilon, K, LowRank); -// -// // write to binary file //////////// -// FTic time; time.tic(); -// // start computing process -// const char precision = (typeid(FReal)==typeid(double) ? 'd' : 'f'); -// std::stringstream sstream; -// sstream << "sym2l_" << precision << "_o" << ORDER << "_e" << Epsilon << ".bin"; -// const std::string filename(sstream.str()); -// std::ofstream stream(filename.c_str(), -// std::ios::out | std::ios::binary | std::ios::trunc); -// if (stream.good()) { -// stream.seekp(0); -// for (unsigned int idx=0; idx<343; ++idx) -// if (K[idx]!=nullptr) { -// // 1) write index -// stream.write(reinterpret_cast(&idx), sizeof(int)); -// // 2) write low rank (int) -// int rank = LowRank[idx]; -// stream.write(reinterpret_cast(&rank), sizeof(int)); -// // 3) write U and V (both: rank*nnodes * FReal) -// FReal *const U = K[idx]; -// FReal *const V = K[idx] + rank*nnodes; -// stream.write(reinterpret_cast(U), sizeof(FReal)*rank*nnodes); -// stream.write(reinterpret_cast(V), sizeof(FReal)*rank*nnodes); -// } -// } else throw std::runtime_error("File could not be opened to write"); -// stream.close(); -// // write info -// // std::cout << "Compressed M2L operators stored in binary file " << filename -// // << " in " << time.tacAndElapsed() << "sec." << std::endl; -// -// // free memory ///////////////////// -// for (unsigned int t=0; t<343; ++t) if (K[t]!=nullptr) delete [] K[t]; -//} -// -// -///** -// * Reads the 16 compressed M2L kernels from the binary files and writes them -// * in K and the respective low-rank in LowRank. -// */ -//template -//void ReadFromBinaryFile(const FReal Epsilon, FReal* K[343], int LowRank[343]) -//{ -// // compile time constants -// const unsigned int nnodes = ORDER*ORDER*ORDER; -// -// // find filename -// const char precision = (typeid(FReal)==typeid(double) ? 'd' : 'f'); -// std::stringstream sstream; -// sstream << "sym2l_" << precision << "_o" << ORDER << "_e" << Epsilon << ".bin"; -// const std::string filename(sstream.str()); -// -// // read binary file -// std::ifstream istream(filename.c_str(), -// std::ios::in | std::ios::binary | std::ios::ate); -// const std::ifstream::pos_type size = istream.tellg(); -// if (size<=0) throw std::runtime_error("The requested binary file does not yet exist. Exit."); -// -// if (istream.good()) { -// istream.seekg(0); -// // 1) read index (int) -// int _idx; -// istream.read(reinterpret_cast(&_idx), sizeof(int)); -// // loop to find 16 compressed m2l operators -// for (int idx=0; idx<343; ++idx) { -// K[idx] = nullptr; -// LowRank[idx] = 0; -// // if it exists -// if (idx == _idx) { -// // 2) read low rank (int) -// int rank; -// istream.read(reinterpret_cast(&rank), sizeof(int)); -// LowRank[idx] = rank; -// // 3) read U and V (both: rank*nnodes * FReal) -// K[idx] = new FReal [2*rank*nnodes]; -// FReal *const U = K[idx]; -// FReal *const V = K[idx] + rank*nnodes; -// istream.read(reinterpret_cast(U), sizeof(FReal)*rank*nnodes); -// istream.read(reinterpret_cast(V), sizeof(FReal)*rank*nnodes); -// -// // 1) read next index -// istream.read(reinterpret_cast(&_idx), sizeof(int)); -// } -// } -// } else throw std::runtime_error("File could not be opened to read"); -// istream.close(); -//} - - - - - -#endif diff --git a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp index 02906a97..0372f2d7 100755 --- a/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebTensorialKernel.hpp @@ -240,14 +240,14 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); + DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); } void P2PRemote(const FTreeCoordinate& /*inPosition*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/){ - DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); + DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); } }; diff --git a/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp b/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp index 33432247..2c61090f 100755 --- a/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp +++ b/Src/Kernels/Chebyshev/FChebTensorialM2LHandler.hpp @@ -96,7 +96,7 @@ class FChebTensorialM2LHandler : FNoCopyabl { const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f'); std::stringstream stream; - stream << "m2l_k"<< MatrixKernelClass::Identifier << "_" << precision_type + stream << "m2l_k"<< MatrixKernelClass::getID() << "_" << precision_type << "_o" << order << "_e" << epsilon << ".bin"; return stream.str(); } @@ -220,7 +220,7 @@ class FChebTensorialM2LHandler : FNoCop { const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f'); std::stringstream stream; - stream << "m2l_k"<< MatrixKernelClass::Identifier << "_" << precision_type + stream << "m2l_k"<< MatrixKernelClass::getID() << "_" << precision_type << "_o" << order << "_e" << epsilon << ".bin"; return stream.str(); } diff --git a/Src/Kernels/Interpolation/FInterpMatrixKernel.cpp b/Src/Kernels/Interpolation/FInterpMatrixKernel.cpp index 8f962427..98b6b995 100644 --- a/Src/Kernels/Interpolation/FInterpMatrixKernel.cpp +++ b/Src/Kernels/Interpolation/FInterpMatrixKernel.cpp @@ -9,11 +9,3 @@ const unsigned int FInterpMatrixKernel_R_IJ::applyTab[]={0,1,2, 1,3,4, 2,4,5}; -/// R_IJK -const unsigned int FInterpMatrixKernel_R_IJK::indexTab[]={0,0,0,1,1,1,2,2,2,0, - 0,1,2,0,1,2,0,1,2,1, - 0,1,2,0,1,2,0,1,2,2}; - -const unsigned int FInterpMatrixKernel_R_IJK::applyTab[]={0,3,6,3,1,9,6,9,2, - 3,1,9,1,4,7,9,7,5, - 6,9,2,9,7,5,2,5,8}; diff --git a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp index 59f3038f..5cfe4a8b 100755 --- a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp +++ b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp @@ -22,15 +22,9 @@ #include "Utils/FNoCopyable.hpp" #include "Utils/FMath.hpp" #include "Utils/FBlas.hpp" +#include "Utils/FGlobal.hpp" + -// extendable -enum KERNEL_FUNCTION_IDENTIFIER {ONE_OVER_R, - ONE_OVER_R_SQUARED, - LENNARD_JONES_POTENTIAL, - R_IJ, - R_IJK, - ONE_OVER_RH - }; // probably not extendable :) enum KERNEL_FUNCTION_TYPE {HOMOGENEOUS, NON_HOMOGENEOUS}; @@ -73,7 +67,6 @@ struct FInterpAbstractMatrixKernel : FNoCopyable struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel { static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; - static const KERNEL_FUNCTION_IDENTIFIER Identifier = ONE_OVER_R; static const unsigned int NCMP = 1; //< number of components static const unsigned int NPV = 1; //< dim of physical values static const unsigned int NPOT = 1; //< dim of potentials @@ -86,6 +79,8 @@ struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel FInterpMatrixKernelR(const FInterpMatrixKernelR& /*other*/) {} + static const char* getID() { return "ONE_OVER_R"; } + // returns position in reduced storage int getPosition(const unsigned int) const {return 0;} @@ -155,7 +150,6 @@ struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel /// One over r when the box size is rescaled to 1 struct FInterpMatrixKernelRH :FInterpMatrixKernelR{ static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; - static const KERNEL_FUNCTION_IDENTIFIER Identifier = ONE_OVER_RH; static const unsigned int NCMP = 1; //< number of components static const unsigned int NPV = 1; //< dim of physical values static const unsigned int NPOT = 1; //< dim of potentials @@ -171,6 +165,8 @@ struct FInterpMatrixKernelRH :FInterpMatrixKernelR{ :FInterpMatrixKernelR(other), LX(other.LX), LY(other.LY), LZ(other.LZ) {} + static const char* getID() { return "ONE_OVER_RH"; } + // evaluate interaction template ValueClass evaluate(const ValueClass& x1, const ValueClass& y1, const ValueClass& z1, @@ -246,7 +242,6 @@ struct FInterpMatrixKernelRH :FInterpMatrixKernelR{ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel { static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; - static const KERNEL_FUNCTION_IDENTIFIER Identifier = ONE_OVER_R_SQUARED; static const unsigned int NCMP = 1; //< number of components static const unsigned int NPV = 1; //< dim of physical values static const unsigned int NPOT = 1; //< dim of potentials @@ -259,6 +254,8 @@ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel FInterpMatrixKernelRR(const FInterpMatrixKernelRR& /*other*/) {} + static const char* getID() { return "ONE_OVER_R_SQUARED"; } + // returns position in reduced storage int getPosition(const unsigned int) const {return 0;} @@ -337,7 +334,6 @@ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel { static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; - static const KERNEL_FUNCTION_IDENTIFIER Identifier = LENNARD_JONES_POTENTIAL; static const unsigned int NCMP = 1; //< number of components static const unsigned int NPV = 1; //< dim of physical values static const unsigned int NPOT = 1; //< dim of potentials @@ -350,6 +346,8 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel FInterpMatrixKernelLJ(const FInterpMatrixKernelLJ& /*other*/) {} + static const char* getID() { return "LENNARD_JONES_POTENTIAL"; } + // returns position in reduced storage int getPosition(const unsigned int) const {return 0;} @@ -467,7 +465,6 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel { static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; - static const KERNEL_FUNCTION_IDENTIFIER Identifier = R_IJ; static const unsigned int NK = 3*3; //< total number of components static const unsigned int NCMP = 6; //< number of components static const unsigned int NPV = 3; //< dim of physical values @@ -496,6 +493,8 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel : _i(other._i), _j(other._j), _CoreWidth2(other._CoreWidth2) {} + static const char* getID() { return "R_IJ"; } + // returns position in reduced storage from position in full 3x3 matrix unsigned int getPosition(const unsigned int n) const {return applyTab[n];} @@ -543,7 +542,7 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel const ValueClass diffx = (x1-x2); const ValueClass diffy = (y1-y2); const ValueClass diffz = (z1-z2); - const ValueClass one_over_r = FMath::One()/sqrt(diffx*diffx + + const ValueClass one_over_r = FMath::One()/FMath::Sqrt(diffx*diffx + diffy*diffy + diffz*diffz + FMath::ConvertTo(_CoreWidth2)); const ValueClass one_over_r3 = one_over_r*one_over_r*one_over_r; @@ -560,158 +559,54 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel } } - FReal getScaleFactor(const FReal RootCellWidth, const int TreeLevel) const - { - const FReal CellWidth(RootCellWidth / FReal(FMath::pow(2, TreeLevel))); - return getScaleFactor(CellWidth); - } - - // R_{,ij} is homogeneous to [L]/[L]^{-2}=[L]^{-1} - // => scales like ONE_OVER_R - FReal getScaleFactor(const FReal CellWidth) const - { - return FReal(2.) / CellWidth; - } - - - - FReal evaluate(const FPoint& p1, const FPoint& p2) const{ - return evaluate(p1.getX(), p1.getY(), p1.getZ(), p2.getX(), p2.getY(), p2.getZ()); - } - void evaluateBlock(const FPoint& p1, const FPoint& p2, FReal* block) const{ - evaluateBlock(p1.getX(), p1.getY(), p1.getZ(), p2.getX(), p2.getY(), p2.getZ(), block); - } -}; - -/// R_{,ijk} -struct FInterpMatrixKernel_R_IJK : FInterpAbstractMatrixKernel -{ - static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; - static const KERNEL_FUNCTION_IDENTIFIER Identifier = R_IJK; - static const unsigned int NK = 3*3*3; //< total number of components - static const unsigned int NCMP = 10; //< number of components after symmetry - static const unsigned int NPOT = 3; //< dim of potentials - static const unsigned int NPV = 3*3; //< dim of physical values - static const unsigned int NRHS = NPV; //< dim of mult exp - static const unsigned int NLHS = NPOT*NRHS; //< dim of loc exp - - // store indices (i,j,k) corresponding to sym idx - static const unsigned int indexTab[/*3*NCMP=30*/]; - - // store positions in sym tensor wr to full - static const unsigned int applyTab[/*NK=27*/]; - - // indices to be set at construction if component-wise evaluation is performed - const unsigned int _i,_j,_k; - - // Material Parameters - const FReal _CoreWidth2; // if >0 then kernel is NON homogeneous - - FInterpMatrixKernel_R_IJK(const FReal CoreWidth = 0.0, const unsigned int d = 0) - : _i(indexTab[d]), _j(indexTab[d+NCMP]), _k(indexTab[d+2*NCMP]), _CoreWidth2(CoreWidth*CoreWidth) - {} - - // copy ctor - FInterpMatrixKernel_R_IJK(const FInterpMatrixKernel_R_IJK& other) - : _i(other._i), _j(other._j), _k(other._k), _CoreWidth2(other._CoreWidth2) - {} - - // returns position in reduced storage from position in full 3x3x3 matrix - unsigned int getPosition(const unsigned int n) const - {return applyTab[n];} - - // returns Core Width squared - FReal getCoreWidth2() const - {return _CoreWidth2;} - - // evaluate interaction - template - FReal evaluate(const ValueClass& x1, const ValueClass& y1, const ValueClass& z1, - const ValueClass& x2, const ValueClass& y2, const ValueClass& z2) const - { - // Convention for anti-symmetric kernels xy=y-x instead of xy=x-y - // This makes the P2P mutual interactions implementation more explicit - const ValueClass diffx = (x1-x2); - const ValueClass diffy = (y1-y2); - const ValueClass diffz = (z1-z2); - const FReal one_over_r = FMath::One()/sqrt(diffx*diffx + - diffy*diffy + - diffz*diffz + FMath::ConvertTo(_CoreWidth2)); - const FReal one_over_r2 = one_over_r*one_over_r; - const FReal one_over_r3 = one_over_r2*one_over_r; - FReal ri,rj,rk; - - if(_i==0) ri=diffx; - else if(_i==1) ri=diffy; - else if(_i==2) ri=diffz; - else throw std::runtime_error("Update i!"); - - if(_j==0) rj=diffx; - else if(_j==1) rj=diffy; - else if(_j==2) rj=diffz; - else throw std::runtime_error("Update j!"); - - if(_k==0) rk=diffx; - else if(_k==1) rk=diffy; - else if(_k==2) rk=diffz; - else throw std::runtime_error("Update k!"); - - const FReal ri2=ri*ri; - const FReal rj2=rj*rj; - - if(_i==_j){ - if(_j==_k) //i=j=k - return FReal(3.) * ( FReal(-1.) + ri2 * one_over_r2 ) * ri * one_over_r3; - else //i=j!=k - return ( FReal(-1.) + FReal(3.) * ri2 * one_over_r2 ) * rk * one_over_r3; - } - else //(_i!=j) - if(_i==_k) //i=k!=j - return ( FReal(-1.) + FReal(3.) * ri2 * one_over_r2 ) * rj * one_over_r3; - else if(_j==_k) //i!=k=j - return ( FReal(-1.) + FReal(3.) * rj2 * one_over_r2 ) * ri * one_over_r3; - else //i!=k!=j - return FReal(3.) * ri * rj * rk * one_over_r2 * one_over_r3; - - } - - // evaluate interaction (blockwise) + // evaluate interaction and derivative (blockwise) template - void evaluateBlock(const ValueClass& x1, const ValueClass& y1, const ValueClass& z1, - const ValueClass& x2, const ValueClass& y2, const ValueClass& z2, ValueClass* block) const + void evaluateBlockAndDerivative(const ValueClass& x1, const ValueClass& y1, const ValueClass& z1, + const ValueClass& x2, const ValueClass& y2, const ValueClass& z2, + ValueClass block[NCMP], ValueClass blockDerivative[NCMP][3]) const { - // Convention for anti-symmetric kernels xy=y-x instead of xy=x-y - // This makes the P2P mutual interactions implementation more explicit const ValueClass diffx = (x1-x2); const ValueClass diffy = (y1-y2); const ValueClass diffz = (z1-z2); - const ValueClass one_over_r = FMath::One()/FMath::Sqrt(diffx*diffx + - diffy*diffy + - diffz*diffz + FMath::ConvertTo(_CoreWidth2)); - const ValueClass one_over_r2 = one_over_r*one_over_r; + const ValueClass r2[3] = {diffx*diffx,diffy*diffy,diffz*diffz}; + const ValueClass one_over_r2 = FMath::One() / (r2[0] + r2[1] + r2[2]); + const ValueClass one_over_r = FMath::Sqrt(one_over_r2); const ValueClass one_over_r3 = one_over_r2*one_over_r; const ValueClass r[3] = {diffx,diffy,diffz}; + const ValueClass Three = FMath::ConvertTo(3.); + const ValueClass MinusOne = -FMath::One(); + for(unsigned int d=0;d scales like ONE_OVER_RR + // R_{,ij} is homogeneous to [L]/[L]^{-2}=[L]^{-1} + // => scales like ONE_OVER_R FReal getScaleFactor(const FReal CellWidth) const { - return FReal(4.) / (CellWidth*CellWidth); + return FReal(2.) / CellWidth; } + FReal evaluate(const FPoint& p1, const FPoint& p2) const{ return evaluate(p1.getX(), p1.getY(), p1.getZ(), p2.getX(), p2.getY(), p2.getZ()); } void evaluateBlock(const FPoint& p1, const FPoint& p2, FReal* block) const{ evaluateBlock(p1.getX(), p1.getY(), p1.getZ(), p2.getX(), p2.getY(), p2.getZ(), block); } + void evaluateBlockAndDerivative(const FPoint& p1, const FPoint& p2, + FReal block[NCMP], FReal blockDerivative[NCMP][3]) const { + evaluateBlockAndDerivative(p1.getX(), p1.getY(), p1.getZ(), p2.getX(), p2.getY(), p2.getZ(), block, blockDerivative); + } }; diff --git a/Src/Kernels/Interpolation/FInterpP2PKernels.hpp b/Src/Kernels/Interpolation/FInterpP2PKernels.hpp index 3c560dc4..df0298e5 100644 --- a/Src/Kernels/Interpolation/FInterpP2PKernels.hpp +++ b/Src/Kernels/Interpolation/FInterpP2PKernels.hpp @@ -5,131 +5,19 @@ #include "../P2P/FP2P.hpp" #include "../P2P/FP2PR.hpp" - -template -struct DirectInteractionComputer -{ - template - static void P2P( ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27], - const MatrixKernelClass *const MatrixKernel){ - FP2P::FullMutual(TargetParticles,NeighborSourceParticles,14,MatrixKernel); - } - - template - static void P2PRemote( ContainerClass* const FRestrict inTargets, - ContainerClass* const inNeighbors[27], - const int inSize, - const MatrixKernelClass *const MatrixKernel){ - FP2P::FullRemote(inTargets,inNeighbors,inSize,MatrixKernel); - } -}; - /////////////////////////////////////////////////////// // P2P Wrappers /////////////////////////////////////////////////////// -/*! Specialization for Laplace potential */ -template <> -struct DirectInteractionComputer -{ - template - static void P2P( ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27], - const MatrixKernelClass *const /*MatrixKernel*/){ - FP2PR::FullMutual(TargetParticles,NeighborSourceParticles,14); - } - - template - static void P2PRemote( ContainerClass* const FRestrict inTargets, - ContainerClass* const inNeighbors[27], - const int inSize, - const MatrixKernelClass *const /*MatrixKernel*/){ - FP2PR::FullRemote(inTargets,inNeighbors,inSize); - } -}; - - -/*! Specialization for GradGradR potential */ -template <> -struct DirectInteractionComputer -{ - template - static void P2P( ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27], - const MatrixKernelClass *const MatrixKernel){ - FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14,MatrixKernel); - } - - template - static void P2PRemote( ContainerClass* const FRestrict inTargets, - ContainerClass* const inNeighbors[27], - const int inSize, - const MatrixKernelClass *const MatrixKernel){ - FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize,MatrixKernel); - } -}; - -/*! Specialization for GradGradGradR potential */ -template <> -struct DirectInteractionComputer -{ - template - static void P2P( ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27], - const MatrixKernelClass *const MatrixKernel){ - FP2P::FullMutualRIJK(TargetParticles,NeighborSourceParticles,14,MatrixKernel); - } - - template - static void P2PRemote( ContainerClass* const FRestrict inTargets, - ContainerClass* const inNeighbors[27], - const int inSize, - const MatrixKernelClass *const MatrixKernel){ - FP2P::FullRemoteRIJK(inTargets,inNeighbors,inSize,MatrixKernel); - } -}; - -/////////////////////////////////////////////////////// -// In case of multi right hand side -/////////////////////////////////////////////////////// - - - -template -struct DirectInteractionComputer -{ - template - static void P2P(ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27], - const MatrixKernelClass *const /*MatrixKernel*/){ - for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2PR::FullMutual(TargetParticles,NeighborSourceParticles,14); - } - } - - template - static void P2PRemote(ContainerClass* const FRestrict inTargets, - ContainerClass* const inNeighbors[27], - const int inSize, - const MatrixKernelClass *const /*MatrixKernel*/){ - for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2PR::FullRemote(inTargets,inNeighbors,inSize); - } - } -}; - - -/*! Specialization for GradGradR potential */ -template -struct DirectInteractionComputer +template +struct DirectInteractionComputer { template static void P2P( ContainerClass* const FRestrict TargetParticles, ContainerClass* const NeighborSourceParticles[27], const MatrixKernelClass *const MatrixKernel){ for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2P::FullMutualRIJ(TargetParticles,NeighborSourceParticles,14,MatrixKernel); + FP2P::FullMutualKIJ(TargetParticles,NeighborSourceParticles,14,MatrixKernel); } } @@ -139,22 +27,22 @@ struct DirectInteractionComputer const int inSize, const MatrixKernelClass *const MatrixKernel){ for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2P::FullRemoteRIJ(inTargets,inNeighbors,inSize,MatrixKernel); + FP2P::FullRemoteKIJ(inTargets,inNeighbors,inSize,MatrixKernel); } } }; -/*! Specialization for GradGradGradR potential */ + +/*! Specialization for scalar kernels */ template -struct DirectInteractionComputer +struct DirectInteractionComputer<1, NVALS> { template static void P2P( ContainerClass* const FRestrict TargetParticles, ContainerClass* const NeighborSourceParticles[27], const MatrixKernelClass *const MatrixKernel){ - for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2P::FullMutualRIJK(TargetParticles,NeighborSourceParticles,14,MatrixKernel); - } + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs) + FP2P::FullMutual(TargetParticles,NeighborSourceParticles,14,MatrixKernel); } template @@ -162,9 +50,8 @@ struct DirectInteractionComputer ContainerClass* const inNeighbors[27], const int inSize, const MatrixKernelClass *const MatrixKernel){ - for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2P::FullRemoteRIJK(inTargets,inNeighbors,inSize,MatrixKernel); - } + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs) + FP2P::FullRemote(inTargets,inNeighbors,inSize,MatrixKernel); } }; diff --git a/Src/Kernels/P2P/FP2P.hpp b/Src/Kernels/P2P/FP2P.hpp index 4c1d410a..3c04b317 100644 --- a/Src/Kernels/P2P/FP2P.hpp +++ b/Src/Kernels/P2P/FP2P.hpp @@ -93,284 +93,18 @@ inline void NonMutualParticles(const FReal sourceX,const FReal sourceY,const FRe (*targetPotential) += ( Kxy[0] * sourcePhysicalValue ); } - - - - ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// -// R_IJ +// Tensorial Matrix Kernels: K_IJ / p_i=\sum_j K_{ij} w_j ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////////////////////////////// -/** - * @brief FullMutualRIJ - */ -template -inline void FullMutualRIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], - const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ - - const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock - - const int nbParticlesTargets = inTargets->getNbParticles(); - const FReal*const targetsX = inTargets->getPositions()[0]; - const FReal*const targetsY = inTargets->getPositions()[1]; - const FReal*const targetsZ = inTargets->getPositions()[2]; - - for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ - for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ - if( inNeighbors[idxNeighbors] ){ - const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); - const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; - const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; - const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; - - for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ - FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; - FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; - FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal*const targetsPotentials = inTargets->getPotentials(i); - FReal*const targetsForcesX = inTargets->getForcesX(i); - FReal*const targetsForcesY = inTargets->getForcesY(i); - FReal*const targetsForcesZ = inTargets->getForcesZ(i); - FReal*const sourcesPotentials = inNeighbors[idxNeighbors]->getPotentials(i); - FReal*const sourcesForcesX = inNeighbors[idxNeighbors]->getForcesX(i); - FReal*const sourcesForcesY = inNeighbors[idxNeighbors]->getForcesY(i); - FReal*const sourcesForcesZ = inNeighbors[idxNeighbors]->getForcesZ(i); - - FReal ri2=r[i]*r[i]; - - for(unsigned int j = 0 ; j < 3 ; ++j){ - const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j); - const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j); - - // potentials - FReal potentialCoef; - if(i==j) - potentialCoef = inv_distance - ri2 * inv_distance_pow3; - else - potentialCoef = - r[i] * r[j] * inv_distance_pow3; - - // forces - FReal rj2=r[j]*r[j]; - - FReal coef[3]; - for(unsigned int k = 0 ; k < 3 ; ++k) - coef[k]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); - - // Grad of RIJ kernel is RIJK kernel => use same expression as in FInterpMatrixKernel - for(unsigned int k = 0 ; k < 3 ; ++k){ - if(i==j){ - if(j==k) //i=j=k - coef[k] *= FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - coef[k] *= ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - coef[k] *= FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - }// k - - targetsForcesX[idxTarget] += coef[0]; - targetsForcesY[idxTarget] += coef[1]; - targetsForcesZ[idxTarget] += coef[2]; - targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); - - sourcesForcesX[idxSource] -= coef[0]; - sourcesForcesY[idxSource] -= coef[1]; - sourcesForcesZ[idxSource] -= coef[2]; - sourcesPotentials[idxSource] += potentialCoef * targetsPhysicalValues[idxTarget]; - - - }// j - }// i - } - } - } - } - - for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ - for(int idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){ - FReal dx = targetsX[idxSource] - targetsX[idxTarget]; - FReal dy = targetsY[idxSource] - targetsY[idxTarget]; - FReal dz = targetsZ[idxSource] - targetsZ[idxTarget]; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal*const targetsPotentials = inTargets->getPotentials(i); - FReal*const targetsForcesX = inTargets->getForcesX(i); - FReal*const targetsForcesY = inTargets->getForcesY(i); - FReal*const targetsForcesZ = inTargets->getForcesZ(i); - FReal ri2=r[i]*r[i]; - - for(unsigned int j = 0 ; j < 3 ; ++j){ - const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j); - - // potentials - FReal potentialCoef; - if(i==j) - potentialCoef = inv_distance - ri2 * inv_distance_pow3; - else - potentialCoef = - r[i] * r[j] * inv_distance_pow3; - - // forces - FReal rj2=r[j]*r[j]; - - FReal coef[3]; - for(unsigned int k = 0 ; k < 3 ; ++k) - coef[k]= -(targetsPhysicalValues[idxTarget] * targetsPhysicalValues[idxSource]); - - // Grad of RIJ kernel is RIJK kernel => use same expression as in FInterpMatrixKernel - for(unsigned int k = 0 ; k < 3 ; ++k){ - if(i==j){ - if(j==k) //i=j=k - coef[k] *= FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - coef[k] *= ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - coef[k] *= FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - }// k - - - targetsForcesX[idxTarget] += coef[0]; - targetsForcesY[idxTarget] += coef[1]; - targetsForcesZ[idxTarget] += coef[2]; - targetsPotentials[idxTarget] += ( potentialCoef * targetsPhysicalValues[idxSource] ); - - targetsForcesX[idxSource] -= coef[0]; - targetsForcesY[idxSource] -= coef[1]; - targetsForcesZ[idxSource] -= coef[2]; - targetsPotentials[idxSource] += potentialCoef * targetsPhysicalValues[idxTarget]; - }// j - }// i - - } - } -} - -/** - * @brief FullRemoteRIJ - */ -template -inline void FullRemoteRIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], - const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ - - const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock - - const int nbParticlesTargets = inTargets->getNbParticles(); - const FReal*const targetsX = inTargets->getPositions()[0]; - const FReal*const targetsY = inTargets->getPositions()[1]; - const FReal*const targetsZ = inTargets->getPositions()[2]; - - for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ - for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ - if( inNeighbors[idxNeighbors] ){ - const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); - const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; - const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; - const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; - - for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ - // potential - FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; - FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; - FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal*const targetsPotentials = inTargets->getPotentials(i); - FReal*const targetsForcesX = inTargets->getForcesX(i); - FReal*const targetsForcesY = inTargets->getForcesY(i); - FReal*const targetsForcesZ = inTargets->getForcesZ(i); - FReal ri2=r[i]*r[i]; - - for(unsigned int j = 0 ; j < 3 ; ++j){ - const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j); - const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j); - - // potentials - FReal potentialCoef; - if(i==j) - potentialCoef = inv_distance - ri2 * inv_distance_pow3; - else - potentialCoef = - r[i] * r[j] * inv_distance_pow3; - - // forces - FReal rj2=r[j]*r[j]; - - FReal coef[3]; - for(unsigned int k = 0 ; k < 3 ; ++k) - coef[k]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); - - // Grad of RIJ kernel is RIJK kernel => use same expression as in FInterpMatrixKernel - for(unsigned int k = 0 ; k < 3 ; ++k){ - if(i==j){ - if(j==k) //i=j=k - coef[k] *= FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - coef[k] *= ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - coef[k] *= FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - }// k - - targetsForcesX[idxTarget] += coef[0]; - targetsForcesY[idxTarget] += coef[1]; - targetsForcesZ[idxTarget] += coef[2]; - targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); - - }// j - }// i - - } - } - } - } -} - - -/** - * @brief MutualParticlesRIJ + /** + * @brief MutualParticlesKIJ * @param sourceX * @param sourceY * @param sourceZ @@ -388,434 +122,49 @@ inline void FullRemoteRIJ(ContainerClass* const FRestrict inTargets, ContainerCl * @param targetForceZ * @param targetPotential */ -template -inline void MutualParticlesRIJ(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal* sourcePhysicalValue, - FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, - const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue, - FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, - const MatrixKernelClass *const MatrixKernel){ - - const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock - - // GradGradR potential - FReal dx = sourceX - targetX; - FReal dy = sourceY - targetY; - FReal dz = sourceZ - targetZ; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal ri2=r[i]*r[i]; - for(unsigned int j = 0 ; j < 3 ; ++j){ - - // potentials - FReal potentialCoef; - if(i==j) - potentialCoef = inv_distance - ri2 * inv_distance_pow3; - else - potentialCoef = - r[i] * r[j] * inv_distance_pow3; - - // forces - FReal rj2=r[j]*r[j]; - - FReal coef[3]; - for(unsigned int k = 0 ; k < 3 ; ++k) - coef[k]= -(targetPhysicalValue[j] * sourcePhysicalValue[j]); - - // Grad of RIJ kernel is RIJK kernel => use same expression as in FInterpMatrixKernel - for(unsigned int k = 0 ; k < 3 ; ++k){ - if(i==j){ - if(j==k) //i=j=k - coef[k] *= FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - coef[k] *= ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - coef[k] *= ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - coef[k] *= FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - }// k - - targetForceX[i] += coef[0]; - targetForceY[i] += coef[1]; - targetForceZ[i] += coef[2]; - targetPotential[i] += ( potentialCoef * sourcePhysicalValue[j] ); - - sourceForceX[i] -= coef[0]; - sourceForceY[i] -= coef[1]; - sourceForceZ[i] -= coef[2]; - sourcePotential[i] += ( potentialCoef * targetPhysicalValue[j] ); - - }// j - }// i - -} - - -////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////////// -// R_IJK -////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////////// -////////////////////////////////////////////////////////////////////////////////////////////////////////// - -/** - * @brief FullMutualRIJK - */ -template -inline void FullMutualRIJK(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], - const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ - - const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock - - const int nbParticlesTargets = inTargets->getNbParticles(); - const FReal*const targetsX = inTargets->getPositions()[0]; - const FReal*const targetsY = inTargets->getPositions()[1]; - const FReal*const targetsZ = inTargets->getPositions()[2]; - - for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ - for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ - if( inNeighbors[idxNeighbors] ){ - const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); - const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; - const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; - const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; - - for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ - FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; - FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; - FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal*const targetsPotentials = inTargets->getPotentials(i); - FReal*const targetsForcesX = inTargets->getForcesX(i); - FReal*const targetsForcesY = inTargets->getForcesY(i); - FReal*const targetsForcesZ = inTargets->getForcesZ(i); - FReal*const sourcesPotentials = inNeighbors[idxNeighbors]->getPotentials(i); - FReal*const sourcesForcesX = inNeighbors[idxNeighbors]->getForcesX(i); - FReal*const sourcesForcesY = inNeighbors[idxNeighbors]->getForcesY(i); - FReal*const sourcesForcesZ = inNeighbors[idxNeighbors]->getForcesZ(i); - - const FReal ri2=r[i]*r[i]; - - for(unsigned int j = 0 ; j < 3 ; ++j){ - FReal rj2=r[j]*r[j]; - - for(unsigned int k = 0 ; k < 3 ; ++k){ - const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); - const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j*3+k); - - // potentials - FReal potentialCoef; - if(i==j){ - if(j==k) //i=j=k - potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - - // forces - FReal coef[3]; - for(unsigned int l = 0 ; l < 3 ; ++l){ - coef[l]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); - - // // Grad of RIJK kernel is RIJKL kernel => TODO Implement - // coef[l] *= ; - - }// l - - targetsForcesX[idxTarget] += coef[0]; - targetsForcesY[idxTarget] += coef[1]; - targetsForcesZ[idxTarget] += coef[2]; - targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); - - sourcesForcesX[idxSource] -= coef[0]; - sourcesForcesY[idxSource] -= coef[1]; - sourcesForcesZ[idxSource] -= coef[2]; - sourcesPotentials[idxSource] += -potentialCoef * targetsPhysicalValues[idxTarget]; - - }// k - }// j - }// i - } - } - } - } - - for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ - for(int idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){ - FReal dx = targetsX[idxSource] - targetsX[idxTarget]; - FReal dy = targetsY[idxSource] - targetsY[idxTarget]; - FReal dz = targetsZ[idxSource] - targetsZ[idxTarget]; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal*const targetsPotentials = inTargets->getPotentials(i); - FReal*const targetsForcesX = inTargets->getForcesX(i); - FReal*const targetsForcesY = inTargets->getForcesY(i); - FReal*const targetsForcesZ = inTargets->getForcesZ(i); - FReal ri2=r[i]*r[i]; - - for(unsigned int j = 0 ; j < 3 ; ++j){ - FReal rj2=r[j]*r[j]; - - for(unsigned int k = 0 ; k < 3 ; ++k){ - const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); - - // potentials - FReal potentialCoef; - if(i==j){ - if(j==k) //i=j=k - potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - - // forces - FReal coef[3]; - for(unsigned int l = 0 ; l < 3 ; ++l){ - coef[l]= -(targetsPhysicalValues[idxTarget] * targetsPhysicalValues[idxSource]); - - // // Grad of RIJK kernel is RIJKL kernel => TODO Implement - // coef[l] *= ; - - }// l - - targetsForcesX[idxTarget] += coef[0]; - targetsForcesY[idxTarget] += coef[1]; - targetsForcesZ[idxTarget] += coef[2]; - targetsPotentials[idxTarget] += ( potentialCoef * targetsPhysicalValues[idxSource] ); - - targetsForcesX[idxSource] -= coef[0]; - targetsForcesY[idxSource] -= coef[1]; - targetsForcesZ[idxSource] -= coef[2]; - targetsPotentials[idxSource] += -potentialCoef * targetsPhysicalValues[idxTarget]; - }// k - }// j - }// i - - } - } -} - -/** - * @brief FullRemoteRIJK - */ -template -inline void FullRemoteRIJK(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], - const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ - - const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock - - const int nbParticlesTargets = inTargets->getNbParticles(); - const FReal*const targetsX = inTargets->getPositions()[0]; - const FReal*const targetsY = inTargets->getPositions()[1]; - const FReal*const targetsZ = inTargets->getPositions()[2]; - - for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ - for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ - if( inNeighbors[idxNeighbors] ){ - const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); - const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; - const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; - const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; - - for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ - // potential - FReal dx = sourcesX[idxSource] - targetsX[idxTarget]; - FReal dy = sourcesY[idxSource] - targetsY[idxTarget]; - FReal dz = sourcesZ[idxSource] - targetsZ[idxTarget]; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; - - for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal*const targetsPotentials = inTargets->getPotentials(i); - FReal*const targetsForcesX = inTargets->getForcesX(i); - FReal*const targetsForcesY = inTargets->getForcesY(i); - FReal*const targetsForcesZ = inTargets->getForcesZ(i); - FReal ri2=r[i]*r[i]; - - for(unsigned int j = 0 ; j < 3 ; ++j){ - FReal rj2=r[j]*r[j]; - - for(unsigned int k = 0 ; k < 3 ; ++k){ - - const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j*3+k); - const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j*3+k); - - // potentials - FReal potentialCoef; - if(i==j){ - if(j==k) //i=j=k - potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - - // forces - FReal coef[3]; - for(unsigned int l = 0 ; l < 3 ; ++l){ - coef[l]= -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); - - // // Grad of RIJK kernel is RIJKL kernel => TODO Implement - // coef[l] *= ; - - }// l - - targetsForcesX[idxTarget] += coef[0]; - targetsForcesY[idxTarget] += coef[1]; - targetsForcesZ[idxTarget] += coef[2]; - targetsPotentials[idxTarget] += ( potentialCoef * sourcesPhysicalValues[idxSource] ); - - }// k - }// j - }// i - - } - } - } - } -} - - -/** - * @brief MutualParticlesRIJK - * @param sourceX - * @param sourceY - * @param sourceZ - * @param sourcePhysicalValue - * @param sourceForceX - * @param sourceForceY - * @param sourceForceZ - * @param sourcePotential - * @param targetX - * @param targetY - * @param targetZ - * @param targetPhysicalValue - * @param targetForceX - * @param targetForceY - * @param targetForceZ - * @param targetPotential - */ -template -inline void MutualParticlesRIJK(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal* sourcePhysicalValue, - FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, - const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue, - FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, - const MatrixKernelClass *const MatrixKernel){ - - const double CoreWidth2 = MatrixKernel->getCoreWidth2(); //PB: TODO directly call evaluateBlock - - // GradGradR potential - FReal dx = sourceX - targetX; - FReal dy = sourceY - targetY; - FReal dz = sourceZ - targetZ; - - FReal inv_distance_pow2 = FReal(1.0) / (dx*dx + dy*dy + dz*dz + CoreWidth2); - FReal inv_distance = FMath::Sqrt(inv_distance_pow2); - FReal inv_distance_pow3 = inv_distance_pow2 * inv_distance; - - FReal r[3]={dx,dy,dz}; + template + inline void MutualParticlesKIJ(const FReal sourceX,const FReal sourceY,const FReal sourceZ, const FReal* sourcePhysicalValue, + FReal* sourceForceX, FReal* sourceForceY, FReal* sourceForceZ, FReal* sourcePotential, + const FReal targetX,const FReal targetY,const FReal targetZ, const FReal* targetPhysicalValue, + FReal* targetForceX, FReal* targetForceY, FReal* targetForceZ, FReal* targetPotential, + const MatrixKernelClass *const MatrixKernel){ + + // get information on tensorial aspect of matrix kernel + const int ncmp = MatrixKernelClass::NCMP; + const int applyTab[9] = {0,1,2, + 1,3,4, + 2,4,5}; + + // evaluate kernel and its partial derivatives + const FPoint sourcePoint(sourceX,sourceY,sourceZ); + const FPoint targetPoint(targetX,targetY,targetZ); + FReal Kxy[ncmp]; + FReal dKxy[ncmp][3]; + MatrixKernel->evaluateBlockAndDerivative(sourcePoint,targetPoint,Kxy,dKxy); for(unsigned int i = 0 ; i < 3 ; ++i){ - FReal ri2=r[i]*r[i]; - for(unsigned int j = 0 ; j < 3 ; ++j){ - FReal rj2=r[j]*r[j]; - for(unsigned int k = 0 ; k < 3 ; ++k){ - - // potentials - FReal potentialCoef; - if(i==j){ - if(j==k) //i=j=k - potentialCoef = FReal(3.) * ( FReal(-1.) + ri2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i=j!=k - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[k] * inv_distance_pow3; - } - else{ //(i!=j) - if(i==k) //i=k!=j - potentialCoef = ( FReal(-1.) + FReal(3.) * ri2 * inv_distance_pow2 ) * r[j] * inv_distance_pow3; - else if(j==k) //i!=k=j - potentialCoef = ( FReal(-1.) + FReal(3.) * rj2 * inv_distance_pow2 ) * r[i] * inv_distance_pow3; - else //i!=k!=j - potentialCoef = FReal(3.) * r[i] * r[j] * r[k] * inv_distance_pow2 * inv_distance_pow3; - } - - // forces - FReal coef[3]; - for(unsigned int l = 0 ; l < 3 ; ++l){ - coef[l]= -(targetPhysicalValue[j*3+k] * sourcePhysicalValue[j*3+k]); + for(unsigned int j = 0 ; j < 3 ; ++j){ - // // Grad of RIJK kernel is RIJKL kernel => TODO Implement - // coef[l] *= ; + // update component to be applied + const int d = applyTab[i*3+j]; - }// l + // forces prefactor + const FReal coef = -(targetPhysicalValue[j] * sourcePhysicalValue[j]); - targetForceX[i] += coef[0]; - targetForceY[i] += coef[1]; - targetForceZ[i] += coef[2]; - targetPotential[i] += ( potentialCoef * sourcePhysicalValue[j*3+k] ); + targetForceX[i] += dKxy[d][0] * coef; + targetForceY[i] += dKxy[d][1] * coef; + targetForceZ[i] += dKxy[d][2] * coef; + targetPotential[i] += ( Kxy[d] * sourcePhysicalValue[j] ); - sourceForceX[i] -= coef[0]; - sourceForceY[i] -= coef[1]; - sourceForceZ[i] -= coef[2]; - sourcePotential[i] += ( -potentialCoef * targetPhysicalValue[j*3+k] ); + sourceForceX[i] -= dKxy[d][0] * coef; + sourceForceY[i] -= dKxy[d][1] * coef; + sourceForceZ[i] -= dKxy[d][2] * coef; + sourcePotential[i] += ( Kxy[d] * targetPhysicalValue[j] ); - }// k - }// j + }// j }// i -} - + } } diff --git a/Src/Kernels/P2P/FP2PClassic.hpp b/Src/Kernels/P2P/FP2PClassic.hpp index d9bcc699..4bc51331 100644 --- a/Src/Kernels/P2P/FP2PClassic.hpp +++ b/Src/Kernels/P2P/FP2PClassic.hpp @@ -131,6 +131,193 @@ inline void FullRemote(ContainerClass* const FRestrict inTargets, ContainerClass } } +////////////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////////////////////// +// Tensorial Matrix Kernels: K_IJ +////////////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////////////////////////////////////////// + +/** + * @brief FullMutualKIJ + */ + template + inline void FullMutualKIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + + // get information on tensorial aspect of matrix kernel + const int ncmp = MatrixKernelClass::NCMP; + const int applyTab[9] = {0,1,2, + 1,3,4, + 2,4,5}; + + const int nbParticlesTargets = inTargets->getNbParticles(); + const FReal*const targetsX = inTargets->getPositions()[0]; + const FReal*const targetsY = inTargets->getPositions()[1]; + const FReal*const targetsZ = inTargets->getPositions()[2]; + + for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + if( inNeighbors[idxNeighbors] ){ + const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); + const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; + const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; + const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; + + for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ + + // evaluate kernel and its partial derivatives + const FPoint sourcePoint(sourcesX[idxSource],sourcesY[idxSource],sourcesZ[idxSource]); + const FPoint targetPoint(targetsX[idxTarget],targetsY[idxTarget],targetsZ[idxTarget]); + FReal Kxy[ncmp]; + FReal dKxy[ncmp][3]; + MatrixKernel->evaluateBlockAndDerivative(sourcePoint,targetPoint,Kxy,dKxy); + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + FReal*const sourcesPotentials = inNeighbors[idxNeighbors]->getPotentials(i); + FReal*const sourcesForcesX = inNeighbors[idxNeighbors]->getForcesX(i); + FReal*const sourcesForcesY = inNeighbors[idxNeighbors]->getForcesY(i); + FReal*const sourcesForcesZ = inNeighbors[idxNeighbors]->getForcesZ(i); + + for(unsigned int j = 0 ; j < 3 ; ++j){ + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j); + const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j); + + // update component to be applied + const int d = applyTab[i*3+j]; + + // forces prefactor + FReal coef = -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); + + targetsForcesX[idxTarget] += dKxy[d][0] * coef; + targetsForcesY[idxTarget] += dKxy[d][1] * coef; + targetsForcesZ[idxTarget] += dKxy[d][2] * coef; + targetsPotentials[idxTarget] += ( Kxy[d] * sourcesPhysicalValues[idxSource] ); + + sourcesForcesX[idxSource] -= dKxy[d][0] * coef; + sourcesForcesY[idxSource] -= dKxy[d][1] * coef; + sourcesForcesZ[idxSource] -= dKxy[d][2] * coef; + sourcesPotentials[idxSource] += Kxy[d] * targetsPhysicalValues[idxTarget]; + + }// j + }// i + } + } + } + } + + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + for(int idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){ + + // evaluate kernel and its partial derivatives + const FPoint sourcePoint(targetsX[idxSource],targetsY[idxSource],targetsZ[idxSource]); + const FPoint targetPoint(targetsX[idxTarget],targetsY[idxTarget],targetsZ[idxTarget]); + FReal Kxy[ncmp]; + FReal dKxy[ncmp][3]; + MatrixKernel->evaluateBlockAndDerivative(sourcePoint,targetPoint,Kxy,dKxy); + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + + for(unsigned int j = 0 ; j < 3 ; ++j){ + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j); + + // update component to be applied + const int d = applyTab[i*3+j]; + + // forces prefactor + const FReal coef = -(targetsPhysicalValues[idxTarget] * targetsPhysicalValues[idxSource]); + + targetsForcesX[idxTarget] += dKxy[d][0] * coef; + targetsForcesY[idxTarget] += dKxy[d][1] * coef; + targetsForcesZ[idxTarget] += dKxy[d][2] * coef; + targetsPotentials[idxTarget] += ( Kxy[d] * targetsPhysicalValues[idxSource] ); + + targetsForcesX[idxSource] -= dKxy[d][0] * coef; + targetsForcesY[idxSource] -= dKxy[d][1] * coef; + targetsForcesZ[idxSource] -= dKxy[d][2] * coef; + targetsPotentials[idxSource] += Kxy[d] * targetsPhysicalValues[idxTarget]; + }// j + }// i + + } + } + } + + /** + * @brief FullRemoteKIJ + */ + template + inline void FullRemoteKIJ(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[], + const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){ + + // get information on tensorial aspect of matrix kernel + const int ncmp = MatrixKernelClass::NCMP; + const int applyTab[9] = {0,1,2, + 1,3,4, + 2,4,5}; + + const int nbParticlesTargets = inTargets->getNbParticles(); + const FReal*const targetsX = inTargets->getPositions()[0]; + const FReal*const targetsY = inTargets->getPositions()[1]; + const FReal*const targetsZ = inTargets->getPositions()[2]; + + for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){ + for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){ + if( inNeighbors[idxNeighbors] ){ + const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles(); + const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0]; + const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1]; + const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2]; + + for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){ + + // evaluate kernel and its partial derivatives + const FPoint sourcePoint(sourcesX[idxSource],sourcesY[idxSource],sourcesZ[idxSource]); + const FPoint targetPoint(targetsX[idxTarget],targetsY[idxTarget],targetsZ[idxTarget]); + FReal Kxy[ncmp]; + FReal dKxy[ncmp][3]; + MatrixKernel->evaluateBlockAndDerivative(sourcePoint,targetPoint,Kxy,dKxy); + + for(unsigned int i = 0 ; i < 3 ; ++i){ + FReal*const targetsPotentials = inTargets->getPotentials(i); + FReal*const targetsForcesX = inTargets->getForcesX(i); + FReal*const targetsForcesY = inTargets->getForcesY(i); + FReal*const targetsForcesZ = inTargets->getForcesZ(i); + + for(unsigned int j = 0 ; j < 3 ; ++j){ + const FReal*const targetsPhysicalValues = inTargets->getPhysicalValues(j); + const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValues(j); + + // update component to be applied + const int d = applyTab[i*3+j]; + + // forces prefactor + const FReal coef = -(targetsPhysicalValues[idxTarget] * sourcesPhysicalValues[idxSource]); + + targetsForcesX[idxTarget] += dKxy[d][0] * coef; + targetsForcesY[idxTarget] += dKxy[d][1] * coef; + targetsForcesZ[idxTarget] += dKxy[d][2] * coef; + targetsPotentials[idxTarget] += ( Kxy[d] * sourcesPhysicalValues[idxSource] ); + + }// j + }// i + + } + } + } + } + } } diff --git a/Src/Kernels/Uniform/FUnifDenseKernel.hpp b/Src/Kernels/Uniform/FUnifDenseKernel.hpp index 95aa4d44..c6a84dd9 100644 --- a/Src/Kernels/Uniform/FUnifDenseKernel.hpp +++ b/Src/Kernels/Uniform/FUnifDenseKernel.hpp @@ -232,7 +232,7 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); + DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); } @@ -240,7 +240,7 @@ public: ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/) { - DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); + DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); } }; diff --git a/Src/Kernels/Uniform/FUnifKernel.hpp b/Src/Kernels/Uniform/FUnifKernel.hpp index 31b0b491..effbd662 100644 --- a/Src/Kernels/Uniform/FUnifKernel.hpp +++ b/Src/Kernels/Uniform/FUnifKernel.hpp @@ -213,7 +213,7 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); + DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); } @@ -221,7 +221,7 @@ public: ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/) { - DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); + DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); } }; diff --git a/Src/Kernels/Uniform/FUnifM2LHandler.hpp b/Src/Kernels/Uniform/FUnifM2LHandler.hpp index 09fb6ca5..5a1f322e 100644 --- a/Src/Kernels/Uniform/FUnifM2LHandler.hpp +++ b/Src/Kernels/Uniform/FUnifM2LHandler.hpp @@ -188,7 +188,7 @@ class FUnifM2LHandler { const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f'); std::stringstream stream; - stream << "m2l_k"/*<< MatrixKernelClass::Identifier*/ << "_" << precision_type + stream << "m2l_k"/*<< MatrixKernelClass::getID()*/ << "_" << precision_type << "_o" << order << ".bin"; return stream.str(); } @@ -353,7 +353,7 @@ class FUnifM2LHandler { const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f'); std::stringstream stream; - stream << "m2l_k"/*<< MatrixKernelClass::Identifier*/ << "_" << precision_type + stream << "m2l_k"/*<< MatrixKernelClass::getID()*/ << "_" << precision_type << "_o" << order << ".bin"; return stream.str(); } diff --git a/Src/Kernels/Uniform/FUnifSymKernel.hpp b/Src/Kernels/Uniform/FUnifSymKernel.hpp deleted file mode 100755 index e42e79c8..00000000 --- a/Src/Kernels/Uniform/FUnifSymKernel.hpp +++ /dev/null @@ -1,471 +0,0 @@ -// =================================================================================== -// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner -// olivier.coulaud@inria.fr, berenger.bramas@inria.fr -// This software is a computer program whose purpose is to compute the FMM. -// -// This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". -// "http://www.gnu.org/licenses". -// =================================================================================== -#ifndef FUNIFSYMKERNEL_HPP -#define FUNIFSYMKERNEL_HPP - -#include "../../Utils/FGlobal.hpp" - -#include "../../Utils/FSmartPointer.hpp" - -// Originally in M2LHandler but transferred to the kernel for the symmetric version -#include "../../Utils/FDft.hpp" // PB: for FFT -#include "../../Utils/FComplex.hpp" -#include "./FUnifTensor.hpp" // PB: for node_diff -// - -#include "./FAbstractUnifKernel.hpp" -#include "./FUnifInterpolator.hpp" - -#include "./FUnifSymM2LHandler.hpp" - -class FTreeCoordinate; - - -// for verbosity only!!! -//#define COUNT_BLOCKED_INTERACTIONS - -// if timings should be logged -#define LOG_TIMINGS - -/** - * @author Pierre Blanchard (pierre.blanchard@inria.fr) - * @class FUnifSymKernel - * @brief - * Please read the license - * - * This kernels implement the Lagrange interpolation based FMM operators - * exploiting the symmetries in the far-field. It implements all interfaces - * (P2P, P2M, M2M, M2L, L2L, L2P) which are required by the FFmmAlgorithm and - * FFmmAlgorithmThread. - * - * @tparam CellClass Type of cell - * @tparam ContainerClass Type of container to store particles - * @tparam MatrixKernelClass Type of matrix kernel function - * @tparam ORDER interpolation order - */ -template < class CellClass, class ContainerClass, class MatrixKernelClass, int ORDER, int NVALS = 1> -class FUnifSymKernel - : public FAbstractUnifKernel -{ - typedef FAbstractUnifKernel AbstractBaseClass; - typedef FUnifSymM2LHandler SymM2LHandlerClass; - enum {nnodes = AbstractBaseClass::nnodes}; - - /// Needed for handling all symmetries - const FSmartPointer SymM2LHandler; - - // permuted local and multipole expansions - FReal** Loc; - FReal** Mul; - unsigned int* countExp; - - // transformed expansions - FComplex** TLoc; - FComplex** TMul; - - static const unsigned int rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1); - static const unsigned int opt_rc = rc/2+1; - - typedef FUnifTensor TensorType; - unsigned int node_diff[nnodes*nnodes]; - - // FDft Dft; // Direct Discrete Fourier Transformator - FFft Dft; // Fast Discrete Fourier Transformator - - /** - * Allocate memory for storing locally permuted mulipole and local expansions - */ - void allocateMemoryForPermutedExpansions() - { - assert(Loc==NULL && Mul==NULL && countExp==NULL); - Loc = new FReal* [343]; - Mul = new FReal* [343]; - TLoc = new FComplex* [343]; - TMul = new FComplex* [343]; - countExp = new unsigned int [343]; - - // set all 343 to NULL - for (unsigned int idx=0; idx<343; ++idx) { - Mul[idx] = Loc[idx] = NULL; - TMul[idx] = TLoc[idx] = NULL; - } - - // init only 16 of 343 possible translations due to symmetries - for (int i=2; i<=3; ++i) - for (int j=0; j<=i; ++j) - for (int k=0; k<=j; ++k) { - const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3); - assert(Mul[idx]==NULL || Loc[idx]==NULL); - Mul[idx] = new FReal [24 * nnodes]; - Loc[idx] = new FReal [24 * nnodes]; - TMul[idx] = new FComplex [24 * rc]; - TLoc[idx] = new FComplex [24 * rc]; - } - } - - -#ifdef LOG_TIMINGS - FTic time; - FReal t_m2l_1, t_m2l_2, t_m2l_3; -#endif - -public: - /** - * The constructor initializes all constant attributes and it reads the - * precomputed and compressed M2L operators from a binary file (an - * runtime_error is thrown if the required file is not valid). - */ - FUnifSymKernel(const int inTreeHeight, - const FPoint& inBoxCenter, - const FReal inBoxWidth) - : AbstractBaseClass(inTreeHeight, inBoxCenter, inBoxWidth), - SymM2LHandler(new SymM2LHandlerClass(AbstractBaseClass::MatrixKernel.getPtr(), inBoxWidth, inTreeHeight)), - Loc(NULL), Mul(NULL), countExp(NULL), - TLoc(NULL), TMul(NULL), - Dft(rc) // initialize Discrete Fourier Transformator, - { - this->allocateMemoryForPermutedExpansions(); - - // initialize root node ids - TensorType::setNodeIdsDiff(node_diff); - -#ifdef LOG_TIMINGS - t_m2l_1 = FReal(0.); - t_m2l_2 = FReal(0.); - t_m2l_3 = FReal(0.); -#endif - } - - - - /** Copy constructor */ - FUnifSymKernel(const FUnifSymKernel& other) - : AbstractBaseClass(other), - SymM2LHandler(other.SymM2LHandler), - Loc(NULL), Mul(NULL), countExp(NULL), - TLoc(NULL), TMul(NULL) - { - this->allocateMemoryForPermutedExpansions(); - } - - - - /** Destructor */ - ~FUnifSymKernel() - { - for (unsigned int t=0; t<343; ++t) { - if (Loc[t]!=NULL) delete [] Loc[t]; - if (Mul[t]!=NULL) delete [] Mul[t]; - if (TLoc[t]!=NULL) delete [] TLoc[t]; - if (TMul[t]!=NULL) delete [] TMul[t]; - } - if (Loc!=NULL) delete [] Loc; - if (Mul!=NULL) delete [] Mul; - if (countExp!=NULL) delete [] countExp; - if (TLoc!=NULL) delete [] TLoc; - if (TMul!=NULL) delete [] TMul; - -#ifdef LOG_TIMINGS - std::cout << "- Permutation+Pad+Transfo took " << t_m2l_1 << "s" - << "\n- Apply M2L in Fourier space took " << t_m2l_2 << "s" - << "\n- Unpermutation+Untransfo+Unpad took " << t_m2l_3 << "s" - << std::endl; -#endif - } - - - const SymM2LHandlerClass *const getPtrToSymM2LHandler() const - { return SymM2LHandler.getPtr(); } - - - - void P2M(CellClass* const LeafCell, - const ContainerClass* const SourceParticles) - { - const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate())); - for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - // 1) apply Sy - AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf, - LeafCell->getMultipole(idxRhs), SourceParticles); -// // 2) apply Discrete Fourier Transform -// SymM2LHandler->applyZeroPaddingAndDFT(LeafCell->getMultipole(idxRhs), -// LeafCell->getTransformedMultipole(idxRhs)); - } - } - - - - void M2M(CellClass* const FRestrict ParentCell, - const CellClass*const FRestrict *const FRestrict ChildCells, - const int /*TreeLevel*/) - { - for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - // apply Sy - FBlas::scal(nnodes, FReal(0.), ParentCell->getMultipole(idxRhs)); - for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){ - if (ChildCells[ChildIndex]){ - AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxRhs), ParentCell->getMultipole(idxRhs)); - } - } -// // 2) Apply Discete Fourier Transform -// SymM2LHandler->applyZeroPaddingAndDFT(ParentCell->getMultipole(idxRhs), -// ParentCell->getTransformedMultipole(idxRhs)); - } - } - - - - void M2L(CellClass* const FRestrict TargetCell, - const CellClass* SourceCells[343], - const int /*NumSourceCells*/, - const int TreeLevel) - { -#ifdef LOG_TIMINGS - time.tic(); -#endif - for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - - // permute and copy multipole expansion - memset(countExp, 0, sizeof(int) * 343); - for (unsigned int idx=0; idx<343; ++idx) { - if (SourceCells[idx]) { - const unsigned int pidx = SymM2LHandler->pindices[idx]; - const unsigned int count = (countExp[pidx])++; - FReal *const mul = Mul[pidx] + count*nnodes; - const unsigned int *const pvec = SymM2LHandler->pvectors[idx]; - const FReal *const MultiExp = SourceCells[idx]->getMultipole(idxRhs); - - // explicit loop unrolling - for (unsigned int n=0; ngetScaleFactor(AbstractBaseClass::BoxWidth, TreeLevel); - - FComplex tmpTLoc; - - for (unsigned int pidx=0; pidx<343; ++pidx) { - const unsigned int count = countExp[pidx]; - if (count) { - - FComplex *const tmpK=const_cast(SymM2LHandler->getK(TreeLevel, pidx)); - - for (unsigned int i=0; i(SymM2LHandler->getK(TreeLevel, pidx))+rank*nnodes, -// nnodes, Mul[pidx], nnodes, Compressed, rank); -// // nnodes *count * (2*rank-1) flops -// FBlas::gemm( nnodes, rank, count, scale, -// const_cast(SymM2LHandler->getK(TreeLevel, pidx)), -// nnodes, Compressed, rank, Loc[pidx], nnodes); - } - } - - - -#ifdef LOG_TIMINGS - t_m2l_2 += time.tacAndElapsed(); -#endif - -#ifdef LOG_TIMINGS - time.tic(); -#endif - - // permute and add contribution to local expansions - FReal *const LocalExpansion = TargetCell->getLocal(idxRhs); - memset(countExp, 0, sizeof(int) * 343); - for (unsigned int idx=0; idx<343; ++idx) { - if (SourceCells[idx]) { - const unsigned int pidx = SymM2LHandler->pindices[idx]; - const unsigned int count = (countExp[pidx])++; - - FReal *const loc = Loc[pidx] + count*nnodes; - const FComplex *const tloc = TLoc[pidx] + count*rc; - - /////////////////////////////////////////// - FReal ploc[rc]; - FBlas::setzero(rc,ploc); - // Apply forward Discrete Fourier Transform - Dft.applyIDFT(tloc,ploc); - - // Unapply Zero Padding - for (unsigned int j=0; jpvectors[idx]; - - /* - // no loop unrolling - for (unsigned int n=0; nunapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxRhs), -// const_cast(ParentCell)->getLocal(idxRhs)); - // 2) apply Sx - for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){ - if (ChildCells[ChildIndex]){ - AbstractBaseClass::Interpolator->applyL2L(ChildIndex, ParentCell->getLocal(idxRhs), ChildCells[ChildIndex]->getLocal(idxRhs)); - } - } - } - } - - - - void L2P(const CellClass* const LeafCell, - ContainerClass* const TargetParticles) - { - const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate())); - - for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - -// // 1) Apply Inverse Discete Fourier Transform -// SymM2LHandler->unapplyZeroPaddingAndDFT(LeafCell->getTransformedLocal(idxRhs), -// const_cast(LeafCell)->getLocal(idxRhs)); -// - // 2.a) apply Sx - AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf, - LeafCell->getLocal(idxRhs), TargetParticles); - - // 2.b) apply Px (grad Sx) - AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf, - LeafCell->getLocal(idxRhs), TargetParticles); - } - } - - void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions - ContainerClass* const FRestrict TargetParticles, - const ContainerClass* const FRestrict /*SourceParticles*/, - ContainerClass* const NeighborSourceParticles[27], - const int /* size */) - { - DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles); - } - - - void P2PRemote(const FTreeCoordinate& /*inPosition*/, - ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, - ContainerClass* const inNeighbors[27], const int /*inSize*/){ - DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27); - } - -}; - - - - - - - - -#endif //FUNIFSYMKERNELS_HPP - -// [--END--] diff --git a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp index 84aa4e56..81e5988f 100644 --- a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp @@ -251,14 +251,14 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); + DirectInteractionComputer::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); } void P2PRemote(const FTreeCoordinate& /*inPosition*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/){ - DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); + DirectInteractionComputer::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); } }; diff --git a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp index fa59c216..46d6c527 100644 --- a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp @@ -212,7 +212,7 @@ class FUnifTensorialM2LHandler { const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f'); std::stringstream stream; - stream << "m2l_k"/*<< MatrixKernelClass::Identifier*/ << "_" << precision_type + stream << "m2l_k"/*<< MatrixKernelClass::getID()*/ << "_" << precision_type << "_o" << order << ".bin"; return stream.str(); } @@ -406,7 +406,7 @@ class FUnifTensorialM2LHandler { const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f'); std::stringstream stream; - stream << "m2l_k"/*<< MatrixKernelClass::Identifier*/ << "_" << precision_type + stream << "m2l_k"/*<< MatrixKernelClass::getID*/ << "_" << precision_type << "_o" << order << ".bin"; return stream.str(); } diff --git a/Tests/Kernels/testChebTensorialAlgorithm.cpp b/Tests/Kernels/testChebTensorialAlgorithm.cpp index 01f762c3..2a928c6c 100644 --- a/Tests/Kernels/testChebTensorialAlgorithm.cpp +++ b/Tests/Kernels/testChebTensorialAlgorithm.cpp @@ -68,10 +68,7 @@ int main(int argc, char* argv[]) // typedefs typedef FInterpMatrixKernel_R_IJ MatrixKernelClass; -// typedef FInterpMatrixKernelR MatrixKernelClass; -// typedef FInterpMatrixKernel_R_IJK MatrixKernelClass; - const KERNEL_FUNCTION_IDENTIFIER MK_ID = MatrixKernelClass::Identifier; const unsigned int NPV = MatrixKernelClass::NPV; const unsigned int NPOT = MatrixKernelClass::NPOT; const unsigned int NRHS = MatrixKernelClass::NRHS; @@ -79,15 +76,6 @@ int main(int argc, char* argv[]) const double CoreWidth = 0.1; const MatrixKernelClass MatrixKernel(CoreWidth); - std::cout<< "Interaction kernel: "; - if(MK_ID == ONE_OVER_R) std::cout<< "1/R"; - else if(MK_ID == R_IJ) - std::cout<< "Ra_{,ij}" - << " with core width a=" << CoreWidth < InterpolatorClass; InterpolatorClass S; MatrixKernelClass MatrixKernel; - RIJKMatrixKernelClass RIJKMatrixKernel; std::cout << "\nCompute interactions approximatively, interpolation order = " << ORDER << " ..." << std::endl; @@ -559,19 +557,21 @@ int main(int, char **){ // f[counter*3 + 1] += force.getY() * wx * wy; // f[counter*3 + 2] += force.getZ() * wx * wy; + FReal rij[ncmp]; + FReal rijk[ncmp][3]; + MatrixKernel.evaluateBlockAndDerivative(x,y,rij,rijk); + // R,ij and (R,ij),k for (unsigned int i=0; i