Attention une mise à jour du serveur va être effectuée le vendredi 16 avril entre 12h et 12h30. Cette mise à jour va générer une interruption du service de quelques minutes.

Commit 0b5d3de2 authored by BLANCHARD Pierre's avatar BLANCHARD Pierre

Implemented multi Rhs (i.e. MatMat product) for Uniform FMM.

parent 77f7a676
......@@ -285,11 +285,11 @@ public:
/////////////////////////////////////////////////////
/////////////////////////////////////////////////////
ValueClass* getRawData(){
AttributeClass* getRawData(){
return reinterpret_cast<AttributeClass*>(positions[2] + allocatedParticles);
}
const ValueClass* getRawData() const {
const AttributeClass* getRawData() const {
return reinterpret_cast<AttributeClass*>(positions[2] + allocatedParticles);
}
......
......@@ -216,14 +216,14 @@ public:
ContainerClass* const NeighborSourceParticles[27],
const int /* size */)
{
DirectInteractionComputer<MatrixKernelClass::NCMP,NVALS>::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel);
DirectInteractionComputer<MatrixKernelClass::NCMP, NVALS>::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel);
}
void P2PRemote(const FTreeCoordinate& /*inPosition*/,
ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/,
ContainerClass* const inNeighbors[27], const int /*inSize*/){
DirectInteractionComputer<MatrixKernelClass::NCMP,NVALS>::P2PRemote(inTargets,inNeighbors,27,MatrixKernel);
DirectInteractionComputer<MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,27,MatrixKernel);
}
};
......
......@@ -16,9 +16,7 @@ struct DirectInteractionComputer
static void P2P( ContainerClass* const FRestrict TargetParticles,
ContainerClass* const NeighborSourceParticles[27],
const MatrixKernelClass *const MatrixKernel){
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
FP2P::FullMutualKIJ(TargetParticles,NeighborSourceParticles,14,MatrixKernel);
}
}
template <typename ContainerClass, typename MatrixKernelClass>
......@@ -26,22 +24,39 @@ struct DirectInteractionComputer
ContainerClass* const inNeighbors[27],
const int inSize,
const MatrixKernelClass *const MatrixKernel){
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
FP2P::FullRemoteKIJ(inTargets,inNeighbors,inSize,MatrixKernel);
}
}
};
/*! Specialization for scalar kernels */
template <int NVALS>
struct DirectInteractionComputer<1, NVALS>
struct DirectInteractionComputer<1,NVALS>
{
template <typename ContainerClass, typename MatrixKernelClass>
static void P2P( ContainerClass* const FRestrict TargetParticles,
ContainerClass* const NeighborSourceParticles[27],
const MatrixKernelClass *const MatrixKernel){
FP2P::FullMutualMultiRhs(TargetParticles,NeighborSourceParticles,14,MatrixKernel);
}
template <typename ContainerClass, typename MatrixKernelClass>
static void P2PRemote( ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[27],
const int inSize,
const MatrixKernelClass *const MatrixKernel){
FP2P::FullRemoteMultiRhs(inTargets,inNeighbors,inSize,MatrixKernel);
}
};
/*! Specialization for scalar kernels and single rhs*/
template <>
struct DirectInteractionComputer<1,1>
{
template <typename ContainerClass, typename MatrixKernelClass>
static void P2P( ContainerClass* const FRestrict TargetParticles,
ContainerClass* const NeighborSourceParticles[27],
const MatrixKernelClass *const MatrixKernel){
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs)
FP2P::FullMutual(TargetParticles,NeighborSourceParticles,14,MatrixKernel);
}
......@@ -50,7 +65,6 @@ struct DirectInteractionComputer<1, NVALS>
ContainerClass* const inNeighbors[27],
const int inSize,
const MatrixKernelClass *const MatrixKernel){
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs)
FP2P::FullRemote(inTargets,inNeighbors,inSize,MatrixKernel);
}
};
......
......@@ -378,6 +378,7 @@ inline void NonMutualParticles(const FReal sourceX,const FReal sourceY,const FRe
#include "FP2PAvx.h"
#else
#include "FP2PClassic.hpp"
#include "FP2PMultiRhs.hpp"
#endif //Includes
#endif // FP2P_HPP
// ===================================================================================
// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner
// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
// This software is a computer program whose purpose is to compute the FMM.
//
// This software is governed by the CeCILL-C and LGPL licenses and
// abiding by the rules of distribution of free software.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public and CeCILL-C Licenses for more details.
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FP2PMULTIRHS_HPP
#define FP2PMULTIRHS_HPP
namespace FP2P {
/*
* FullMutualMultiRhs (generic version)
*/
template <class ContainerClass, typename MatrixKernelClass>
inline void FullMutualMultiRhs(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
const int nbParticlesTargets = inTargets->getNbParticles();
const FReal*const targetsPhysicalValues = inTargets->getPhysicalValuesArray();
const FReal*const targetsX = inTargets->getPositions()[0];
const FReal*const targetsY = inTargets->getPositions()[1];
const FReal*const targetsZ = inTargets->getPositions()[2];
FReal*const targetsForcesX = inTargets->getForcesXArray();
FReal*const targetsForcesY = inTargets->getForcesYArray();
FReal*const targetsForcesZ = inTargets->getForcesZArray();
FReal*const targetsPotentials = inTargets->getPotentialsArray();
const int NVALS = inTargets->getNVALS();
const int targetsLD = inTargets->getLeadingDimension();
for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){
for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){
if( inNeighbors[idxNeighbors] ){
const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles();
const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValuesArray();
const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0];
const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1];
const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2];
FReal*const sourcesForcesX = inNeighbors[idxNeighbors]->getForcesXArray();
FReal*const sourcesForcesY = inNeighbors[idxNeighbors]->getForcesYArray();
FReal*const sourcesForcesZ = inNeighbors[idxNeighbors]->getForcesZArray();
FReal*const sourcesPotentials = inNeighbors[idxNeighbors]->getPotentialsArray();
const int sourcesLD = inNeighbors[idxNeighbors]->getLeadingDimension();
for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){
// Compute kernel of interaction and its derivative
const FPoint sourcePoint(sourcesX[idxSource],sourcesY[idxSource],sourcesZ[idxSource]);
const FPoint targetPoint(targetsX[idxTarget],targetsY[idxTarget],targetsZ[idxTarget]);
FReal Kxy[1];
FReal dKxy[3];
MatrixKernel->evaluateBlockAndDerivative(sourcePoint.getX(),sourcePoint.getY(),sourcePoint.getZ(),
targetPoint.getX(),targetPoint.getY(),targetPoint.getZ(),Kxy,dKxy);
for(int idxVals = 0 ; idxVals < NVALS ; ++idxVals){
const int idxTargetValue = idxVals*targetsLD+idxTarget;
const int idxSourceValue = idxVals*sourcesLD+idxSource;
FReal coef = (targetsPhysicalValues[idxTargetValue] * sourcesPhysicalValues[idxSourceValue]);
targetsForcesX[idxTargetValue] += dKxy[0] * coef;
targetsForcesY[idxTargetValue] += dKxy[1] * coef;
targetsForcesZ[idxTargetValue] += dKxy[2] * coef;
targetsPotentials[idxTargetValue] += ( Kxy[0] * sourcesPhysicalValues[idxSourceValue] );
sourcesForcesX[idxSourceValue] -= dKxy[0] * coef;
sourcesForcesY[idxSourceValue] -= dKxy[1] * coef;
sourcesForcesZ[idxSourceValue] -= dKxy[2] * coef;
sourcesPotentials[idxSourceValue] += ( Kxy[0] * targetsPhysicalValues[idxTargetValue] );
} // NVALS
}
}
}
}
for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){
for(int idxSource = idxTarget + 1 ; idxSource < nbParticlesTargets ; ++idxSource){
// Compute kernel of interaction...
const FPoint sourcePoint(targetsX[idxSource],targetsY[idxSource],targetsZ[idxSource]);
const FPoint targetPoint(targetsX[idxTarget],targetsY[idxTarget],targetsZ[idxTarget]);
FReal Kxy[1];
FReal dKxy[3];
MatrixKernel->evaluateBlockAndDerivative(sourcePoint.getX(),sourcePoint.getY(),sourcePoint.getZ(),
targetPoint.getX(),targetPoint.getY(),targetPoint.getZ(),Kxy,dKxy);
for(int idxVals = 0 ; idxVals < NVALS ; ++idxVals){
const int idxTargetValue = idxVals*targetsLD+idxTarget;
const int idxSourceValue = idxVals*targetsLD+idxSource;
FReal coef = (targetsPhysicalValues[idxTargetValue] * targetsPhysicalValues[idxSourceValue]);
targetsForcesX[idxTargetValue] += dKxy[0] * coef;
targetsForcesY[idxTargetValue] += dKxy[1] * coef;
targetsForcesZ[idxTargetValue] += dKxy[2] * coef;
targetsPotentials[idxTargetValue] += ( Kxy[0] * targetsPhysicalValues[idxSourceValue] );
targetsForcesX[idxSourceValue] -= dKxy[0] * coef;
targetsForcesY[idxSourceValue] -= dKxy[1] * coef;
targetsForcesZ[idxSourceValue] -= dKxy[2] * coef;
targetsPotentials[idxSourceValue] += ( Kxy[0] * targetsPhysicalValues[idxTargetValue] );
}// NVALS
}
}
}
/**
* FullRemoteMultiRhs (generic version)
*/
template <class ContainerClass, typename MatrixKernelClass>
inline void FullRemoteMultiRhs(ContainerClass* const FRestrict inTargets, ContainerClass* const inNeighbors[],
const int limiteNeighbors, const MatrixKernelClass *const MatrixKernel){
const int nbParticlesTargets = inTargets->getNbParticles();
const FReal*const targetsPhysicalValues = inTargets->getPhysicalValuesArray();
const FReal*const targetsX = inTargets->getPositions()[0];
const FReal*const targetsY = inTargets->getPositions()[1];
const FReal*const targetsZ = inTargets->getPositions()[2];
FReal*const targetsForcesX = inTargets->getForcesXArray();
FReal*const targetsForcesY = inTargets->getForcesYArray();
FReal*const targetsForcesZ = inTargets->getForcesZArray();
FReal*const targetsPotentials = inTargets->getPotentialsArray();
const int NVALS = inTargets->getNVALS();
const int targetsLD = inTargets->getLeadingDimension();
for(int idxNeighbors = 0 ; idxNeighbors < limiteNeighbors ; ++idxNeighbors){
for(int idxTarget = 0 ; idxTarget < nbParticlesTargets ; ++idxTarget){
if( inNeighbors[idxNeighbors] ){
const int nbParticlesSources = inNeighbors[idxNeighbors]->getNbParticles();
const FReal*const sourcesPhysicalValues = inNeighbors[idxNeighbors]->getPhysicalValuesArray();
const FReal*const sourcesX = inNeighbors[idxNeighbors]->getPositions()[0];
const FReal*const sourcesY = inNeighbors[idxNeighbors]->getPositions()[1];
const FReal*const sourcesZ = inNeighbors[idxNeighbors]->getPositions()[2];
const int sourcesLD = inNeighbors[idxNeighbors]->getLeadingDimension();
for(int idxSource = 0 ; idxSource < nbParticlesSources ; ++idxSource){
// Compute kernel of interaction...
const FPoint sourcePoint(sourcesX[idxSource],sourcesY[idxSource],sourcesZ[idxSource]);
const FPoint targetPoint(targetsX[idxTarget],targetsY[idxTarget],targetsZ[idxTarget]);
FReal Kxy[1];
FReal dKxy[3];
MatrixKernel->evaluateBlockAndDerivative(sourcePoint.getX(),sourcePoint.getY(),sourcePoint.getZ(),
targetPoint.getX(),targetPoint.getY(),targetPoint.getZ(),Kxy,dKxy);
for(int idxVals = 0 ; idxVals < NVALS ; ++idxVals){
const int idxTargetValue = idxVals*targetsLD+idxTarget;
const int idxSourceValue = idxVals*sourcesLD+idxSource;
FReal coef = (targetsPhysicalValues[idxTargetValue] * sourcesPhysicalValues[idxSourceValue]);
targetsForcesX[idxTargetValue] += dKxy[0] * coef;
targetsForcesY[idxTargetValue] += dKxy[1] * coef;
targetsForcesZ[idxTargetValue] += dKxy[2] * coef;
targetsPotentials[idxTargetValue] += ( Kxy[0] * sourcesPhysicalValues[idxSourceValue] );
} // NVALS
}
}
}
}
}
}
#endif // FP2PMULTIRHS_HPP
......@@ -18,56 +18,105 @@
#include "Components/FBasicParticleContainer.hpp"
template<int NRHS = 1, int NLHS = 1>
class FP2PParticleContainer : public FBasicParticleContainer<NRHS+4*NLHS> {
typedef FBasicParticleContainer<NRHS+4*NLHS> Parent;
template<int NRHS = 1, int NLHS = 1, int NVALS = 1>
class FP2PParticleContainer : public FBasicParticleContainer<NVALS*(NRHS+4*NLHS)> {
typedef FBasicParticleContainer<NVALS*(NRHS+4*NLHS)> Parent;
public:
FReal* getPhysicalValues(const int idxRhs = 0){
return Parent::getAttribute(0+idxRhs);
FReal* getPhysicalValues(const int idxVals = 0, const int idxRhs = 0){
return Parent::getAttribute((0+idxRhs)*NVALS+idxVals);
}
const FReal* getPhysicalValues(const int idxRhs = 0) const {
return Parent::getAttribute(0+idxRhs);
const FReal* getPhysicalValues(const int idxVals = 0, const int idxRhs = 0) const {
return Parent::getAttribute((0+idxRhs)*NVALS+idxVals);
}
FReal* getPotentials(const int idxLhs = 0){
return Parent::getAttribute(NRHS+idxLhs);
FReal* getPhysicalValuesArray(const int idxVals = 0, const int idxRhs = 0){
return Parent::getRawData() + ((0+idxRhs)*NVALS+idxVals)*Parent::getLeadingRawData();
}
const FReal* getPotentials(const int idxLhs = 0) const {
return Parent::getAttribute(NRHS+idxLhs);
const FReal* getPhysicalValuesArray(const int idxVals = 0, const int idxRhs = 0) const {
return Parent::getRawData() + ((0+idxRhs)*NVALS+idxVals)*Parent::getLeadingRawData();
}
FReal* getForcesX(const int idxLhs = 0){
return Parent::getAttribute(NRHS+NLHS+idxLhs);
int getLeadingDimension(){
return Parent::getLeadingRawData();
}
const FReal* getForcesX(const int idxLhs = 0) const {
return Parent::getAttribute(NRHS+NLHS+idxLhs);
FReal* getPotentials(const int idxVals = 0, const int idxLhs = 0){
return Parent::getAttribute((NRHS+idxLhs)*NVALS+idxVals);
}
FReal* getForcesY(const int idxLhs = 0){
return Parent::getAttribute(NRHS+2*NLHS+idxLhs);
const FReal* getPotentials(const int idxVals = 0, const int idxLhs = 0) const {
return Parent::getAttribute((NRHS+idxLhs)*NVALS+idxVals);
}
const FReal* getForcesY(const int idxLhs = 0) const {
return Parent::getAttribute(NRHS+2*NLHS+idxLhs);
FReal* getPotentialsArray(const int idxVals = 0, const int idxLhs = 0){
return Parent::getRawData() + ((NRHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData();
}
FReal* getForcesZ(const int idxLhs = 0){
return Parent::getAttribute(NRHS+3*NLHS+idxLhs);
const FReal* getPotentialsArray(const int idxVals = 0, const int idxLhs = 0) const {
return Parent::getRawData() + ((NRHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData();
}
const FReal* getForcesZ(const int idxLhs = 0) const {
return Parent::getAttribute(NRHS+3*NLHS+idxLhs);
FReal* getForcesX(const int idxVals = 0, const int idxLhs = 0){
return Parent::getAttribute((NRHS+NLHS+idxLhs)*NVALS+idxVals);
}
const FReal* getForcesX(const int idxVals = 0, const int idxLhs = 0) const {
return Parent::getAttribute((NRHS+NLHS+idxLhs)*NVALS+idxVals);
}
FReal* getForcesXArray(const int idxVals = 0, const int idxLhs = 0){
return Parent::getRawData() + ((NRHS+NLHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData();
}
const FReal* getForcesXArray(const int idxVals = 0, const int idxLhs = 0) const {
return Parent::getRawData() + ((NRHS+NLHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData();
}
FReal* getForcesY(const int idxVals = 0, const int idxLhs = 0){
return Parent::getAttribute((NRHS+2*NLHS+idxLhs)*NVALS+idxVals);
}
const FReal* getForcesY(const int idxVals = 0, const int idxLhs = 0) const {
return Parent::getAttribute((NRHS+2*NLHS+idxLhs)*NVALS+idxVals);
}
FReal* getForcesYArray(const int idxVals = 0, const int idxLhs = 0){
return Parent::getRawData() + ((NRHS+2*NLHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData();
}
const FReal* getForcesYArray(const int idxVals = 0, const int idxLhs = 0) const {
return Parent::getRawData() + ((NRHS+2*NLHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData();
}
FReal* getForcesZ(const int idxVals = 0, const int idxLhs = 0){
return Parent::getAttribute((NRHS+3*NLHS+idxLhs)*NVALS+idxVals);
}
const FReal* getForcesZ(const int idxVals = 0, const int idxLhs = 0) const {
return Parent::getAttribute((NRHS+3*NLHS+idxLhs)*NVALS+idxVals);
}
FReal* getForcesZArray(const int idxVals = 0, const int idxLhs = 0){
return Parent::getRawData() + ((NRHS+3*NLHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData();
}
const FReal* getForcesZArray(const int idxVals = 0, const int idxLhs = 0) const {
return Parent::getRawData() + ((NRHS+3*NLHS+idxLhs)*NVALS+idxVals)*Parent::getLeadingRawData();
}
void resetForcesAndPotential(){
for(int idx = 0 ; idx < 4*NLHS ; ++idx){
Parent::resetToInitialState(idx + NRHS);
for(int idx = 0 ; idx < 4*NLHS*NVALS ; ++idx){
Parent::resetToInitialState(idx + NRHS*NVALS);
}
}
const int getNVALS() const {
return NVALS;
}
};
#endif // FP2PPARTICLECONTAINER_HPP
......@@ -21,9 +21,9 @@
#include "FP2PParticleContainer.hpp"
#include "Components/FParticleType.hpp"
template<int NRHS = 1, int NLHS = 1>
class FP2PParticleContainerIndexed : public FP2PParticleContainer<NRHS,NLHS> {
typedef FP2PParticleContainer<NRHS,NLHS> Parent;
template<int NRHS = 1, int NLHS = 1, int NVALS = 1>
class FP2PParticleContainerIndexed : public FP2PParticleContainer<NRHS,NLHS,NVALS> {
typedef FP2PParticleContainer<NRHS,NLHS,NVALS> Parent;
FVector<int> indexes;
......
......@@ -45,7 +45,7 @@ class FAbstractUnifKernel : public FAbstractKernels< CellClass, ContainerClass>
{
protected:
enum {nnodes = TensorTraits<ORDER>::nnodes};
typedef FUnifInterpolator<ORDER,MatrixKernelClass> InterpolatorClass;
typedef FUnifInterpolator<ORDER,MatrixKernelClass,NVALS> InterpolatorClass;
/// Needed for P2M, M2M, L2L and L2P operators
const FSmartPointer<InterpolatorClass,FSmartPointerMemory> Interpolator;
......
......@@ -37,14 +37,15 @@
* The class @p FUnifInterpolator defines the anterpolation (M2M) and
* interpolation (L2L) concerning operations.
*/
template <int ORDER, class MatrixKernelClass>
template <int ORDER, class MatrixKernelClass, int NVALS = 1>
class FUnifInterpolator : FNoCopyable
{
// compile time constants and types
enum {nnodes = TensorTraits<ORDER>::nnodes,
nRhs = MatrixKernelClass::NRHS,
nLhs = MatrixKernelClass::NLHS,
nPV = MatrixKernelClass::NPV};
nPV = MatrixKernelClass::NPV,
nVals = NVALS};
typedef FUnifRoots< ORDER> BasisType;
typedef FUnifTensor<ORDER> TensorType;
......@@ -408,9 +409,9 @@ public:
* Particle to moment: application of \f$S_\ell(y,\bar y_n)\f$
* (anterpolation, it is the transposed interpolation)
*/
template <int ORDER, class MatrixKernelClass>
template <int ORDER, class MatrixKernelClass, int NVALS>
template <class ContainerClass>
inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyP2M(const FPoint& center,
inline void FUnifInterpolator<ORDER,MatrixKernelClass,NVALS>::applyP2M(const FPoint& center,
const FReal width,
FReal *const multipoleExpansion,
const ContainerClass *const inParticles) const
......@@ -438,22 +439,30 @@ inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyP2M(const FPoint& c
for(int idxRhs = 0 ; idxRhs < nRhs ; ++idxRhs){
// read physicalValue
const FReal*const physicalValues = inParticles->getPhysicalValues(idxRhs);
// compute weight
const FReal weight = physicalValues[idxPart];
FReal weight[nVals];
for(int idxVals = 0 ; idxVals < nVals ; ++idxVals){
// read physicalValue
const FReal*const physicalValues = inParticles->getPhysicalValues(idxVals,idxRhs);
weight[idxVals] = physicalValues[idxPart];
} // nVals
// assemble multipole expansions
for (unsigned int i=0; i<ORDER; ++i) {
for (unsigned int j=0; j<ORDER; ++j) {
for (unsigned int k=0; k<ORDER; ++k) {
const unsigned int idx = idxRhs*nnodes + k*ORDER*ORDER + j*ORDER + i;
multipoleExpansion[idx] += L_of_x[i][0] * L_of_x[j][1] * L_of_x[k][2] * weight; // 3 * ORDER*ORDER*ORDER flops
const unsigned int idx = idxRhs*nVals*nnodes + k*ORDER*ORDER + j*ORDER + i;
const FReal S = L_of_x[i][0] * L_of_x[j][1] * L_of_x[k][2];
for(int idxVals = 0 ; idxVals < nVals ; ++idxVals)
multipoleExpansion[idxVals*nnodes+idx] += S * weight[idxVals]; // 3 * ORDER*ORDER*ORDER flops
}
}
}
} // idxRhs
} // nRhs
} // flops: N * (3 * ORDER*ORDER*ORDER + 3 * 3 * ORDER*(ORDER-1)) flops
......@@ -463,9 +472,9 @@ inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyP2M(const FPoint& c
/**
* Local to particle operation: application of \f$S_\ell(x,\bar x_m)\f$ (interpolation)
*/
template <int ORDER, class MatrixKernelClass>
template <int ORDER, class MatrixKernelClass, int NVALS>
template <class ContainerClass>
inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& center,
inline void FUnifInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2P(const FPoint& center,
const FReal width,
const FReal *const localExpansion,
ContainerClass *const inParticles) const
......@@ -502,24 +511,35 @@ inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& c
// In fact : f_{ik}(x)=w_j(x) \nabla_{x_i} K_{ij}(x,y)w_j(y))
const unsigned int idxPot = idxLhs / nPV;
// interpolate and increment target value
FReal targetValue=0.;
FReal targetValue[nVals];
for(int idxVals = 0 ; idxVals < nVals ; ++idxVals)
targetValue[idxVals]=0.;
{
for (unsigned int l=0; l<ORDER; ++l) {
for (unsigned int m=0; m<ORDER; ++m) {
for (unsigned int n=0; n<ORDER; ++n) {
const unsigned int idx = idxLhs*nnodes + n*ORDER*ORDER + m*ORDER + l;
targetValue +=
L_of_x[l][0] * L_of_x[m][1] * L_of_x[n][2] * localExpansion[idx];
const unsigned int idx = idxLhs*nVals*nnodes + n*ORDER*ORDER + m*ORDER + l;
const FReal S = L_of_x[l][0] * L_of_x[m][1] * L_of_x[n][2];
for(int idxVals = 0 ; idxVals < nVals ; ++idxVals)
targetValue[idxVals] += S * localExpansion[idxVals*nnodes+idx];
} // ORDER * 4 flops
} // ORDER * ORDER * 4 flops
} // ORDER * ORDER * ORDER * 4 flops
}
// get potential
FReal*const potentials = inParticles->getPotentials(idxPot);
// add contribution to potential
potentials[idxPart] += (targetValue);
for(int idxVals = 0 ; idxVals < nVals ; ++idxVals){
// get potential
FReal*const potentials = inParticles->getPotentials(idxVals,idxPot);
// add contribution to potential
potentials[idxPart] += (targetValue[idxVals]);
}// NVALS
} // idxLhs
......@@ -531,9 +551,9 @@ inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyL2P(const FPoint& c
/**
* Local to particle operation: application of \f$\nabla_x S_\ell(x,\bar x_m)\f$ (interpolation)
*/
template <int ORDER, class MatrixKernelClass>
template <int ORDER, class MatrixKernelClass, int NVALS>
template <class ContainerClass>
inline void FUnifInterpolator<ORDER,MatrixKernelClass>::applyL2PGradient(const FPoint& center,
inline void FUnifInterpolator<ORDER,MatrixKernelClass,NVALS>::applyL2PGradient(const FPoint& center,
const FReal width,