Commit 088fbae5 authored by BLANCHARD Pierre's avatar BLANCHARD Pierre

Fixed Uniform FMAlgoThread (FFT implementation is not thread safe), some cleanup.

parent bb443606
#include "FInterpMatrixKernel.hpp"
/// ID_OVER_R
// Out-of-class definitions of the static lookup tables of FInterpMatrixKernel_IOR.
// indexTab holds 2*NCMP = 12 entries: the kernel constructor reads the first
// index from indexTab[d] and the second from indexTab[d+NCMP], so the first
// row below lists i and the second row lists j for each symmetric index d.
const unsigned int FInterpMatrixKernel_IOR::indexTab[]={0,0,0,1,1,2,
0,1,2,1,2,2};
// applyTab holds 9 entries, one per position of the full 3x3 matrix; each entry
// is the position of that component in the reduced (symmetric) storage, as
// returned by FInterpMatrixKernel_IOR::getPosition().
const unsigned int FInterpMatrixKernel_IOR::applyTab[]={0,1,2,
1,3,4,
2,4,5};
/// R_IJ
// Out-of-class definition of the static index table of FInterpMatrixKernel_R_IJ.
// NOTE(review): the values are identical to FInterpMatrixKernel_IOR::indexTab,
// so this presumably uses the same layout (row of i followed by row of j for
// each symmetric index) -- confirm against the R_IJ struct declaration.
const unsigned int FInterpMatrixKernel_R_IJ::indexTab[]={0,0,0,1,1,2,
0,1,2,1,2,2};
......
......@@ -27,7 +27,6 @@
// Compile-time tag naming each supported kernel function. Matrix kernel structs
// expose it as their static `Identifier` member, and DirectInteractionComputer
// is specialized on it to select the matching direct (P2P) routines.
enum KERNEL_FUNCTION_IDENTIFIER {ONE_OVER_R, // identifier of FInterpMatrixKernelR ("One over r")
ONE_OVER_R_SQUARED, // presumably the 1/r^2 kernel (FInterpMatrixKernelRR) -- confirm
LENNARD_JONES_POTENTIAL, // has a dedicated DirectInteractionComputer specialization
ID_OVER_R, // identifier of FInterpMatrixKernel_IOR (tensorial test kernel 1/R*Id_3)
R_IJ, // R_{,ij}; its DirectInteractionComputer specialization is labelled "GradGradR"
R_IJK}; // R_{,ijk}
......@@ -68,7 +67,7 @@ struct FInterpAbstractMatrixKernel : FNoCopyable
/// One over r
struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel
{
static const KERNEL_FUNCTION_TYPE Type = /*NON_*/HOMOGENEOUS;
static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS;
static const KERNEL_FUNCTION_IDENTIFIER Identifier = ONE_OVER_R;
static const unsigned int NCMP = 1; //< number of components
static const unsigned int NPV = 1; //< dim of physical values
......@@ -106,18 +105,6 @@ struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel
return FReal(2.) / CellWidth;
}
// FReal getScaleFactor(const FReal, const int) const
// {
// // return 1 because non homogeneous kernel functions cannot be scaled!!!
// return FReal(1.);
// }
//
// FReal getScaleFactor(const FReal) const
// {
// // return 1 because non homogeneous kernel functions cannot be scaled!!!
// return FReal(1.);
// }
};
......@@ -160,7 +147,7 @@ struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel
FReal getScaleFactor(const FReal CellWidth) const
{
return FReal(4.) / CellWidth;
return FReal(4.) / (CellWidth*CellWidth);
}
};
......@@ -241,80 +228,6 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel
////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
/// Test Tensorial kernel 1/R*Id_3
struct FInterpMatrixKernel_IOR : FInterpAbstractMatrixKernel
{
static const KERNEL_FUNCTION_TYPE Type = /*NON_*/HOMOGENEOUS;
static const KERNEL_FUNCTION_IDENTIFIER Identifier = ID_OVER_R;
static const unsigned int NK = 3*3; //< total number of components
static const unsigned int NCMP = 6; //< number of components after symmetry
static const unsigned int NPV = 3; //< dim of physical values
static const unsigned int NPOT = 3; //< dim of potentials
static const unsigned int NRHS = NPV; //< dim of mult exp
static const unsigned int NLHS = NPOT*NRHS; //< dim of loc exp
// store indices (i,j) corresponding to sym idx
static const unsigned int indexTab[/*2*NCMP=12*/];
// store positions in sym tensor
static const unsigned int applyTab[/*9*/];
const unsigned int _i,_j;
FInterpMatrixKernel_IOR(const double = 0.0, const unsigned int d = 0)
: _i(indexTab[d]), _j(indexTab[d+NCMP])
{}
// returns position in reduced storage from position in full 3x3 matrix
unsigned int getPosition(const unsigned int n) const
{return applyTab[n];}
FReal evaluate(const FPoint& x, const FPoint& y) const
{
const FPoint xy(x-y);
// low rank approx does not support nul kernels
// if(_i==_j)
return FReal(1.)/xy.norm();
// else
// return FReal(0.);
}
void evaluateBlock(const FPoint& x, const FPoint& y, FReal* block) const
{
const FPoint xy(x-y);
const FReal one_over_r = FReal(1.)/xy.norm();
for(unsigned int d=0;d<NCMP;++d){
// unsigned int i = indexTab[d];
// unsigned int j = indexTab[d+NCMP];
// if(i==j)
block[d] = one_over_r;
// else
// block[d] = 0.0;
}
}
FReal getScaleFactor(const FReal RootCellWidth, const int TreeLevel) const
{
const FReal CellWidth(RootCellWidth / FReal(FMath::pow(2, TreeLevel)));
return getScaleFactor(CellWidth);
}
FReal getScaleFactor(const FReal CellWidth) const
{
return FReal(2.) / CellWidth;
}
// FReal getScaleFactor(const FReal) const
// {
// // return 1 because non homogeneous kernel functions cannot be scaled!!!
// return FReal(1.);
// }
};
/// R_{,ij}
// PB: IMPORTANT! This matrix kernel does not present the symmetries
......@@ -413,12 +326,6 @@ struct FInterpMatrixKernel_R_IJ : FInterpAbstractMatrixKernel
return FReal(2.) / CellWidth;
}
// // R_{,ij} is set non-homogeneous
// FReal getScaleFactor(const FReal CellWidth) const
// {
// return FReal(1.);
// }
};
/// R_{,ijk}
......@@ -550,12 +457,6 @@ struct FInterpMatrixKernel_R_IJK : FInterpAbstractMatrixKernel
return FReal(4.) / (CellWidth*CellWidth);
}
// // R_{,ijk} is set non-homogeneous
// FReal getScaleFactor(const FReal CellWidth) const
// {
// return FReal(1.);
// }
};
......
......@@ -48,24 +48,6 @@ struct DirectInteractionComputer<LENNARD_JONES_POTENTIAL, 1>
}
};
/*! Specialization for ID_OVER_R potential
 *
 * Full specialization with the second template argument fixed to 1. Both
 * entry points simply forward to the matching FP2P "IOR" routine.
 */
template <>
struct DirectInteractionComputer<ID_OVER_R, 1>
{
// Direct interaction between the target container and its 27-entry neighbor
// array; forwards to FP2P::FullMutualIOR.
// NOTE(review): the meaning of the literal 14 is not visible from here --
// confirm against the FP2P::FullMutualIOR signature.
template <typename ContainerClass>
static void P2P( ContainerClass* const FRestrict TargetParticles,
ContainerClass* const NeighborSourceParticles[27]){
FP2P::FullMutualIOR(TargetParticles,NeighborSourceParticles,14);
}
// Remote variant: forwards the targets, the neighbor array and inSize
// unchanged to FP2P::FullRemoteIOR.
template <typename ContainerClass>
static void P2PRemote( ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[27],
const int inSize){
FP2P::FullRemoteIOR(inTargets,inNeighbors,inSize);
}
};
/*! Specialization for GradGradR potential */
template <>
struct DirectInteractionComputer<R_IJ, 1>
......@@ -156,27 +138,6 @@ struct DirectInteractionComputer<LENNARD_JONES_POTENTIAL, NVALS>
}
};
/*! Specialization for ID_OVER_R potential
 *
 * Partial specialization for an arbitrary NVALS. Each entry point calls the
 * matching FP2P "IOR" routine NVALS times.
 *
 * NOTE(review): the loop index idxRhs is never passed to the FP2P routines,
 * so every iteration issues an identical call. Whether repeating the call is
 * intended depends on what FullMutualIOR/FullRemoteIOR do to the particle
 * containers, which is not visible here -- confirm.
 */
template <int NVALS>
struct DirectInteractionComputer<ID_OVER_R, NVALS>
{
// Direct interaction between the target container and its 27-entry neighbor
// array, repeated NVALS times.
// NOTE(review): the meaning of the literal 14 is not visible from here --
// confirm against the FP2P::FullMutualIOR signature.
template <typename ContainerClass>
static void P2P( ContainerClass* const FRestrict TargetParticles,
ContainerClass* const NeighborSourceParticles[27]){
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
FP2P::FullMutualIOR(TargetParticles,NeighborSourceParticles,14);
}
}
// Remote variant: forwards the targets, the neighbor array and inSize
// unchanged to FP2P::FullRemoteIOR, repeated NVALS times.
template <typename ContainerClass>
static void P2PRemote( ContainerClass* const FRestrict inTargets,
ContainerClass* const inNeighbors[27],
const int inSize){
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
FP2P::FullRemoteIOR(inTargets,inNeighbors,inSize);
}
}
};
/*! Specialization for GradGradR potential */
template <int NVALS>
......
This diff is collapsed.
......@@ -52,7 +52,8 @@ class FUnifKernel
AbstractBaseClass;
/// Needed for M2L operator
FSmartPointer< M2LHandlerClass,FSmartPointerMemory> M2LHandler;
const M2LHandlerClass M2LHandler;
public:
/**
......@@ -64,9 +65,9 @@ public:
const FReal inBoxWidth,
const FPoint& inBoxCenter)
: FAbstractUnifKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>(inTreeHeight,inBoxWidth,inBoxCenter),
M2LHandler(new M2LHandlerClass(AbstractBaseClass::MatrixKernel.getPtr(),
inTreeHeight,
inBoxWidth))// PB: for non homogeneous case
M2LHandler(AbstractBaseClass::MatrixKernel.getPtr(),
inTreeHeight,
inBoxWidth)
{ }
......@@ -79,7 +80,7 @@ public:
AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf,
LeafCell->getMultipole(idxRhs), SourceParticles);
// 2) apply Discrete Fourier Transform
M2LHandler->applyZeroPaddingAndDFT(LeafCell->getMultipole(idxRhs),
M2LHandler.applyZeroPaddingAndDFT(LeafCell->getMultipole(idxRhs),
LeafCell->getTransformedMultipole(idxRhs));
}
......@@ -100,7 +101,7 @@ public:
}
}
// 2) Apply Discrete Fourier Transform
M2LHandler->applyZeroPaddingAndDFT(ParentCell->getMultipole(idxRhs),
M2LHandler.applyZeroPaddingAndDFT(ParentCell->getMultipole(idxRhs),
ParentCell->getTransformedMultipole(idxRhs));
}
}
......@@ -137,7 +138,7 @@ public:
for (int idx=0; idx<343; ++idx){
if (SourceCells[idx]){
M2LHandler->applyFC(idx, TreeLevel, scale,
M2LHandler.applyFC(idx, TreeLevel, scale,
SourceCells[idx]->getTransformedMultipole(idxRhs),
TransformedLocalExpansion);
}
......@@ -168,7 +169,7 @@ public:
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) Apply Inverse Discrete Fourier Transform
M2LHandler->unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxRhs),
M2LHandler.unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxRhs),
const_cast<CellClass*>(ParentCell)->getLocal(idxRhs));
// 2) apply Sx
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
......@@ -187,7 +188,7 @@ public:
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) Apply Inverse Discrete Fourier Transform
M2LHandler->unapplyZeroPaddingAndDFT(LeafCell->getTransformedLocal(idxRhs),
M2LHandler.unapplyZeroPaddingAndDFT(LeafCell->getTransformedLocal(idxRhs),
const_cast<CellClass*>(LeafCell)->getLocal(idxRhs));
// 2.a) apply Sx
......
......@@ -40,7 +40,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel
// allocate memory and store compressed M2L operators
if (FC) throw std::runtime_error("M2L operators are already set");
// PB: need to redefine some constant since not function from m2lhandler class
// dimensions of operators
const unsigned int order = ORDER;
const unsigned int nnodes = TensorTraits<ORDER>::nnodes;
const unsigned int ninteractions = 316;
......@@ -68,8 +68,8 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel
// init Discrete Fourier Transformator
const int dimfft = 1; // unidim FFT since fully circulant embedding
const int steps[dimfft] = {rc};
// FDft Dft(rc);
FFft<dimfft> Dft(steps);
FFft<dimfft> Dft;
Dft.buildDFT(steps);
// get first column of K via permutation
unsigned int perm[rc];
......@@ -97,7 +97,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel
// _C[counter*rc + ido]
_C[perm[ido]]
= MatrixKernel->evaluate(X[node_ids_pairs[ido][0]],
Y[node_ids_pairs[ido][1]]);
Y[node_ids_pairs[ido][1]]);
ido++;
}
......@@ -130,15 +130,9 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel
if (abs(i)>1 || abs(j)>1 || abs(k)>1) {
FBlas::c_copy(opt_rc, reinterpret_cast<FReal*>(_FC + counter*rc),
reinterpret_cast<FReal*>(FC + idx*opt_rc));
// for (unsigned int n=0; n<rc; ++n){
// FC[idx*rc+n]=_FC[counter*rc+n];
// }
counter++;
} else{
FBlas::c_setzero(opt_rc, reinterpret_cast<FReal*>(FC + idx*opt_rc));
// for (unsigned int n=0; n<rc; ++n){
// FC[idx*rc+n]=FComplexe(0.0,0.0);
// }
}
}
......@@ -169,25 +163,24 @@ template <int ORDER, KERNEL_FUNCTION_TYPE TYPE> class FUnifM2LHandler;
/*! Specialization for homogeneous kernel functions */
template <int ORDER>
class FUnifM2LHandler<ORDER,HOMOGENEOUS> : FNoCopyable
class FUnifM2LHandler<ORDER,HOMOGENEOUS>
{
enum {order = ORDER,
nnodes = TensorTraits<ORDER>::nnodes,
ninteractions = 316, // 7^3 - 3^3 (max num cells in far-field)
rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1)};
/// M2L Operators (stored in Fourier space)
FSmartPointer< FComplexe,FSmartArrayMemory> FC;
FComplexe *FC;
/// Utils
typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes];
/// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
// FDft Dft; // Direct Discrete Fourier Transformator
typedef FFft<dimfft> DftClass; // Fast Discrete Fourier Transformator
FSmartPointer<DftClass,FSmartPointerMemory> Dft;
DftClass Dft;
const unsigned int opt_rc; // specific to real valued kernel
......@@ -204,12 +197,11 @@ class FUnifM2LHandler<ORDER,HOMOGENEOUS> : FNoCopyable
public:
template <typename MatrixKernelClass>
FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal)
: FC(NULL),
opt_rc(rc/2+1)
: FC(NULL), opt_rc(rc/2+1), Dft()
{
// init DFT
const int steps[dimfft] = {rc};
Dft = new DftClass(steps);
Dft.buildDFT(steps);
// initialize root node ids
TensorType::setNodeIdsDiff(node_diff);
......@@ -220,9 +212,7 @@ public:
}
~FUnifM2LHandler()
{
if (FC != NULL) delete [] FC;
}
{ }
/**
* Computes and sets the matrix \f$C_t\f$
......@@ -236,7 +226,9 @@ public:
if (FC) throw std::runtime_error("M2L operator already set");
// Compute matrix of interactions
const FReal ReferenceCellWidth = FReal(2.);
Compute<order>(MatrixKernel,ReferenceCellWidth,FC);
FComplexe* pFC = NULL;
Compute<order>(MatrixKernel,ReferenceCellWidth,pFC);
FC.assign(pFC);
// Compute memory usage
unsigned long sizeM2L = 343*opt_rc*sizeof(FComplexe);
......@@ -260,7 +252,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply inverse Discrete Fourier Transform
Dft->applyIDFT(FX,Px);
Dft.applyIDFT(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
......@@ -284,28 +276,12 @@ public:
void applyFC(const unsigned int idx, const unsigned int, const FReal scale,
const FComplexe *const FY, FComplexe *const FX) const
{
FComplexe tmpFX;
// Perform entrywise product manually
for (unsigned int j=0; j<opt_rc; ++j){
tmpFX=FC[idx*opt_rc + j];
tmpFX*=FY[j];
tmpFX*=scale;
FX[j]+=tmpFX;
FX[j].addMul(FComplexe(scale*FC[idx*opt_rc + j].getReal(),
scale*FC[idx*opt_rc + j].getImag()),
FY[j]);
}
// // Perform entrywise product using BLAS and MKL routines
// // PB: not necessary faster than the naive version
// FComplexe tmpFX[rc];
// FBlas::c_setzero(rc,reinterpret_cast<FReal*>(tmpFX));
// FMkl::c_had(rc,reinterpret_cast<const FReal* const>(FC + idx*rc),
// reinterpret_cast<const FReal* const>(FY),
// reinterpret_cast<FReal* const>(tmpFX));
// // Scale
// FBlas::c_axpy(rc,&scale,reinterpret_cast<FReal* const>(tmpFX),
// reinterpret_cast<FReal* const>(FX));
}
......@@ -326,8 +302,7 @@ public:
Py[node_diff[i*nnodes]]=y[i];
// Apply forward Discrete Fourier Transform
Dft->applyDFT(Py,FY);
Dft.applyDFT(Py,FY);
}
......@@ -336,28 +311,27 @@ public:
/*! Specialization for non-homogeneous kernel functions */
template <int ORDER>
class FUnifM2LHandler<ORDER,NON_HOMOGENEOUS> : FNoCopyable
class FUnifM2LHandler<ORDER,NON_HOMOGENEOUS>
{
enum {order = ORDER,
nnodes = TensorTraits<ORDER>::nnodes,
ninteractions = 316, // 7^3 - 3^3 (max num cells in far-field)
rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1)};
// Homogeneity specific
FComplexe** FC;
/// M2L Operators (stored in Fourier space for each level)
FSmartPointer< FComplexe*,FSmartArrayMemory> FC;
/// Homogeneity specific variables
const unsigned int TreeHeight;
const FReal RootCellWidth;
/// Utils
typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes];
// DFT specific
/// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
// FDft Dft; // Direct Discrete Fourier Transformator
typedef FFft<dimfft> DftClass; // Fast Discrete Fourier Transformator
FSmartPointer<DftClass,FSmartPointerMemory> Dft;
DftClass Dft;
const unsigned int opt_rc; // specific to real valued kernel
......@@ -376,11 +350,12 @@ public:
FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth)
: TreeHeight(inTreeHeight),
RootCellWidth(inRootCellWidth),
opt_rc(rc/2+1)
opt_rc(rc/2+1),
Dft()
{
// init DFT
const int steps[dimfft] = {rc};
Dft = new DftClass(steps);
Dft.buildDFT(steps);
// initialize root node ids
TensorType::setNodeIdsDiff(node_diff);
......@@ -443,7 +418,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply inverse Discrete Fourier Transform
Dft->applyIDFT(FX,Px);
Dft.applyIDFT(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
......@@ -467,27 +442,10 @@ public:
void applyFC(const unsigned int idx, const unsigned int TreeLevel, const FReal,
const FComplexe *const FY, FComplexe *const FX) const
{
FComplexe tmpFX;
// Perform entrywise product manually
for (unsigned int j=0; j<opt_rc; ++j){
tmpFX=FC[TreeLevel][idx*opt_rc + j];
tmpFX*=FY[j];
FX[j]+=tmpFX;
FX[j].addMul(FC[TreeLevel][idx*opt_rc + j],FY[j]);
}
// // Perform entrywise product using BLAS and MKL routines
// // PB: not necessary faster than the naive version
// FComplexe tmpFX[rc];
// FBlas::c_setzero(rc,reinterpret_cast<FReal*>(tmpFX));
// FMkl::c_had(rc,reinterpret_cast<const FReal* const>(FC + idx*rc),
// reinterpret_cast<const FReal* const>(FY),
// reinterpret_cast<FReal* const>(tmpFX));
// // Scale
// FBlas::c_axpy(rc,&scale,reinterpret_cast<FReal* const>(tmpFX),
// reinterpret_cast<FReal* const>(FX));
}
......@@ -508,7 +466,7 @@ public:
Py[node_diff[i*nnodes]]=y[i];
// Apply forward Discrete Fourier Transform
Dft->applyDFT(Py,FY);
Dft.applyDFT(Py,FY);
}
......
......@@ -76,7 +76,7 @@ protected://PB: for OptiDis
AbstractBaseClass;
/// Needed for M2L operator
FSmartPointer< M2LHandlerClass,FSmartPointerMemory> M2LHandler;
const M2LHandlerClass M2LHandler;
public:
/**
......@@ -89,9 +89,9 @@ public:
const FPoint& inBoxCenter,
const double inMatParam = 0.0)
: FAbstractUnifKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>(inTreeHeight,inBoxWidth,inBoxCenter,inMatParam),
M2LHandler(new M2LHandlerClass(AbstractBaseClass::MatrixKernel.getPtr(),
inTreeHeight,
inBoxWidth))// PB: for non homogeneous case
M2LHandler(AbstractBaseClass::MatrixKernel.getPtr(),
inTreeHeight,
inBoxWidth)
{ }
......@@ -111,8 +111,8 @@ public:
int idxMul = idxV*nRhs + idxRhs;
// 2) apply Discrete Fourier Transform
M2LHandler->applyZeroPaddingAndDFT(LeafCell->getMultipole(idxMul),
LeafCell->getTransformedMultipole(idxMul));
M2LHandler.applyZeroPaddingAndDFT(LeafCell->getMultipole(idxMul),
LeafCell->getTransformedMultipole(idxMul));
}
}// NVALS
......@@ -137,8 +137,8 @@ public:
}
}
// 2) Apply Discrete Fourier Transform
M2LHandler->applyZeroPaddingAndDFT(ParentCell->getMultipole(idxMul),
ParentCell->getTransformedMultipole(idxMul));
M2LHandler.applyZeroPaddingAndDFT(ParentCell->getMultipole(idxMul),
ParentCell->getTransformedMultipole(idxMul));
}
}// NVALS
}
......@@ -174,7 +174,7 @@ public:
for (int idx=0; idx<343; ++idx){
if (SourceCells[idx]){
M2LHandler->applyFC(idx, TreeLevel, scale, d,
M2LHandler.applyFC(idx, TreeLevel, scale, d,
SourceCells[idx]->getTransformedMultipole(idxMul),
TransformedLocalExpansion);
......@@ -193,8 +193,8 @@ public:
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
int idxLoc = idxV*nLhs + idxLhs;
// 1) Apply Inverse Discrete Fourier Transform
M2LHandler->unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxLoc),
const_cast<CellClass*>(ParentCell)->getLocal(idxLoc));
M2LHandler.unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxLoc),
const_cast<CellClass*>(ParentCell)->getLocal(idxLoc));
// 2) apply Sx
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
if (ChildCells[ChildIndex]){
......@@ -214,8 +214,8 @@ public:
for(int idxLhs = 0 ; idxLhs < nLhs ; ++idxLhs){
int idxLoc = idxV*nLhs + idxLhs;
// 1) Apply Inverse Discrete Fourier Transform
M2LHandler->unapplyZeroPaddingAndDFT(LeafCell->getTransformedLocal(idxLoc),
const_cast<CellClass*>(LeafCell)->getLocal(idxLoc));
M2LHandler.unapplyZeroPaddingAndDFT(LeafCell->getTransformedLocal(idxLoc),
const_cast<CellClass*>(LeafCell)->getLocal(idxLoc));
}
......
......@@ -40,12 +40,13 @@ static void Compute(const MatrixKernelClass *const MatrixKernel,
const FReal CellWidth,
FComplexe** &FC)
{
// PB: need to redefine some constant since not function from m2lhandler class
// dimensions of operators
const unsigned int order = ORDER;
const unsigned int nnodes = TensorTraits<ORDER>::nnodes;
const unsigned int ninteractions = 316;
const unsigned int ncmp = MatrixKernelClass::NCMP;
// utils
typedef FUnifTensor<ORDER> TensorType;
// allocate memory and store compressed M2L operators
......@@ -77,8 +78,8 @@ static void Compute(const MatrixKernelClass *const MatrixKernel,
// init Discrete Fourier Transformator
const int dimfft = 1; // unidim FFT since fully circulant embedding
const int steps[dimfft] = {rc};
// FDft Dft(rc);
FFft<dimfft> Dft(steps);
FFft<dimfft> Dft;
Dft.buildDFT(steps);
// get first column of K via permutation
unsigned int perm[rc];
......@@ -140,7 +141,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel,
const unsigned int opt_rc = rc/2+1;
// allocate M2L
for (unsigned int d=0; d<ncmp; ++d)
FC[d] = new FComplexe[343 * opt_rc]; //PB: allocation already done wr NCMP
FC[d] = new FComplexe[343 * opt_rc];
for (int i=-3; i<=3; ++i)
for (int j=-3; j<=3; ++j)
......@@ -150,16 +151,10 @@ static void Compute(const MatrixKernelClass *const MatrixKernel,
for (unsigned int d=0; d<ncmp; ++d)
FBlas::c_copy(opt_rc, reinterpret_cast<FReal*>(_FC[d] + counter*rc),
reinterpret_cast<FReal*>(FC[d] + idx*opt_rc));
// for (unsigned int n=0; n<rc; ++n){
// FC[idx*rc+n]=_FC[counter*rc+n];
// }
counter++;
} else{
for (unsigned int d=0; d<ncmp; ++d)
FBlas::c_setzero(opt_rc, reinterpret_cast<FReal*>(FC[d] + idx*opt_rc));
// for (unsigned int n=0; n<rc; ++n){
// FC[idx*rc+n]=FComplexe(0.0,0.0);
// }
}
}
......@@ -195,18 +190,17 @@ class FUnifTensorialM2LHandler<ORDER,MatrixKernelClass,HOMOGENEOUS> : FNoCopyabl
rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1),
ncmp = MatrixKernelClass::NCMP};
// Tensorial MatrixKernel specific
FComplexe** FC;
/// M2L Operators (stored in Fourier space for each component of tensor)
FSmartPointer< FComplexe*,FSmartArrayMemory> FC;
/// Utils
typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes];
// DFT specific
/// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
// FDft Dft; // Direct Discrete Fourier Transformator
typedef FFft<dimfft> DftClass; // Fast Discrete Fourier Transformator
FSmartPointer<DftClass,FSmartPointerMemory> Dft;
DftClass Dft;
const unsigned int opt_rc; // specific to real valued kernel
......@@ -222,11 +216,11 @@ class FUnifTensorialM2LHandler<ORDER,MatrixKernelClass,HOMOGENEOUS> : FNoCopyabl
public:
FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal)
: opt_rc(rc/2+1)
: opt_rc(rc/2+1), Dft()
{
// init DFT
const int steps[dimfft] = {rc};
Dft = new DftClass(steps);
Dft.buildDFT(steps);
// allocate FC
FC = new FComplexe*[ncmp];
......@@ -282,7 +276,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply inverse Discrete Fourier Transform
Dft->applyIDFT(FX,Px);
Dft.applyIDFT(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
......@@ -309,8 +303,8 @@ public:
{
// Perform entrywise product manually
for (unsigned int j=0; j<opt_rc; ++j){
FX[j].addMul(FComplexe(FC[d][idx*opt_rc + j].getReal()*scale,
FC[d][idx*opt_rc + j].getImag()*scale),
FX[j].addMul(FComplexe(scale*FC[d][idx*opt_rc + j].getReal(),