Commit 88be8bd4 authored by BLANCHARD Pierre's avatar BLANCHARD Pierre

Redesigned fftw wrapper to handle float and double (Important: Make sure float is enabled in fftw).

parent f3fb5e98
......@@ -26,14 +26,14 @@
#include <fstream>
#include <typeinfo>
#include "../../Utils/FBlas.hpp"
#include "../../Utils/FTic.hpp"
#include "../../Utils/FDft.hpp"
#include "Utils/FBlas.hpp"
#include "Utils/FTic.hpp"
#include "Utils/FDft.hpp"
#include "../../Utils/FComplex.hpp"
#include "Utils/FComplex.hpp"
#include "./FUnifTensor.hpp"
#include "FUnifTensor.hpp"
/*! Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space.
PB: Compute() does not belong to the M2LHandler like it does in the Chebyshev kernel. This allows much nicer specialization of the M2LHandler class with respect to the homogeneity of the kernel of interaction like in the ChebyshevSym kernel.*/
......@@ -67,9 +67,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel
TensorType::setNodeIdsPairs(node_ids_pairs);
// init Discrete Fourier Transformator
const int dimfft = 1; // unidim FFT since fully circulant embedding
const int steps[dimfft] = {rc};
FFft<FReal,dimfft> Dft;
Dft.buildDFT(steps);
FFftw<FReal,FComplex<FReal>,dimfft> Dft(rc);
// get first column of K via permutation
unsigned int perm[rc];
TensorType::setStoragePermutation(perm);
......@@ -178,7 +176,7 @@ class FUnifM2LHandler<FReal, ORDER,HOMOGENEOUS>
/// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
typedef FFft<FReal,dimfft> DftClass; // Fast Discrete Fourier Transformator
typedef FFftw<FReal,FComplex<FReal>,dimfft> DftClass; // Fast Discrete Fourier Transformator
DftClass Dft;
const unsigned int opt_rc; // specific to real valued kernel
......@@ -198,11 +196,8 @@ class FUnifM2LHandler<FReal, ORDER,HOMOGENEOUS>
public:
template <typename MatrixKernelClass>
FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const int inLeafLevelSeparationCriterion = 1)
: FC(nullptr), Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
: FC(nullptr), Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
{
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// initialize root node ids
TensorType::setNodeIdsDiff(node_diff);
......@@ -216,11 +211,8 @@ public:
* Copy constructor
*/
FUnifM2LHandler(const FUnifM2LHandler& other)
: FC(other.FC), Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
: FC(other.FC), Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
{
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// copy node_diff
memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes);
}
......@@ -269,7 +261,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply forward Discrete Fourier Transform
Dft.applyIDFT(FX,Px);
Dft.applyIDFTNorm(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
x[j]+=Px[node_diff[nnodes-j-1]];
......@@ -342,7 +334,7 @@ class FUnifM2LHandler<FReal,ORDER,NON_HOMOGENEOUS>
unsigned int node_diff[nnodes*nnodes];
/// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
typedef FFft<FReal,dimfft> DftClass; // Fast Discrete Fourier Transformator
typedef FFftw<FReal,FComplex<FReal>,dimfft> DftClass; // Fast real-valued Discrete Fourier Transformator
DftClass Dft;
const unsigned int opt_rc; // specific to real valued kernel
......@@ -364,11 +356,8 @@ public:
FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth, const int inLeafLevelSeparationCriterion = 1)
: TreeHeight(inTreeHeight),
RootCellWidth(inRootCellWidth),
Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
{
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// initialize root node ids
TensorType::setNodeIdsDiff(node_diff);
......@@ -390,11 +379,8 @@ public:
: FC(other.FC),
TreeHeight(other.TreeHeight),
RootCellWidth(other.RootCellWidth),
Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
{
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// copy node_diff
memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes);
}
......@@ -455,7 +441,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply forward Discrete Fourier Transform
Dft.applyIDFT(FX,Px);
Dft.applyIDFTNorm(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
x[j]+=Px[node_diff[nnodes-j-1]];
......
......@@ -26,14 +26,14 @@
#include <fstream>
#include <typeinfo>
#include "../../Utils/FBlas.hpp"
#include "../../Utils/FTic.hpp"
#include "../../Utils/FDft.hpp"
#include "Utils/FBlas.hpp"
#include "Utils/FTic.hpp"
#include "Utils/FDft.hpp"
#include "../../Utils/FComplex.hpp"
#include "Utils/FComplex.hpp"
#include "./FUnifTensor.hpp"
#include "FUnifTensor.hpp"
/*! Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space. These interactions can be tensorial (of size ncmp) and are computed blockwise
......@@ -83,9 +83,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel,
// init Discrete Fourier Transformator
const int dimfft = 1; // unidim FFT since fully circulant embedding
const int steps[dimfft] = {rc};
FFft<FReal,dimfft> Dft;
Dft.buildDFT(steps);
FFftw<FReal,FComplex<FReal>,dimfft> Dft(rc);
// get first column of K via permutation
unsigned int perm[rc];
......@@ -208,7 +206,7 @@ class FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,HOMOGENEOUS>
/// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
typedef FFft<FReal,dimfft> DftClass; // Fast Discrete Fourier Transformator
typedef FFftw<FReal,FComplex<FReal>,dimfft> DftClass; // Fast Discrete Fourier Transformator
DftClass Dft;
const unsigned int opt_rc; // specific to real valued kernel
......@@ -228,11 +226,8 @@ class FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,HOMOGENEOUS>
public:
FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const FReal inCellWidthExtension, const int inLeafLevelSeparationCriterion = 1)
: CellWidthExtension(inCellWidthExtension),
Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
{
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// allocate FC
FC = new FComplex<FReal>*[ncmp];
......@@ -252,12 +247,8 @@ public:
FUnifTensorialM2LHandler(const FUnifTensorialM2LHandler& other)
: FC(other.FC),
CellWidthExtension(other.CellWidthExtension),
Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
{
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// copy node_diff
memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes);
}
......@@ -311,7 +302,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply forward Discrete Fourier Transform
Dft.applyIDFT(FX,Px);
Dft.applyIDFTNorm(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
x[j]+=Px[node_diff[nnodes-j-1]];
......@@ -403,7 +394,7 @@ class FUnifTensorialM2LHandler<FReal,ORDER,MatrixKernelClass,NON_HOMOGENEOUS>
/// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
typedef FFft<FReal,dimfft> DftClass; // Fast Discrete Fourier Transformator
typedef FFftw<FReal,FComplex<FReal>,dimfft> DftClass; // Fast Discrete Fourier Transformator
DftClass Dft;
const unsigned int opt_rc; // specific to real valued kernel
......@@ -425,11 +416,8 @@ public:
: TreeHeight(inTreeHeight),
RootCellWidth(inRootCellWidth),
CellWidthExtension(inCellWidthExtension),
Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
{
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// allocate FC
FC = new FComplex<FReal>**[TreeHeight];
......@@ -454,12 +442,8 @@ public:
TreeHeight(other.TreeHeight),
RootCellWidth(other.RootCellWidth),
CellWidthExtension(other.CellWidthExtension),
Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
{
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// copy node_diff
memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes);
}
......@@ -517,7 +501,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply forward Discrete Fourier Transform
Dft.applyIDFT(FX,Px);
Dft.applyIDFTNorm(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
x[j]+=Px[node_diff[nnodes-j-1]];
......
......@@ -31,42 +31,37 @@
// elseif libfftw_dev: usr/include/fftw3.h
#include <fftw3.h>
#include "FGlobal.hpp"
#include "FComplex.hpp"
#include "FMath.hpp"
// for @class FDft only
#include "FBlas.hpp"
#include "FTic.hpp"
#include "FComplex.hpp"
/**
* @author Pierre Blanchard (pierre.blanchard@inria.fr)
* @class FDft, @class FFft and @class FCFft
* @class FDft, @class FFftw
* Please read the license
*
* These classes handle the forward and backward Discete Fourier Transform
* (DFT).
* @class FDft implements a direct method while @class FFft uses the Fast
* @class FDft implements a direct method while @class FFftw uses the Fast
* Fourier Transform (FFT). The FFT algorithm can either be provided by the
* FFTW(3) library itself or a version that is wrapped in Intel MKL.
*
* The direct DFT is templatized with the input value type (FReal or FComplex<FReal>),
* while 2 distinct classes resp. @class FFft and @class FCFft are defined
* resp. for the real and complex valued FFT.
* The @class FDft is templatized with the input value type (FReal or FComplex<FReal>),
* while @class FFftw is templatized with input and output value types and the dimension.
*
* The aim of writing these specific classes is to allow further customization
* of the DFT such as implementing a tensorial variant, a weighted variant
* or any other feature.
*
* TODO: some copies might slow down the routines, this needs to be optimized.
*
*/
/**
* @class FDft
*
* @tparam nsteps number of sampled values \f$N\f$
* @tparam size_ number of sampled values \f$N\f$
*/
template< class FReal, typename ValueType = FReal>
class FDft
......@@ -78,37 +73,37 @@ class FDft
FReal *cosRS_, *sinRS_;
private:
unsigned int nsteps_; //< number of steps
unsigned int size_; //< number of steps
void initDFT()
{
// allocate arrays
data_ = new ValueType[nsteps_];
dataF_ = new FComplex<FReal>[nsteps_];
data_ = new ValueType[size_];
dataF_ = new FComplex<FReal>[size_];
// Beware this is extremely HEAVY to store!!! => Use FDft only for debug!
cosRS_ = new FReal[nsteps_*nsteps_];
sinRS_ = new FReal[nsteps_*nsteps_];
for(unsigned int r=0; r<nsteps_; ++r)
for(unsigned int s=0; s<nsteps_; ++s){
FReal thetaRS = 2*M_PI*r*s/nsteps_;
cosRS_[r*nsteps_+s]=FMath::Cos(thetaRS);
sinRS_[r*nsteps_+s]=FMath::Sin(thetaRS);
cosRS_ = new FReal[size_*size_];
sinRS_ = new FReal[size_*size_];
for(unsigned int r=0; r<size_; ++r)
for(unsigned int s=0; s<size_; ++s){
FReal thetaRS = 2*M_PI*r*s/size_;
cosRS_[r*size_+s]=FMath::Cos(thetaRS);
sinRS_[r*size_+s]=FMath::Sin(thetaRS);
}
}
public:
FDft(const unsigned int nsteps)
: nsteps_(nsteps)
FDft(const unsigned int size)
: size_(size)
{
// init DFT
initDFT();
}
FDft(const FDft& other)
: nsteps_(other.nsteps_)
: size_(other.size_)
{
// init DFT
initDFT();
......@@ -128,18 +123,18 @@ public:
FComplex<FReal>* transformedData) const
{
// read sampled data
FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(dataF_));
FBlas::copy(nsteps_, sampledData,data_);
FBlas::c_setzero(size_,reinterpret_cast<FReal*>(dataF_));
FBlas::copy(size_, sampledData,data_);
// perform direct forward transformation
for(unsigned int r=0; r<nsteps_; ++r)
for(unsigned int s=0; s<nsteps_; ++s){
dataF_[r] += FComplex<FReal>(data_[s]*cosRS_[r*nsteps_+s],
-data_[s]*sinRS_[r*nsteps_+s]);
for(unsigned int r=0; r<size_; ++r)
for(unsigned int s=0; s<size_; ++s){
dataF_[r] += FComplex<FReal>(data_[s]*cosRS_[r*size_+s],
-data_[s]*sinRS_[r*size_+s]);
}
// write transformed data
FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(dataF_),
FBlas::c_copy(size_,reinterpret_cast<FReal*>(dataF_),
reinterpret_cast<FReal*>(transformedData));
}
// Complexe valued DFT
......@@ -147,21 +142,21 @@ public:
FComplex<FReal>* transformedData) const
{
// read sampled data
FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(dataF_));
FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(sampledData),
FBlas::c_setzero(size_,reinterpret_cast<FReal*>(dataF_));
FBlas::c_copy(size_,reinterpret_cast<const FReal*>(sampledData),
reinterpret_cast<FReal*>(data_));
// perform direct forward transformation
for(unsigned int r=0; r<nsteps_; ++r)
for(unsigned int s=0; s<nsteps_; ++s){
dataF_[r] += FComplex<FReal>(data_[s].getReal()*cosRS_[r*nsteps_+s]
+ data_[s].getImag()*sinRS_[r*nsteps_+s],
data_[s].getImag()*cosRS_[r*nsteps_+s]
- data_[s].getReal()*sinRS_[r*nsteps_+s]);
for(unsigned int r=0; r<size_; ++r)
for(unsigned int s=0; s<size_; ++s){
dataF_[r] += FComplex<FReal>(data_[s].getReal()*cosRS_[r*size_+s]
+ data_[s].getImag()*sinRS_[r*size_+s],
data_[s].getImag()*cosRS_[r*size_+s]
- data_[s].getReal()*sinRS_[r*size_+s]);
}
// write transformed data
FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(dataF_),
FBlas::c_copy(size_,reinterpret_cast<FReal*>(dataF_),
reinterpret_cast<FReal*>(transformedData));
}
......@@ -171,21 +166,21 @@ public:
FReal* sampledData) const
{
// read transformed data
FBlas::setzero(nsteps_,data_);
FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(transformedData),
FBlas::setzero(size_,data_);
FBlas::c_copy(size_,reinterpret_cast<const FReal*>(transformedData),
reinterpret_cast<FReal*>(dataF_));
// perform direct backward transformation
for(unsigned int r=0; r<nsteps_; ++r){
for(unsigned int s=0; s<nsteps_; ++s){
data_[r] += dataF_[s].getReal()*cosRS_[r*nsteps_+s]
+ dataF_[s].getImag()*sinRS_[r*nsteps_+s];
for(unsigned int r=0; r<size_; ++r){
for(unsigned int s=0; s<size_; ++s){
data_[r] += dataF_[s].getReal()*cosRS_[r*size_+s]
+ dataF_[s].getImag()*sinRS_[r*size_+s];
}
data_[r]*=1./nsteps_;
data_[r]*=1./size_;
}
// write sampled data
FBlas::copy(nsteps_,data_,sampledData);
FBlas::copy(size_,data_,sampledData);
}
// Complexe valued IDFT
......@@ -193,259 +188,365 @@ public:
FComplex<FReal>* sampledData) const
{
// read transformed data
FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(data_));
FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(transformedData),
FBlas::c_setzero(size_,reinterpret_cast<FReal*>(data_));
FBlas::c_copy(size_,reinterpret_cast<const FReal*>(transformedData),
reinterpret_cast<FReal*>(dataF_));
// perform direct backward transformation
for(unsigned int r=0; r<nsteps_; ++r){
for(unsigned int s=0; s<nsteps_; ++s){
data_[r] += FComplex<FReal>(dataF_[s].getReal()*cosRS_[r*nsteps_+s]
- dataF_[s].getImag()*sinRS_[r*nsteps_+s],
dataF_[s].getImag()*cosRS_[r*nsteps_+s]
+ dataF_[s].getReal()*sinRS_[r*nsteps_+s]);
for(unsigned int r=0; r<size_; ++r){
for(unsigned int s=0; s<size_; ++s){
data_[r] += FComplex<FReal>(dataF_[s].getReal()*cosRS_[r*size_+s]
- dataF_[s].getImag()*sinRS_[r*size_+s],
dataF_[s].getImag()*cosRS_[r*size_+s]
+ dataF_[s].getReal()*sinRS_[r*size_+s]);
}
data_[r]*=1./nsteps_;
data_[r]*=1./size_;
}
// write sampled data
FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(data_),
FBlas::c_copy(size_,reinterpret_cast<FReal*>(data_),
reinterpret_cast<FReal*>(sampledData));
}
};
/**
* @class FFft
*
* @tparam nsteps number of sampled values \f$N\f$
*/
template<class FReal, int DIM = 1>
class FFft
{
enum{dim = DIM};
// arrays
FReal* data_; //< data in physical space
FComplex<FReal>* dataF_; //< data in Fourier space
// plans
fftw_plan plan_c2r_; //< backward FFT plan
fftw_plan plan_r2c_; //< forward FFT plan
private:
int steps_[dim]; //< number of steps per dimension
int nsteps_; //< total number of steps
int nsteps_opt_; //< reduced number of steps for real valued FFT
void initDFT()
{
// allocate arrays
data_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_);
dataF_ = (FComplex<FReal>*) fftw_malloc(sizeof(FComplex<FReal>) * nsteps_opt_);
// // unidim fftw plans
// plan_c2r_ =
// fftw_plan_dft_c2r_1d(nsteps_,
// reinterpret_cast<fftw_complex*>(dataF_),
// data_,
// FFTW_MEASURE);// TODO: test FFTW_ESTIMATE
// plan_r2c_ =
// fftw_plan_dft_r2c_1d(nsteps_,
// data_,
// reinterpret_cast<fftw_complex*>(dataF_),
// FFTW_MEASURE);
// multidim fftw plans
plan_c2r_ =
fftw_plan_dft_c2r(dim, steps_,
reinterpret_cast<fftw_complex*>(dataF_),
data_,
FFTW_MEASURE);// TODO: test FFTW_ESTIMATE
plan_r2c_ =
fftw_plan_dft_r2c(dim, steps_,
data_,
reinterpret_cast<fftw_complex*>(dataF_),
FFTW_MEASURE);
/**
* @class FFftw
*
* @brief This class is a wrapper to FFTW. It is important to enable float if FFftw<float> is used.
*
*
* @tparam ValueClassSrc Source value type
* @tparam ValueClassDest Destination value type
* @tparam DIM dimension \f$d\f$ of the Discrete Fourier Transform
* @param nbPointsPerDim number of sampled values \f$N\f$ per dimension
* @param nbPoints total number of sampled values \f$N^d\f$
*
*
*/
template <class ValueClassSrc, class ValueClassDest, class ValueClassSrcFftw, class ValueClassDestFftw, class PlanClassFftw, int DIM >
class FFftwCore {
enum{dim = DIM};
protected:
static void PlusEqual(double dest[], const double src[], const int nbElements){
for(int idxVal = 0 ; idxVal < nbElements ; ++idxVal){
dest[idxVal] += src[idxVal];
}
}
static void PlusEqual(FComplex<double> dest[], const fftw_complex src[], const int nbElements){
const double* ptrSrc = reinterpret_cast<const double*>(src);
for(int idxVal = 0 ; idxVal < nbElements ; ++idxVal){
dest[idxVal] += FComplex<double>(ptrSrc[2*idxVal], ptrSrc[2*idxVal+1]);
}
}
static void Bind_fftw_alloc(double** ptr, const int size){
*ptr = (double*) fftw_malloc(sizeof(double) * size);
//SpErrAEH(*ptr);
}
static void Bind_fftw_alloc(fftw_complex** ptr, const int size){
*ptr = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * size);
//SpErrAEH(*ptr);
}
static void Bind_fftw_destroy_plan(fftw_plan plan){
//#pragma omp critical(SF_KEEP_FFTW)
//{
fftw_destroy_plan(plan);
//}
}
static fftw_plan Bind_fftw_plan_dft(int d, int *n0, fftw_complex *in, double *out, int sign, unsigned flags){
//SpErrAE(sign == 1);
fftw_plan plan;
//#pragma omp critical(SF_KEEP_FFTW)
//{
plan = fftw_plan_dft_c2r(d, n0, in, out, flags);
//}
return plan;
}
static fftw_plan Bind_fftw_plan_dft(int d, int *n0, double *in, fftw_complex *out, int sign, unsigned flags){
//SpErrAE(sign == -1);
fftw_plan plan;
//#pragma omp critical(SF_KEEP_FFTW)
//{
plan = fftw_plan_dft_r2c(d, n0, in, out, flags);
//}
return plan;
}
static fftw_plan Bind_fftw_plan_dft(int d, int *n0, fftw_complex *in, fftw_complex *out, int sign, unsigned flags){
fftw_plan plan;
//#pragma omp critical(SF_KEEP_FFTW)
//{
plan = fftw_plan_dft(d, n0, in, out, sign, flags);
//}
return plan;
}
static void Bind_fftw_execute(fftw_plan plan){
fftw_execute(plan);
}
static void PlusEqual(float dest[], const float src[], const int nbElements){
for(int idxVal = 0 ; idxVal < nbElements ; ++idxVal){
dest[idxVal] += src[idxVal];
}
}
static void PlusEqual(FComplex<float> dest[], const fftwf_complex src[], const int nbElements){
const float* ptrSrc = reinterpret_cast<const float*>(src);
for(int idxVal = 0 ; idxVal < nbElements ; ++idxVal){
dest[idxVal] += FComplex<float>(ptrSrc[2*idxVal], ptrSrc[2*idxVal+1]);
}
}
static void Bind_fftw_alloc(float** ptr, const int size){
*ptr = (float*) fftwf_malloc(sizeof(float) * size);
//SpErrAEH(*ptr);
}
static void Bind_fftw_alloc(fftwf_complex** ptr, const int size){
*ptr = (fftwf_complex*) fftw_malloc(sizeof(fftwf_complex) * size);
//SpErrAEH(*ptr);
}
static void Bind_fftw_destroy_plan(fftwf_plan plan){
//#pragma omp critical(SF_KEEP_FFTW)