Commit 88be8bd4 authored by BLANCHARD Pierre's avatar BLANCHARD Pierre

Redesigned fftw wrapper to handle float and double (Important: Make sure float is enabled in fftw).

parent f3fb5e98
...@@ -26,14 +26,14 @@ ...@@ -26,14 +26,14 @@
#include <fstream> #include <fstream>
#include <typeinfo> #include <typeinfo>
#include "../../Utils/FBlas.hpp" #include "Utils/FBlas.hpp"
#include "../../Utils/FTic.hpp" #include "Utils/FTic.hpp"
#include "../../Utils/FDft.hpp" #include "Utils/FDft.hpp"
#include "../../Utils/FComplex.hpp" #include "Utils/FComplex.hpp"
#include "./FUnifTensor.hpp" #include "FUnifTensor.hpp"
/*! Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space. /*! Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space.
PB: Compute() does not belong to the M2LHandler like it does in the Chebyshev kernel. This allows much nicer specialization of the M2LHandler class with respect to the homogeneity of the kernel of interaction like in the ChebyshevSym kernel.*/ PB: Compute() does not belong to the M2LHandler like it does in the Chebyshev kernel. This allows much nicer specialization of the M2LHandler class with respect to the homogeneity of the kernel of interaction like in the ChebyshevSym kernel.*/
...@@ -67,9 +67,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel ...@@ -67,9 +67,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel
TensorType::setNodeIdsPairs(node_ids_pairs); TensorType::setNodeIdsPairs(node_ids_pairs);
// init Discrete Fourier Transformator // init Discrete Fourier Transformator
const int dimfft = 1; // unidim FFT since fully circulant embedding const int dimfft = 1; // unidim FFT since fully circulant embedding
const int steps[dimfft] = {rc}; FFftw<FReal,FComplex<FReal>,dimfft> Dft(rc);
FFft<FReal,dimfft> Dft;
Dft.buildDFT(steps);
// get first column of K via permutation // get first column of K via permutation
unsigned int perm[rc]; unsigned int perm[rc];
TensorType::setStoragePermutation(perm); TensorType::setStoragePermutation(perm);
...@@ -178,7 +176,7 @@ class FUnifM2LHandler<FReal, ORDER,HOMOGENEOUS> ...@@ -178,7 +176,7 @@ class FUnifM2LHandler<FReal, ORDER,HOMOGENEOUS>
/// DFT specific /// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding static const int dimfft = 1; // unidim FFT since fully circulant embedding
typedef FFft<FReal,dimfft> DftClass; // Fast Discrete Fourier Transformator typedef FFftw<FReal,FComplex<FReal>,dimfft> DftClass; // Fast Discrete Fourier Transformator
DftClass Dft; DftClass Dft;
const unsigned int opt_rc; // specific to real valued kernel const unsigned int opt_rc; // specific to real valued kernel
...@@ -198,11 +196,8 @@ class FUnifM2LHandler<FReal, ORDER,HOMOGENEOUS> ...@@ -198,11 +196,8 @@ class FUnifM2LHandler<FReal, ORDER,HOMOGENEOUS>
public: public:
template <typename MatrixKernelClass> template <typename MatrixKernelClass>
FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const int inLeafLevelSeparationCriterion = 1) FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const int inLeafLevelSeparationCriterion = 1)
: FC(nullptr), Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) : FC(nullptr), Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
{ {
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// initialize root node ids // initialize root node ids
TensorType::setNodeIdsDiff(node_diff); TensorType::setNodeIdsDiff(node_diff);
...@@ -216,11 +211,8 @@ public: ...@@ -216,11 +211,8 @@ public:
* Copy constructor * Copy constructor
*/ */
FUnifM2LHandler(const FUnifM2LHandler& other) FUnifM2LHandler(const FUnifM2LHandler& other)
: FC(other.FC), Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) : FC(other.FC), Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
{ {
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// copy node_diff // copy node_diff
memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes); memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes);
} }
...@@ -269,7 +261,7 @@ public: ...@@ -269,7 +261,7 @@ public:
FReal Px[rc]; FReal Px[rc];
FBlas::setzero(rc,Px); FBlas::setzero(rc,Px);
// Apply forward Discrete Fourier Transform // Apply forward Discrete Fourier Transform
Dft.applyIDFT(FX,Px); Dft.applyIDFTNorm(FX,Px);
// Unapply Zero Padding // Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j) for (unsigned int j=0; j<nnodes; ++j)
x[j]+=Px[node_diff[nnodes-j-1]]; x[j]+=Px[node_diff[nnodes-j-1]];
...@@ -342,7 +334,7 @@ class FUnifM2LHandler<FReal,ORDER,NON_HOMOGENEOUS> ...@@ -342,7 +334,7 @@ class FUnifM2LHandler<FReal,ORDER,NON_HOMOGENEOUS>
unsigned int node_diff[nnodes*nnodes]; unsigned int node_diff[nnodes*nnodes];
/// DFT specific /// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding static const int dimfft = 1; // unidim FFT since fully circulant embedding
typedef FFft<FReal,dimfft> DftClass; // Fast Discrete Fourier Transformator typedef FFftw<FReal,FComplex<FReal>,dimfft> DftClass; // Fast real-valued Discrete Fourier Transformator
DftClass Dft; DftClass Dft;
const unsigned int opt_rc; // specific to real valued kernel const unsigned int opt_rc; // specific to real valued kernel
...@@ -364,11 +356,8 @@ public: ...@@ -364,11 +356,8 @@ public:
FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth, const int inLeafLevelSeparationCriterion = 1) FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth, const int inLeafLevelSeparationCriterion = 1)
: TreeHeight(inTreeHeight), : TreeHeight(inTreeHeight),
RootCellWidth(inRootCellWidth), RootCellWidth(inRootCellWidth),
Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
{ {
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// initialize root node ids // initialize root node ids
TensorType::setNodeIdsDiff(node_diff); TensorType::setNodeIdsDiff(node_diff);
...@@ -390,11 +379,8 @@ public: ...@@ -390,11 +379,8 @@ public:
: FC(other.FC), : FC(other.FC),
TreeHeight(other.TreeHeight), TreeHeight(other.TreeHeight),
RootCellWidth(other.RootCellWidth), RootCellWidth(other.RootCellWidth),
Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
{ {
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// copy node_diff // copy node_diff
memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes); memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes);
} }
...@@ -455,7 +441,7 @@ public: ...@@ -455,7 +441,7 @@ public:
FReal Px[rc]; FReal Px[rc];
FBlas::setzero(rc,Px); FBlas::setzero(rc,Px);
// Apply forward Discrete Fourier Transform // Apply forward Discrete Fourier Transform
Dft.applyIDFT(FX,Px); Dft.applyIDFTNorm(FX,Px);
// Unapply Zero Padding // Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j) for (unsigned int j=0; j<nnodes; ++j)
x[j]+=Px[node_diff[nnodes-j-1]]; x[j]+=Px[node_diff[nnodes-j-1]];
......
...@@ -26,14 +26,14 @@ ...@@ -26,14 +26,14 @@
#include <fstream> #include <fstream>
#include <typeinfo> #include <typeinfo>
#include "../../Utils/FBlas.hpp" #include "Utils/FBlas.hpp"
#include "../../Utils/FTic.hpp" #include "Utils/FTic.hpp"
#include "../../Utils/FDft.hpp" #include "Utils/FDft.hpp"
#include "../../Utils/FComplex.hpp" #include "Utils/FComplex.hpp"
#include "./FUnifTensor.hpp" #include "FUnifTensor.hpp"
/*! Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space. These interactions can be tensorial (of size ncmp) and are computed blockwise /*! Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space. These interactions can be tensorial (of size ncmp) and are computed blockwise
...@@ -83,9 +83,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, ...@@ -83,9 +83,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel,
// init Discrete Fourier Transformator // init Discrete Fourier Transformator
const int dimfft = 1; // unidim FFT since fully circulant embedding const int dimfft = 1; // unidim FFT since fully circulant embedding
const int steps[dimfft] = {rc}; FFftw<FReal,FComplex<FReal>,dimfft> Dft(rc);
FFft<FReal,dimfft> Dft;
Dft.buildDFT(steps);
// get first column of K via permutation // get first column of K via permutation
unsigned int perm[rc]; unsigned int perm[rc];
...@@ -208,7 +206,7 @@ class FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,HOMOGENEOUS> ...@@ -208,7 +206,7 @@ class FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,HOMOGENEOUS>
/// DFT specific /// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding static const int dimfft = 1; // unidim FFT since fully circulant embedding
typedef FFft<FReal,dimfft> DftClass; // Fast Discrete Fourier Transformator typedef FFftw<FReal,FComplex<FReal>,dimfft> DftClass; // Fast Discrete Fourier Transformator
DftClass Dft; DftClass Dft;
const unsigned int opt_rc; // specific to real valued kernel const unsigned int opt_rc; // specific to real valued kernel
...@@ -228,11 +226,8 @@ class FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,HOMOGENEOUS> ...@@ -228,11 +226,8 @@ class FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,HOMOGENEOUS>
public: public:
FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const FReal inCellWidthExtension, const int inLeafLevelSeparationCriterion = 1) FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const FReal inCellWidthExtension, const int inLeafLevelSeparationCriterion = 1)
: CellWidthExtension(inCellWidthExtension), : CellWidthExtension(inCellWidthExtension),
Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
{ {
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// allocate FC // allocate FC
FC = new FComplex<FReal>*[ncmp]; FC = new FComplex<FReal>*[ncmp];
...@@ -252,12 +247,8 @@ public: ...@@ -252,12 +247,8 @@ public:
FUnifTensorialM2LHandler(const FUnifTensorialM2LHandler& other) FUnifTensorialM2LHandler(const FUnifTensorialM2LHandler& other)
: FC(other.FC), : FC(other.FC),
CellWidthExtension(other.CellWidthExtension), CellWidthExtension(other.CellWidthExtension),
Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
{ {
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// copy node_diff // copy node_diff
memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes); memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes);
} }
...@@ -311,7 +302,7 @@ public: ...@@ -311,7 +302,7 @@ public:
FReal Px[rc]; FReal Px[rc];
FBlas::setzero(rc,Px); FBlas::setzero(rc,Px);
// Apply forward Discrete Fourier Transform // Apply forward Discrete Fourier Transform
Dft.applyIDFT(FX,Px); Dft.applyIDFTNorm(FX,Px);
// Unapply Zero Padding // Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j) for (unsigned int j=0; j<nnodes; ++j)
x[j]+=Px[node_diff[nnodes-j-1]]; x[j]+=Px[node_diff[nnodes-j-1]];
...@@ -403,7 +394,7 @@ class FUnifTensorialM2LHandler<FReal,ORDER,MatrixKernelClass,NON_HOMOGENEOUS> ...@@ -403,7 +394,7 @@ class FUnifTensorialM2LHandler<FReal,ORDER,MatrixKernelClass,NON_HOMOGENEOUS>
/// DFT specific /// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding static const int dimfft = 1; // unidim FFT since fully circulant embedding
typedef FFft<FReal,dimfft> DftClass; // Fast Discrete Fourier Transformator typedef FFftw<FReal,FComplex<FReal>,dimfft> DftClass; // Fast Discrete Fourier Transformator
DftClass Dft; DftClass Dft;
const unsigned int opt_rc; // specific to real valued kernel const unsigned int opt_rc; // specific to real valued kernel
...@@ -425,11 +416,8 @@ public: ...@@ -425,11 +416,8 @@ public:
: TreeHeight(inTreeHeight), : TreeHeight(inTreeHeight),
RootCellWidth(inRootCellWidth), RootCellWidth(inRootCellWidth),
CellWidthExtension(inCellWidthExtension), CellWidthExtension(inCellWidthExtension),
Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion)
{ {
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// allocate FC // allocate FC
FC = new FComplex<FReal>**[TreeHeight]; FC = new FComplex<FReal>**[TreeHeight];
...@@ -454,12 +442,8 @@ public: ...@@ -454,12 +442,8 @@ public:
TreeHeight(other.TreeHeight), TreeHeight(other.TreeHeight),
RootCellWidth(other.RootCellWidth), RootCellWidth(other.RootCellWidth),
CellWidthExtension(other.CellWidthExtension), CellWidthExtension(other.CellWidthExtension),
Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion)
{ {
// init DFT
const int steps[dimfft] = {rc};
Dft.buildDFT(steps);
// copy node_diff // copy node_diff
memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes); memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes);
} }
...@@ -517,7 +501,7 @@ public: ...@@ -517,7 +501,7 @@ public:
FReal Px[rc]; FReal Px[rc];
FBlas::setzero(rc,Px); FBlas::setzero(rc,Px);
// Apply forward Discrete Fourier Transform // Apply forward Discrete Fourier Transform
Dft.applyIDFT(FX,Px); Dft.applyIDFTNorm(FX,Px);
// Unapply Zero Padding // Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j) for (unsigned int j=0; j<nnodes; ++j)
x[j]+=Px[node_diff[nnodes-j-1]]; x[j]+=Px[node_diff[nnodes-j-1]];
......
...@@ -31,42 +31,37 @@ ...@@ -31,42 +31,37 @@
// elseif libfftw_dev: usr/include/fftw3.h // elseif libfftw_dev: usr/include/fftw3.h
#include <fftw3.h> #include <fftw3.h>
#include "FGlobal.hpp" // for @class FDft only
#include "FComplex.hpp" #include "FBlas.hpp"
#include "FMath.hpp"
#include "FTic.hpp" #include "FComplex.hpp"
/** /**
* @author Pierre Blanchard (pierre.blanchard@inria.fr) * @author Pierre Blanchard (pierre.blanchard@inria.fr)
* @class FDft, @class FFft and @class FCFft * @class FDft, @class FFftw
* Please read the license * Please read the license
* *
* These classes handle the forward and backward Discete Fourier Transform * These classes handle the forward and backward Discete Fourier Transform
* (DFT). * (DFT).
* @class FDft implements a direct method while @class FFft uses the Fast * @class FDft implements a direct method while @class FFftw uses the Fast
* Fourier Transform (FFT). The FFT algorithm can either be provided by the * Fourier Transform (FFT). The FFT algorithm can either be provided by the
* FFTW(3) library itself or a version that is wrapped in Intel MKL. * FFTW(3) library itself or a version that is wrapped in Intel MKL.
* *
* The direct DFT is templatized with the input value type (FReal or FComplex<FReal>), * The @class FDft is templatized with the input value type (FReal or FComplex<FReal>),
* while 2 distinct classes resp. @class FFft and @class FCFft are defined * while @class FFftw is templatized with input and output value types and the dimension.
* resp. for the real and complex valued FFT.
* *
* The aim of writing these specific classes is to allow further customization * The aim of writing these specific classes is to allow further customization
* of the DFT such as implementing a tensorial variant, a weighted variant * of the DFT such as implementing a tensorial variant, a weighted variant
* or any other feature. * or any other feature.
* *
* TODO: some copies might slow down the routines, this needs to be optimized.
*
*/ */
/** /**
* @class FDft * @class FDft
* *
* @tparam nsteps number of sampled values \f$N\f$ * @tparam size_ number of sampled values \f$N\f$
*/ */
template< class FReal, typename ValueType = FReal> template< class FReal, typename ValueType = FReal>
class FDft class FDft
...@@ -78,37 +73,37 @@ class FDft ...@@ -78,37 +73,37 @@ class FDft
FReal *cosRS_, *sinRS_; FReal *cosRS_, *sinRS_;
private: private:
unsigned int nsteps_; //< number of steps unsigned int size_; //< number of steps
void initDFT() void initDFT()
{ {
// allocate arrays // allocate arrays
data_ = new ValueType[nsteps_]; data_ = new ValueType[size_];
dataF_ = new FComplex<FReal>[nsteps_]; dataF_ = new FComplex<FReal>[size_];
// Beware this is extremely HEAVY to store!!! => Use FDft only for debug! // Beware this is extremely HEAVY to store!!! => Use FDft only for debug!
cosRS_ = new FReal[nsteps_*nsteps_]; cosRS_ = new FReal[size_*size_];
sinRS_ = new FReal[nsteps_*nsteps_]; sinRS_ = new FReal[size_*size_];
for(unsigned int r=0; r<nsteps_; ++r) for(unsigned int r=0; r<size_; ++r)
for(unsigned int s=0; s<nsteps_; ++s){ for(unsigned int s=0; s<size_; ++s){
FReal thetaRS = 2*M_PI*r*s/nsteps_; FReal thetaRS = 2*M_PI*r*s/size_;
cosRS_[r*nsteps_+s]=FMath::Cos(thetaRS); cosRS_[r*size_+s]=FMath::Cos(thetaRS);
sinRS_[r*nsteps_+s]=FMath::Sin(thetaRS); sinRS_[r*size_+s]=FMath::Sin(thetaRS);
} }
} }
public: public:
FDft(const unsigned int nsteps) FDft(const unsigned int size)
: nsteps_(nsteps) : size_(size)
{ {
// init DFT // init DFT
initDFT(); initDFT();
} }
FDft(const FDft& other) FDft(const FDft& other)
: nsteps_(other.nsteps_) : size_(other.size_)
{ {
// init DFT // init DFT
initDFT(); initDFT();
...@@ -128,18 +123,18 @@ public: ...@@ -128,18 +123,18 @@ public:
FComplex<FReal>* transformedData) const FComplex<FReal>* transformedData) const
{ {
// read sampled data // read sampled data
FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(dataF_)); FBlas::c_setzero(size_,reinterpret_cast<FReal*>(dataF_));
FBlas::copy(nsteps_, sampledData,data_); FBlas::copy(size_, sampledData,data_);
// perform direct forward transformation // perform direct forward transformation
for(unsigned int r=0; r<nsteps_; ++r) for(unsigned int r=0; r<size_; ++r)
for(unsigned int s=0; s<nsteps_; ++s){ for(unsigned int s=0; s<size_; ++s){
dataF_[r] += FComplex<FReal>(data_[s]*cosRS_[r*nsteps_+s], dataF_[r] += FComplex<FReal>(data_[s]*cosRS_[r*size_+s],
-data_[s]*sinRS_[r*nsteps_+s]); -data_[s]*sinRS_[r*size_+s]);
} }
// write transformed data // write transformed data
FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(dataF_), FBlas::c_copy(size_,reinterpret_cast<FReal*>(dataF_),
reinterpret_cast<FReal*>(transformedData)); reinterpret_cast<FReal*>(transformedData));
} }
// Complexe valued DFT // Complexe valued DFT
...@@ -147,21 +142,21 @@ public: ...@@ -147,21 +142,21 @@ public:
FComplex<FReal>* transformedData) const FComplex<FReal>* transformedData) const
{ {
// read sampled data // read sampled data
FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(dataF_)); FBlas::c_setzero(size_,reinterpret_cast<FReal*>(dataF_));
FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(sampledData), FBlas::c_copy(size_,reinterpret_cast<const FReal*>(sampledData),
reinterpret_cast<FReal*>(data_)); reinterpret_cast<FReal*>(data_));
// perform direct forward transformation // perform direct forward transformation
for(unsigned int r=0; r<nsteps_; ++r) for(unsigned int r=0; r<size_; ++r)
for(unsigned int s=0; s<nsteps_; ++s){ for(unsigned int s=0; s<size_; ++s){
dataF_[r] += FComplex<FReal>(data_[s].getReal()*cosRS_[r*nsteps_+s] dataF_[r] += FComplex<FReal>(data_[s].getReal()*cosRS_[r*size_+s]
+ data_[s].getImag()*sinRS_[r*nsteps_+s], + data_[s].getImag()*sinRS_[r*size_+s],
data_[s].getImag()*cosRS_[r*nsteps_+s] data_[s].getImag()*cosRS_[r*size_+s]
- data_[s].getReal()*sinRS_[r*nsteps_+s]); - data_[s].getReal()*sinRS_[r*size_+s]);
} }
// write transformed data // write transformed data
FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(dataF_), FBlas::c_copy(size_,reinterpret_cast<FReal*>(dataF_),
reinterpret_cast<FReal*>(transformedData)); reinterpret_cast<FReal*>(transformedData));
} }
...@@ -171,21 +166,21 @@ public: ...@@ -171,21 +166,21 @@ public:
FReal* sampledData) const FReal* sampledData) const
{ {
// read transformed data // read transformed data
FBlas::setzero(nsteps_,data_); FBlas::setzero(size_,data_);
FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(transformedData), FBlas::c_copy(size_,reinterpret_cast<const FReal*>(transformedData),
reinterpret_cast<FReal*>(dataF_)); reinterpret_cast<FReal*>(dataF_));
// perform direct backward transformation // perform direct backward transformation
for(unsigned int r=0; r<nsteps_; ++r){ for(unsigned int r=0; r<size_; ++r){
for(unsigned int s=0; s<nsteps_; ++s){ for(unsigned int s=0; s<size_; ++s){
data_[r] += dataF_[s].getReal()*cosRS_[r*nsteps_+s] data_[r] += dataF_[s].getReal()*cosRS_[r*size_+s]
+ dataF_[s].getImag()*sinRS_[r*nsteps_+s]; + dataF_[s].getImag()*sinRS_[r*size_+s];
} }
data_[r]*=1./nsteps_; data_[r]*=1./size_;
} }
// write sampled data // write sampled data
FBlas::copy(nsteps_,data_,sampledData); FBlas::copy(size_,data_,sampledData);
} }
// Complexe valued IDFT // Complexe valued IDFT
...@@ -193,259 +188,365 @@ public: ...@@ -193,259 +188,365 @@ public:
FComplex<FReal>* sampledData) const FComplex<FReal>* sampledData) const
{ {
// read transformed data // read transformed data
FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(data_)); FBlas::c_setzero(size_,reinterpret_cast<FReal*>(data_));
FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(transformedData), FBlas::c_copy(size_,reinterpret_cast<const FReal*>(transformedData),
reinterpret_cast<FReal*>(dataF_)); reinterpret_cast<FReal*>(dataF_));
// perform direct backward transformation // perform direct backward transformation
for(unsigned int r=0; r<nsteps_; ++r){