diff --git a/Src/Kernels/Uniform/FUnifM2LHandler.hpp b/Src/Kernels/Uniform/FUnifM2LHandler.hpp index 0c38e1861998931213f0df9accd0c32022b07c83..7e02ee8dbe80b3413237a3ed8f9e3961b68e01f5 100644 --- a/Src/Kernels/Uniform/FUnifM2LHandler.hpp +++ b/Src/Kernels/Uniform/FUnifM2LHandler.hpp @@ -26,14 +26,14 @@ #include #include -#include "../../Utils/FBlas.hpp" -#include "../../Utils/FTic.hpp" -#include "../../Utils/FDft.hpp" +#include "Utils/FBlas.hpp" +#include "Utils/FTic.hpp" +#include "Utils/FDft.hpp" -#include "../../Utils/FComplex.hpp" +#include "Utils/FComplex.hpp" -#include "./FUnifTensor.hpp" +#include "FUnifTensor.hpp" /*! Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space. PB: Compute() does not belong to the M2LHandler like it does in the Chebyshev kernel. This allows much nicer specialization of the M2LHandler class with respect to the homogeneity of the kernel of interaction like in the ChebyshevSym kernel.*/ @@ -67,9 +67,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel TensorType::setNodeIdsPairs(node_ids_pairs); // init Discrete Fourier Transformator const int dimfft = 1; // unidim FFT since fully circulant embedding - const int steps[dimfft] = {rc}; - FFft Dft; - Dft.buildDFT(steps); + FFftw,dimfft> Dft(rc); // get first column of K via permutation unsigned int perm[rc]; TensorType::setStoragePermutation(perm); @@ -178,7 +176,7 @@ class FUnifM2LHandler /// DFT specific static const int dimfft = 1; // unidim FFT since fully circulant embedding - typedef FFft DftClass; // Fast Discrete Fourier Transformator + typedef FFftw,dimfft> DftClass; // Fast Discrete Fourier Transformator DftClass Dft; const unsigned int opt_rc; // specific to real valued kernel @@ -198,11 +196,8 @@ class FUnifM2LHandler public: template FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const int inLeafLevelSeparationCriterion = 1) - : FC(nullptr), Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) + : FC(nullptr), Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { - // init DFT - const int steps[dimfft] = {rc}; - Dft.buildDFT(steps); // initialize root node ids TensorType::setNodeIdsDiff(node_diff); @@ -216,11 +211,8 @@ public: * Copy constructor */ FUnifM2LHandler(const FUnifM2LHandler& other) - : FC(other.FC), Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) + : FC(other.FC), Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) { - // init DFT - const int steps[dimfft] = {rc}; - Dft.buildDFT(steps); // copy node_diff memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes); } @@ -269,7 +261,7 @@ public: FReal Px[rc]; FBlas::setzero(rc,Px); // Apply forward Discrete Fourier Transform - Dft.applyIDFT(FX,Px); + Dft.applyIDFTNorm(FX,Px); // Unapply Zero Padding for (unsigned int j=0; j unsigned int node_diff[nnodes*nnodes]; /// DFT specific static const int dimfft = 1; // unidim FFT since fully circulant embedding - typedef FFft DftClass; // Fast Discrete Fourier Transformator + typedef FFftw,dimfft> DftClass; // Fast real-valued Discrete Fourier Transformator DftClass Dft; const unsigned int opt_rc; // specific to real valued kernel @@ -364,11 +356,8 @@ public: FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth, const int inLeafLevelSeparationCriterion = 1) : TreeHeight(inTreeHeight), RootCellWidth(inRootCellWidth), - Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) + Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { - // init DFT - const int steps[dimfft] = {rc}; - Dft.buildDFT(steps); // initialize root node ids TensorType::setNodeIdsDiff(node_diff); @@ -390,11 +379,8 @@ public: : FC(other.FC), TreeHeight(other.TreeHeight), RootCellWidth(other.RootCellWidth), - Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) + Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) { - // init DFT - const int steps[dimfft] = {rc}; - Dft.buildDFT(steps); // copy node_diff memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes); } @@ -455,7 +441,7 @@ public: FReal Px[rc]; FBlas::setzero(rc,Px); // Apply forward Discrete Fourier Transform - Dft.applyIDFT(FX,Px); + Dft.applyIDFTNorm(FX,Px); // Unapply Zero Padding for (unsigned int j=0; j #include -#include "../../Utils/FBlas.hpp" -#include "../../Utils/FTic.hpp" -#include "../../Utils/FDft.hpp" +#include "Utils/FBlas.hpp" +#include "Utils/FTic.hpp" +#include "Utils/FDft.hpp" -#include "../../Utils/FComplex.hpp" +#include "Utils/FComplex.hpp" -#include "./FUnifTensor.hpp" +#include "FUnifTensor.hpp" /*! Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space. These interactions can be tensorial (of size ncmp) and are computed blockwise @@ -83,9 +83,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, // init Discrete Fourier Transformator const int dimfft = 1; // unidim FFT since fully circulant embedding - const int steps[dimfft] = {rc}; - FFft Dft; - Dft.buildDFT(steps); + FFftw,dimfft> Dft(rc); // get first column of K via permutation unsigned int perm[rc]; @@ -208,7 +206,7 @@ class FUnifTensorialM2LHandler /// DFT specific static const int dimfft = 1; // unidim FFT since fully circulant embedding - typedef FFft DftClass; // Fast Discrete Fourier Transformator + typedef FFftw,dimfft> DftClass; // Fast Discrete Fourier Transformator DftClass Dft; const unsigned int opt_rc; // specific to real valued kernel @@ -228,11 +226,8 @@ class FUnifTensorialM2LHandler public: FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const FReal inCellWidthExtension, const int inLeafLevelSeparationCriterion = 1) : CellWidthExtension(inCellWidthExtension), - Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) + Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { - // init DFT - const int steps[dimfft] = {rc}; - Dft.buildDFT(steps); // allocate FC FC = new FComplex*[ncmp]; @@ -252,12 +247,8 @@ public: FUnifTensorialM2LHandler(const FUnifTensorialM2LHandler& other) : FC(other.FC), CellWidthExtension(other.CellWidthExtension), - Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) - { - // init DFT - const int steps[dimfft] = {rc}; - Dft.buildDFT(steps); - + Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) + { // copy node_diff memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes); } @@ -311,7 +302,7 @@ public: FReal Px[rc]; FBlas::setzero(rc,Px); // Apply forward Discrete Fourier Transform - Dft.applyIDFT(FX,Px); + Dft.applyIDFTNorm(FX,Px); // Unapply Zero Padding for (unsigned int j=0; j /// DFT specific static const int dimfft = 1; // unidim FFT since fully circulant embedding - typedef FFft DftClass; // Fast Discrete Fourier Transformator + typedef FFftw,dimfft> DftClass; // Fast Discrete Fourier Transformator DftClass Dft; const unsigned int opt_rc; // specific to real valued kernel @@ -425,11 +416,8 @@ public: : TreeHeight(inTreeHeight), RootCellWidth(inRootCellWidth), CellWidthExtension(inCellWidthExtension), - Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) + Dft(rc), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { - // init DFT - const int steps[dimfft] = {rc}; - Dft.buildDFT(steps); // allocate FC FC = new FComplex**[TreeHeight]; @@ -454,12 +442,8 @@ public: TreeHeight(other.TreeHeight), RootCellWidth(other.RootCellWidth), CellWidthExtension(other.CellWidthExtension), - Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) + Dft(other.Dft), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) { - // init DFT - const int steps[dimfft] = {rc}; - Dft.buildDFT(steps); - // copy node_diff memcpy(node_diff,other.node_diff,sizeof(unsigned int)*nnodes*nnodes); } @@ -517,7 +501,7 @@ public: FReal Px[rc]; FBlas::setzero(rc,Px); // Apply forward Discrete Fourier Transform - Dft.applyIDFT(FX,Px); + Dft.applyIDFTNorm(FX,Px); // Unapply Zero Padding for (unsigned int j=0; j -#include "FGlobal.hpp" -#include "FComplex.hpp" - -#include "FMath.hpp" +// for @class FDft only +#include "FBlas.hpp" -#include "FTic.hpp" +#include "FComplex.hpp" /** * @author Pierre Blanchard (pierre.blanchard@inria.fr) - * @class FDft, @class FFft and @class FCFft + * @class FDft, @class FFftw * Please read the license * * These classes handle the forward and backward Discete Fourier Transform * (DFT). - * @class FDft implements a direct method while @class FFft uses the Fast + * @class FDft implements a direct method while @class FFftw uses the Fast * Fourier Transform (FFT). The FFT algorithm can either be provided by the * FFTW(3) library itself or a version that is wrapped in Intel MKL. * - * The direct DFT is templatized with the input value type (FReal or FComplex), - * while 2 distinct classes resp. @class FFft and @class FCFft are defined - * resp. for the real and complex valued FFT. + * The @class FDft is templatized with the input value type (FReal or FComplex), + * while @class FFftw is templatized with input and output value types and the dimension. * * The aim of writing these specific classes is to allow further customization * of the DFT such as implementing a tensorial variant, a weighted variant * or any other feature. * - * TODO: some copies might slow down the routines, this needs to be optimized. - * */ /** * @class FDft * - * @tparam nsteps number of sampled values \f$N\f$ + * @tparam size_ number of sampled values \f$N\f$ */ template< class FReal, typename ValueType = FReal> class FDft @@ -78,37 +73,37 @@ class FDft FReal *cosRS_, *sinRS_; private: - unsigned int nsteps_; //< number of steps + unsigned int size_; //< number of steps void initDFT() { // allocate arrays - data_ = new ValueType[nsteps_]; - dataF_ = new FComplex[nsteps_]; + data_ = new ValueType[size_]; + dataF_ = new FComplex[size_]; // Beware this is extremely HEAVY to store!!! => Use FDft only for debug! - cosRS_ = new FReal[nsteps_*nsteps_]; - sinRS_ = new FReal[nsteps_*nsteps_]; - - for(unsigned int r=0; r* transformedData) const { // read sampled data - FBlas::c_setzero(nsteps_,reinterpret_cast(dataF_)); - FBlas::copy(nsteps_, sampledData,data_); + FBlas::c_setzero(size_,reinterpret_cast(dataF_)); + FBlas::copy(size_, sampledData,data_); // perform direct forward transformation - for(unsigned int r=0; r(data_[s]*cosRS_[r*nsteps_+s], - -data_[s]*sinRS_[r*nsteps_+s]); + for(unsigned int r=0; r(data_[s]*cosRS_[r*size_+s], + -data_[s]*sinRS_[r*size_+s]); } // write transformed data - FBlas::c_copy(nsteps_,reinterpret_cast(dataF_), + FBlas::c_copy(size_,reinterpret_cast(dataF_), reinterpret_cast(transformedData)); } // Complexe valued DFT @@ -147,21 +142,21 @@ public: FComplex* transformedData) const { // read sampled data - FBlas::c_setzero(nsteps_,reinterpret_cast(dataF_)); - FBlas::c_copy(nsteps_,reinterpret_cast(sampledData), + FBlas::c_setzero(size_,reinterpret_cast(dataF_)); + FBlas::c_copy(size_,reinterpret_cast(sampledData), reinterpret_cast(data_)); // perform direct forward transformation - for(unsigned int r=0; r(data_[s].getReal()*cosRS_[r*nsteps_+s] - + data_[s].getImag()*sinRS_[r*nsteps_+s], - data_[s].getImag()*cosRS_[r*nsteps_+s] - - data_[s].getReal()*sinRS_[r*nsteps_+s]); + for(unsigned int r=0; r(data_[s].getReal()*cosRS_[r*size_+s] + + data_[s].getImag()*sinRS_[r*size_+s], + data_[s].getImag()*cosRS_[r*size_+s] + - data_[s].getReal()*sinRS_[r*size_+s]); } // write transformed data - FBlas::c_copy(nsteps_,reinterpret_cast(dataF_), + FBlas::c_copy(size_,reinterpret_cast(dataF_), reinterpret_cast(transformedData)); } @@ -171,21 +166,21 @@ public: FReal* sampledData) const { // read transformed data - FBlas::setzero(nsteps_,data_); - FBlas::c_copy(nsteps_,reinterpret_cast(transformedData), + FBlas::setzero(size_,data_); + FBlas::c_copy(size_,reinterpret_cast(transformedData), reinterpret_cast(dataF_)); // perform direct backward transformation - for(unsigned int r=0; r* sampledData) const { // read transformed data - FBlas::c_setzero(nsteps_,reinterpret_cast(data_)); - FBlas::c_copy(nsteps_,reinterpret_cast(transformedData), + FBlas::c_setzero(size_,reinterpret_cast(data_)); + FBlas::c_copy(size_,reinterpret_cast(transformedData), reinterpret_cast(dataF_)); // perform direct backward transformation - for(unsigned int r=0; r(dataF_[s].getReal()*cosRS_[r*nsteps_+s] - - dataF_[s].getImag()*sinRS_[r*nsteps_+s], - dataF_[s].getImag()*cosRS_[r*nsteps_+s] - + dataF_[s].getReal()*sinRS_[r*nsteps_+s]); + for(unsigned int r=0; r(dataF_[s].getReal()*cosRS_[r*size_+s] + - dataF_[s].getImag()*sinRS_[r*size_+s], + dataF_[s].getImag()*cosRS_[r*size_+s] + + dataF_[s].getReal()*sinRS_[r*size_+s]); } - data_[r]*=1./nsteps_; + data_[r]*=1./size_; } // write sampled data - FBlas::c_copy(nsteps_,reinterpret_cast(data_), + FBlas::c_copy(size_,reinterpret_cast(data_), reinterpret_cast(sampledData)); } }; -/** - * @class FFft - * - * @tparam nsteps number of sampled values \f$N\f$ - */ -template -class FFft -{ - enum{dim = DIM}; - // arrays - FReal* data_; //< data in physical space - FComplex* dataF_; //< data in Fourier space - // plans - fftw_plan plan_c2r_; //< backward FFT plan - fftw_plan plan_r2c_; //< forward FFT plan -private: - int steps_[dim]; //< number of steps per dimension - int nsteps_; //< total number of steps - int nsteps_opt_; //< reduced number of steps for real valued FFT - void initDFT() - { - // allocate arrays - data_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_); - dataF_ = (FComplex*) fftw_malloc(sizeof(FComplex) * nsteps_opt_); - - // // unidim fftw plans - // plan_c2r_ = - // fftw_plan_dft_c2r_1d(nsteps_, - // reinterpret_cast(dataF_), - // data_, - // FFTW_MEASURE);// TODO: test FFTW_ESTIMATE - // plan_r2c_ = - // fftw_plan_dft_r2c_1d(nsteps_, - // data_, - // reinterpret_cast(dataF_), - // FFTW_MEASURE); - - // multidim fftw plans - plan_c2r_ = - fftw_plan_dft_c2r(dim, steps_, - reinterpret_cast(dataF_), - data_, - FFTW_MEASURE);// TODO: test FFTW_ESTIMATE - plan_r2c_ = - fftw_plan_dft_r2c(dim, steps_, - data_, - reinterpret_cast(dataF_), - FFTW_MEASURE); +/** + * @class FFftw + * + * @brief This class is a wrapper to FFTW. It is important to enable float if FFftw is used. + * + * + * @tparam ValueClassSrc Source value type + * @tparam ValueClassDest Destination value type + * @tparam DIM dimension \f$d\f$ of the Discrete Fourier Transform + * @param nbPointsPerDim number of sampled values \f$N\f$ per dimension + * @param nbPoints total number of sampled values \f$N^d\f$ + * + * + */ +template +class FFftwCore { + enum{dim = DIM}; +protected: + static void PlusEqual(double dest[], const double src[], const int nbElements){ + for(int idxVal = 0 ; idxVal < nbElements ; ++idxVal){ + dest[idxVal] += src[idxVal]; + } } - -public: - - FFft() - : nsteps_(0), nsteps_opt_(0) - { - for(int d = 0; d dest[], const fftw_complex src[], const int nbElements){ + const double* ptrSrc = reinterpret_cast(src); + for(int idxVal = 0 ; idxVal < nbElements ; ++idxVal){ + dest[idxVal] += FComplex(ptrSrc[2*idxVal], ptrSrc[2*idxVal+1]); + } + } + static void Bind_fftw_alloc(double** ptr, const int size){ + *ptr = (double*) fftw_malloc(sizeof(double) * size); + //SpErrAEH(*ptr); + } + static void Bind_fftw_alloc(fftw_complex** ptr, const int size){ + *ptr = (fftw_complex*) fftw_malloc(sizeof(fftw_complex) * size); + //SpErrAEH(*ptr); + } + static void Bind_fftw_destroy_plan(fftw_plan plan){ + //#pragma omp critical(SF_KEEP_FFTW) + //{ + fftw_destroy_plan(plan); + //} + } + static fftw_plan Bind_fftw_plan_dft(int d, int *n0, fftw_complex *in, double *out, int sign, unsigned flags){ + //SpErrAE(sign == 1); + fftw_plan plan; + //#pragma omp critical(SF_KEEP_FFTW) + //{ + plan = fftw_plan_dft_c2r(d, n0, in, out, flags); + //} + return plan; + } + static fftw_plan Bind_fftw_plan_dft(int d, int *n0, double *in, fftw_complex *out, int sign, unsigned flags){ + //SpErrAE(sign == -1); + fftw_plan plan; + //#pragma omp critical(SF_KEEP_FFTW) + //{ + plan = fftw_plan_dft_r2c(d, n0, in, out, flags); + //} + return plan; + } + static fftw_plan Bind_fftw_plan_dft(int d, int *n0, fftw_complex *in, fftw_complex *out, int sign, unsigned flags){ + fftw_plan plan; + //#pragma omp critical(SF_KEEP_FFTW) + //{ + plan = fftw_plan_dft(d, n0, in, out, sign, flags); + //} + return plan; + } + static void Bind_fftw_execute(fftw_plan plan){ + fftw_execute(plan); } - void buildDFT(const int steps[dim]) - { - // init number of steps - nsteps_=1; nsteps_opt_=1; - for(int d = 0; d dest[], const fftwf_complex src[], const int nbElements){ + const float* ptrSrc = reinterpret_cast(src); + for(int idxVal = 0 ; idxVal < nbElements ; ++idxVal){ + dest[idxVal] += FComplex(ptrSrc[2*idxVal], ptrSrc[2*idxVal+1]); + } } - - virtual ~FFft() - { - fftw_destroy_plan(plan_c2r_); - fftw_destroy_plan(plan_r2c_); - fftw_free(data_); - fftw_free(dataF_); + static void Bind_fftw_alloc(float** ptr, const int size){ + *ptr = (float*) fftwf_malloc(sizeof(float) * size); + //SpErrAEH(*ptr); } - - void applyDFT(const FReal* sampledData, - FComplex* transformedData) const - { - FBlas::c_setzero(nsteps_opt_,reinterpret_cast(dataF_)); - FBlas::copy(nsteps_, sampledData,data_); - - // perform fft - fftw_execute( plan_r2c_ ); - - // write transformed data - FBlas::c_copy(nsteps_opt_,reinterpret_cast(dataF_), - reinterpret_cast(transformedData)); + static void Bind_fftw_alloc(fftwf_complex** ptr, const int size){ + *ptr = (fftwf_complex*) fftw_malloc(sizeof(fftwf_complex) * size); + //SpErrAEH(*ptr); } - - - void applyIDFT(const FComplex* transformedData, - FReal* sampledData) const - { - // read transformed data - FBlas::setzero(nsteps_,data_); - FBlas::c_copy(nsteps_opt_,reinterpret_cast(transformedData), - reinterpret_cast(dataF_)); - - // perform ifft - fftw_execute( plan_c2r_ ); - - for( int s=0; s -class FCFft -{ - enum{dim = DIM}; - - // arrays - FComplex* data_; //< data in physical space - FComplex* dataF_; //< data in Fourier space - - // plans - fftw_plan plan_b_; // backward FFT plan - fftw_plan plan_f_; // forward FFT plan - -private: - int steps_[dim]; //< number of steps per dimension - int nsteps_; //< total number of steps public: - - FCFft(const int steps[dim]) - { - // init number of steps - nsteps_=1; - for(int d = 0; d*) fftw_malloc(sizeof(FComplex) * nsteps_); - dataF_ = (FComplex*) fftw_malloc(sizeof(FComplex) * nsteps_); - - // multidim fftw plans - plan_b_ = - fftw_plan_dft(dim, steps_, - reinterpret_cast(dataF_), - reinterpret_cast(data_), - FFTW_BACKWARD, - FFTW_MEASURE);// TODO: test FFTW_ESTIMATE - plan_f_ = - fftw_plan_dft(dim, steps_, - reinterpret_cast(data_), - reinterpret_cast(dataF_), - FFTW_FORWARD, - FFTW_MEASURE); - - } - - virtual ~FCFft() - { - fftw_destroy_plan(plan_b_); - fftw_destroy_plan(plan_f_); - fftw_free(data_); - fftw_free(dataF_); } - - void applyDFT(const FComplex* sampledData, - FComplex* transformedData) const - { - FBlas::c_setzero(nsteps_,reinterpret_cast(dataF_)); - FBlas::c_copy(nsteps_, reinterpret_cast(sampledData), - reinterpret_cast(data_)); - - // perform fft - fftw_execute( plan_f_ ); - - // write transformed data - FBlas::c_copy(nsteps_,reinterpret_cast(dataF_), - reinterpret_cast(transformedData)); + /** Copy r-constructor move data from given parameter object */ + FFftwCore(FFftwCore&& other) + : nbPoints(0), timeSignal(nullptr), freqSignal(nullptr) { + nbPointsPerDim = other.nbPointsPerDim; + nbPoints = other.nbPoints; + timeSignal = other.timeSignal; + freqSignal = other.freqSignal; + memcpy(plan_s2d, other.plan_s2d, sizeof(plan_s2d)); + memcpy(plan_d2s, other.plan_d2s, sizeof(plan_d2s)); + other.nbPointsPerDim = 0; + other.nbPoints = 0; + other.timeSignal = nullptr; + other.freqSignal = nullptr; + memset(other.plan_s2d, 0, sizeof(plan_s2d)); + memset(other.plan_d2s, 0, sizeof(plan_d2s)); + } + /** Copy r-operator move data from given parameter object */ + FFftwCore& operator=(FFftwCore&& other){ + releaseData(); + nbPointsPerDim = other.nbPointsPerDim; + nbPoints = other.nbPoints; + timeSignal = other.timeSignal; + freqSignal = other.freqSignal; + memcpy(plan_s2d, other.plan_s2d, sizeof(plan_s2d)); + memcpy(plan_d2s, other.plan_d2s, sizeof(plan_d2s)); + other.nbPointsPerDim = 0; + other.nbPoints = 0; + other.timeSignal = nullptr; + other.freqSignal = nullptr; + memset(other.plan_s2d, 0, sizeof(plan_s2d)); + memset(other.plan_d2s, 0, sizeof(plan_d2s)); + } + /** Release all data */ + ~FFftwCore(){ + releaseData(); + } + /** Dealloc and realloc to the desired size */ + void resize(const int inNbTemporalPoints){ + if(nbPointsPerDim != inNbTemporalPoints){ + releaseData(); + allocData(inNbTemporalPoints); + } + } + /** Compute the DFT using signalToTransform temporal values + * The result is added (+=) to resultSignal + */ + void applyDFT(const ValueClassSrc signalToTransform[], ValueClassDest resultSignal[]) const { + memcpy(timeSignal, signalToTransform, sizeof(ValueClassSrc) * nbPoints); + memset(freqSignal,0,sizeof(ValueClassDestFftw) * nbPoints); + Bind_fftw_execute( plan_d2s ); + PlusEqual(resultSignal, freqSignal, nbPoints); + } + /** Compute the inverse DFT using signalToTransform frequency values + * The result is added (+=) to resultSignal + */ + void applyIDFT(const ValueClassDest signalToTransform[], ValueClassSrc resultSignal[]) const { + memcpy(freqSignal, signalToTransform, sizeof(ValueClassDest) * nbPoints); + memset(timeSignal,0,sizeof(ValueClassSrcFftw) * nbPoints); + Bind_fftw_execute( plan_s2d ); + PlusEqual(resultSignal, timeSignal, nbPoints); + } + /** Compute the inverse DFT using signalToTransform frequency values + * The result is added (+=) to resultSignal + */ + void applyIDFTNorm(const ValueClassDest signalToTransform[], ValueClassSrc resultSignal[]) const { + memcpy(freqSignal, signalToTransform, sizeof(ValueClassDest) * nbPoints); + memset(timeSignal,0,sizeof(ValueClassSrcFftw) * nbPoints); + Bind_fftw_execute( plan_s2d ); + PlusEqual(resultSignal, timeSignal, nbPoints); + normalize(resultSignal); + } + void applyIDFTNormConj(const ValueClassDest signalToTransform[], ValueClassSrc resultSignal[]) const { + freqSignal[0][0] = signalToTransform[0].getReal(); + freqSignal[0][1] = signalToTransform[0].getImag(); + for(int idx = 1 ; idx <= nbPoints/2 ; ++idx){ + freqSignal[idx][0] = signalToTransform[idx].getReal(); + freqSignal[idx][1] = signalToTransform[idx].getImag(); + freqSignal[nbPoints-idx][0] = signalToTransform[idx].getReal(); + freqSignal[nbPoints-idx][1] = -signalToTransform[idx].getImag(); + } + memset(timeSignal,0,sizeof(ValueClassSrcFftw) * nbPoints); + Bind_fftw_execute( plan_s2d ); + PlusEqual(resultSignal, timeSignal, nbPoints); + normalize(resultSignal); + } + void normalize(double* resultSignal) const { + const double realNbPoints = static_cast(nbPoints); + for(int idxVal = 0 ; idxVal < nbPoints ; ++idxVal){ + resultSignal[idxVal] /= realNbPoints; + } + } + void normalize(FComplex* resultSignal) const { + const double realNbPoints = static_cast(nbPoints); + for(int idxVal = 0 ; idxVal < nbPoints ; ++idxVal){ + resultSignal[idxVal] /= realNbPoints; + } + } + void normalize(float* resultSignal) const { + const float realNbPoints = static_cast(nbPoints); + for(int idxVal = 0 ; idxVal < nbPoints ; ++idxVal){ + resultSignal[idxVal] /= realNbPoints; + } + } + void normalize(FComplex* resultSignal) const { + const float realNbPoints = static_cast(nbPoints); + for(int idxVal = 0 ; idxVal < nbPoints ; ++idxVal){ + resultSignal[idxVal] /= realNbPoints; + } } + /** To know if it is the real fftw */ + bool isTrueFftw() const{ + return true; + } +}; - void applyIDFT(const FComplex* transformedData, - FComplex* sampledData) const - { - // read transformed data - FBlas::c_setzero(nsteps_,reinterpret_cast(data_)); - FBlas::c_copy(nsteps_,reinterpret_cast(transformedData), - reinterpret_cast(dataF_)); - - // perform ifft - fftw_execute( plan_b_ ); - - for(unsigned int s=0; s(data_), - reinterpret_cast(sampledData)); - } +template +class FFftw; +////////////////////////////////////////////////////////////////////////////// +/// Double precision +////////////////////////////////////////////////////////////////////////////// +template +class FFftw , DIM> : public FFftwCore, double, fftw_complex, fftw_plan, DIM>{ + typedef FFftwCore, double, fftw_complex, fftw_plan, DIM> ParentClass; +public: + using ParentClass::ParentClass; +}; +template +class FFftw , FComplex, DIM> : public FFftwCore, FComplex, fftw_complex, fftw_complex, fftw_plan, DIM>{ + typedef FFftwCore, FComplex, fftw_complex, fftw_complex, fftw_plan, DIM> ParentClass; +public: + using ParentClass::ParentClass; +}; +////////////////////////////////////////////////////////////////////////////// +/// Single precision +////////////////////////////////////////////////////////////////////////////// +template +class FFftw , DIM> : public FFftwCore, float, fftwf_complex, fftwf_plan, DIM>{ + typedef FFftwCore, float, fftwf_complex, fftwf_plan, DIM> ParentClass; +public: + using ParentClass::ParentClass; +}; +template +class FFftw , FComplex, DIM> : public FFftwCore, FComplex, fftwf_complex, fftwf_complex, fftwf_plan, DIM>{ + typedef FFftwCore, FComplex, fftwf_complex, fftwf_complex, fftwf_plan, DIM> ParentClass; +public: + using ParentClass::ParentClass; }; + #endif /*SCALFMM_USE_FFT*/ #endif /* FDFT_HPP */ diff --git a/Tests/Kernels/testUnifAlgorithm.cpp b/Tests/Kernels/testUnifAlgorithm.cpp index bb7ee25e864364622a87111a37c6d5a02037605d..e0d9fcaefb6274a53478eb8f789889c9291263ee 100644 --- a/Tests/Kernels/testUnifAlgorithm.cpp +++ b/Tests/Kernels/testUnifAlgorithm.cpp @@ -50,7 +50,7 @@ #include "Core/FFmmAlgorithm.hpp" #include "Core/FFmmAlgorithmThread.hpp" -#include "../../Src/Utils/FParameterNames.hpp" +#include "Utils/FParameterNames.hpp" /** * This program runs the FMM Algorithm with the Uniform kernel and compares the results with a direct computation. diff --git a/Tests/Utils/testFFT.cpp b/Tests/Utils/testFFT.cpp new file mode 100644 index 0000000000000000000000000000000000000000..9706a9fe665165c9fed61a76ea2ec49f83c8a477 --- /dev/null +++ b/Tests/Utils/testFFT.cpp @@ -0,0 +1,185 @@ +// =================================================================================== +// Ce LOGICIEL "ScalFmm" est couvert par le copyright Inria 20xx-2012. +// Inria détient tous les droits de propriété sur le LOGICIEL, et souhaite que +// la communauté scientifique l'utilise afin de le tester et de l'évaluer. +// Inria donne gracieusement le droit d'utiliser ce LOGICIEL. Toute utilisation +// dans un but lucratif ou à des fins commerciales est interdite sauf autorisation +// expresse et préalable d'Inria. +// Toute utilisation hors des limites précisées ci-dessus et réalisée sans l'accord +// expresse préalable d'Inria constituerait donc le délit de contrefaçon. +// Le LOGICIEL étant un produit en cours de développement, Inria ne saurait assurer +// aucune responsabilité et notamment en aucune manière et en aucun cas, être tenu +// de répondre d'éventuels dommages directs ou indirects subits par l'utilisateur. +// Tout utilisateur du LOGICIEL s'engage à communiquer à Inria ses remarques +// relatives à l'usage du LOGICIEL +// =================================================================================== + +// ==== CMAKE ===== +// @FUSE_FFT +// ================ + +#include +#include + +#include + +#include "Utils/FGlobal.hpp" +#include "Utils/FComplex.hpp" + +#include "Utils/FTic.hpp" + +#include "Utils/FParameterNames.hpp" + +#include "Utils/FDft.hpp" + + +int main(int argc, char** argv) +{ + FHelpDescribeAndExit(argc, argv, "Test the FFT wrapper."); + + typedef float FReal; + const FReal FRandMax = FReal(RAND_MAX); + + FTic time; + + ////////////////////////////////////////////////////////////////////////////// + // INITIALIZATION + + // dimension + static const int dim = 3; + + // size (pick a power of 2 for better performance of the FFT algorithm) + const int size = 50; + int total_size = 1; + for(int d = 0; d FftR2COutputType; + typedef FComplex FftC2CInputType; + typedef FComplex FftC2COutputType; + + // fftw arrays + FftR2CInputType* FftR2CInput = new FftR2CInputType[total_size]; + FftR2CInputType* FftR2CInvOut = new FftR2CInputType[total_size]; + FftR2COutputType* FftR2COutput = new FftR2COutputType[total_size]; + + FftC2CInputType* FftC2CInput = new FftC2CInputType[total_size]; + FftC2CInputType* FftC2CInvOut = new FftC2CInputType[total_size]; + FftC2COutputType* FftC2COutput = new FftC2COutputType[total_size]; + + + // fftw wrappers + typedef FFftw,dim> FftwR2CClass; + typedef FFftw,FComplex,dim> FftwC2CClass; + + std::cout<< "Init FFTW wrappers: "; + time.tic(); + + FftwR2CClass FftwR2C(size); + FftwC2CClass FftwC2C(size); + + std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; + + ////////////////////////////////////////////////////////////////////////////// + // EXECUTION + // generate random physical data and set output to zero + for( int s=0; s(FftR2CInput, FftR2CInvOut, total_size).getRelativeInfNorm() > threshold) isWorking=false; + if(FMath::FAccurater(FftR2CInput, FftR2CInvOut, total_size).getRelativeL2Norm() > threshold) isWorking=false; + if(FMath::FAccurater(FftC2CInputReal, FftC2CInvOutReal, total_size).getRelativeInfNorm() > threshold) isWorking=false; + if(FMath::FAccurater(FftC2CInputImag, FftC2CInvOutImag, total_size).getRelativeInfNorm() > threshold) isWorking=false; + if(FMath::FAccurater(FftC2CInputReal, FftC2CInvOutReal, total_size).getRelativeL2Norm() > threshold) isWorking=false; + if(FMath::FAccurater(FftC2CInputImag, FftC2CInvOutImag, total_size).getRelativeL2Norm() > threshold) isWorking=false; + + if(isWorking) std::cout << "FFT Wrapper is working (errors < threshold = " << threshold << ")." << std::endl; + else std::cout << "FFT Wrapper is NOT working (errors > threshold = " << threshold << ")." << std::endl; + + //free memory + delete[] FftR2CInput; + delete[] FftR2CInvOut; + delete[] FftR2COutput; + delete[] FftC2CInput; + delete[] FftC2CInvOut; + delete[] FftC2COutput; + delete[] FftC2CInvOutReal; + delete[] FftC2CInputReal; + delete[] FftC2CInvOutImag; + delete[] FftC2CInputImag; + +}// end test diff --git a/Tests/Utils/testFFTW.cpp b/Tests/Utils/testFFTW.cpp deleted file mode 100644 index 91464d0bd5919bd41edf815dfce2a059cc9f535a..0000000000000000000000000000000000000000 --- a/Tests/Utils/testFFTW.cpp +++ /dev/null @@ -1,143 +0,0 @@ -// =================================================================================== -// Ce LOGICIEL "ScalFmm" est couvert par le copyright Inria 20xx-2012. -// Inria détient tous les droits de propriété sur le LOGICIEL, et souhaite que -// la communauté scientifique l'utilise afin de le tester et de l'évaluer. -// Inria donne gracieusement le droit d'utiliser ce LOGICIEL. Toute utilisation -// dans un but lucratif ou à des fins commerciales est interdite sauf autorisation -// expresse et préalable d'Inria. -// Toute utilisation hors des limites précisées ci-dessus et réalisée sans l'accord -// expresse préalable d'Inria constituerait donc le délit de contrefaçon. -// Le LOGICIEL étant un produit en cours de développement, Inria ne saurait assurer -// aucune responsabilité et notamment en aucune manière et en aucun cas, être tenu -// de répondre d'éventuels dommages directs ou indirects subits par l'utilisateur. -// Tout utilisateur du LOGICIEL s'engage à communiquer à Inria ses remarques -// relatives à l'usage du LOGICIEL -// =================================================================================== - -// ==== CMAKE ===== -// @FUSE_FFT -// ================ - -#include -#include - -#include - -#include "../../Src/Utils/FGlobal.hpp" -#include "../../Src/Utils/FComplex.hpp" - -#include "../../Src/Utils/FTic.hpp" - -#include "../../Src/Utils/FParameterNames.hpp" - -//#include "../../Src/Utils/FDft.hpp" - - -int main(int argc, char** argv) -{ - FHelpDescribeAndExit(argc, argv, "Test the FFTw (only the code is interesting)."); - - typedef double FReal; - const FReal FRandMax = FReal(RAND_MAX); - - FTic time; - - ////////////////////////////////////////////////////////////////////////////// - // INITIALIZATION - - // size (pick a power of 2 for better performance of the FFT algorithm) - unsigned int nsteps_ = 500; - - // fftw arrays - FReal* fftR_; - FComplex* fftC_; - - fftR_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_); - fftC_ = (FComplex*) fftw_malloc(sizeof(FComplex) * nsteps_); - - // fftw plans - // use routine defined in file: - // /PATH/TO/mkl/interfaces/fftw3xf/wrappers/fftw_plan_dft_c2r_1d.c - fftw_plan plan_c2r_; // backward FFT plan - fftw_plan plan_r2c_; // forward FFT plan - - std::cout<< "Init FFTW plans: "; - time.tic(); - - plan_c2r_ = - fftw_plan_dft_c2r_1d(nsteps_, - reinterpret_cast(fftC_), - fftR_, - FFTW_MEASURE); - plan_r2c_ = - fftw_plan_dft_r2c_1d(nsteps_, - fftR_, - reinterpret_cast(fftC_), - FFTW_MEASURE); - - std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; - - // alternative choice of plan type - // plan_c2r_ = - // fftw_plan_dft_1d(nsteps_, fftC_, fftR_, FFTW_BACKWARD, FFTW_MEASURE); - // plan_r2c_ = - // fftw_plan_dft_1d(nsteps_, fftR_, fftC_, FFTW_FORWARD, FFTW_MEASURE); - - ////////////////////////////////////////////////////////////////////////////// - // EXECUTION - // generate random physical data - for(unsigned int s=0; s(fftC_[s].getReal(),-fftC_[s].getImag()); - // } - // - // std::cout<< "Full Transformed data : "< -#include - -#include - -#include "../../Src/Utils/FGlobal.hpp" -#include "../../Src/Utils/FComplex.hpp" - -#include "../../Src/Utils/FTic.hpp" -#include "../../Src/Utils/FParameterNames.hpp" - -//#include "../../Src/Utils/FDft.hpp" - - -int main(int argc, char** argv) -{ - FHelpDescribeAndExit(argc, argv, "Test the FFT in multi dimensions (only the code is interesting)."); - - typedef double FReal; - const FReal FRandMax = FReal(RAND_MAX); - - FTic time; - - ////////////////////////////////////////////////////////////////////////////// - // INITIALIZATION - // size (pick a power of 2 for better performance of the FFT algorithm) - const int dim=2; - const int pow_nsteps_=8; - int steps_[dim]; - int nsteps_=1; - for(int d=0; d* fftC_; - - fftR_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_ ); - fftC_ = (FComplex*) fftw_malloc(sizeof(FComplex) * nsteps_ ); - - // fftw plans - // use routine defined in file: - // /PATH/TO/mkl/interfaces/fftw3xf/wrappers/fftw_plan_dft_c2r_1d.c - fftw_plan plan_c2r_; // backward FFT plan - fftw_plan plan_r2c_; // forward FFT plan - - std::cout<< "Init FFTW plans: "; - time.tic(); - - plan_c2r_ = - fftw_plan_dft_c2r(dim, steps_, - reinterpret_cast(fftC_), - fftR_, - FFTW_MEASURE); - plan_r2c_ = - fftw_plan_dft_r2c(dim, steps_, - fftR_, - reinterpret_cast(fftC_), - FFTW_MEASURE); - - std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; - - ////////////////////////////////////////////////////////////////////////////// - // EXECUTION - // generate random physical data - for(int s=0; s(fftC_[s].getReal(),-fftC_[s].getImag()); - //// } - - // std::cout<< "Full Transformed data : "< #include -#include "../../Src/Utils/FTic.hpp" -#include "../../Src/Utils/FMath.hpp" -#include "../../Src/Utils/FBlas.hpp" +#include "Utils/FTic.hpp" +#include "Utils/FMath.hpp" +#include "Utils/FBlas.hpp" // -#include "../../Src/Utils/FDft.hpp" -#include "../../Src/Utils/FComplex.hpp" +#include "Utils/FComplex.hpp" +#include "Utils/FDft.hpp" -#include "../../Src/Utils/FParameterNames.hpp" +#include "Utils/FParameterNames.hpp" int main(int argc, char ** argv){ @@ -110,10 +110,8 @@ int main(int argc, char ** argv){ std::cout<< "Set DFT: "; time.tic(); const int dim = 1; - const int steps[dim] = {N}; //FDft Dft(N);// direct version (Beware! Ordering of output differs from REAL valued-FFT) - FFft Dft;// fast version - Dft.buildDFT(steps);// fast version + FFftw,dim> Dft(N);// fast version std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; // Initialize manually @@ -170,7 +168,7 @@ int main(int argc, char ** argv){ // Transform FX back in Physical space std::cout<< "Transform FX->iF(FX): "; time.tic(); - Dft.applyIDFT(FX,iFX); + Dft.applyIDFTNorm(FX,iFX); std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; std::cout<< "iF(FK:FY): "< Dft; // fast version - Dft.buildDFT(steps); + FFftw,dimfft> Dft(rc); // fast version // Get first COLUMN of K and Store in T FReal T[rc]; @@ -418,7 +416,7 @@ int main(int argc, char ** argv){ // std::cout<< FPLocalExp[p] << ", "; // std::cout< Dft; // fast version - Dft.buildDFT(steps); + FFftw,dimfft> Dft(rc); // fast version // Get first COLUMN of K and Store in T FReal T[ncmp*rc]; @@ -466,7 +464,7 @@ int main(int argc, char ** argv){ // std::cout<