Une MAJ de sécurité est nécessaire sur notre version actuelle. Elle sera effectuée lundi 02/08 entre 12h30 et 13h. L'interruption de service devrait durer quelques minutes (probablement moins de 5 minutes).

Commit 2e1c9711 authored by BLANCHARD Pierre's avatar BLANCHARD Pierre
Browse files

Redesigned direct DFT and FFT classes (provided a separate complex valued FFT...

Redesigned direct DFT and FFT classes (provided a separate complex valued FFT and templatized direct DFT with physical value type); Provided test for C-interfaced Lapack functions such as Cholesky Decomposition (TODO add SVD and QRD).
parent fb5cedd0
......@@ -66,8 +66,10 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel
TensorType::setNodeIdsPairs(node_ids_pairs);
// init Discrete Fourier Transformator
const int dimfft = 1; // unidim FFT since fully circulant embedding
const int steps[dimfft] = {rc};
// FDft Dft(rc);
FFft<1> Dft(rc);
FFft<dimfft> Dft(steps);
// get first column of K via permutation
unsigned int perm[rc];
......@@ -177,15 +179,17 @@ class FUnifM2LHandler<ORDER,HOMOGENEOUS> : FNoCopyable
FComplexe *FC;
// for real valued kernel only n/2+1 complex values are stored
// after performing the DFT (the rest is deduced by conjugation)
unsigned int opt_rc;
typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes];
// FDft Dft; // Direct Discrete Fourier Transformator
FFft<1> Dft; // Fast Discrete Fourier Transformator
/// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
// FDft Dft; // Direct Discrete Fourier Transformator
typedef FFft<dimfft> DftClass; // Fast Discrete Fourier Transformator
FSmartPointer<DftClass,FSmartPointerMemory> Dft;
const unsigned int opt_rc; // specific to real valued kernel
static const std::string getFileName()
{
......@@ -201,9 +205,12 @@ public:
template <typename MatrixKernelClass>
FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal)
: FC(NULL),
opt_rc(rc/2+1),
Dft(rc) // initialize Discrete Fourier Transformator
opt_rc(rc/2+1)
{
// init DFT
const int steps[dimfft] = {rc};
Dft = new DftClass(steps);
// initialize root node ids
TensorType::setNodeIdsDiff(node_diff);
......@@ -253,7 +260,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply forward Discrete Fourier Transform
Dft.applyIDFT(FX,Px);
Dft->applyIDFT(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
......@@ -319,7 +326,7 @@ public:
Py[node_diff[i*nnodes]]=y[i];
// Apply forward Discrete Fourier Transform
Dft.applyDFT(Py,FY);
Dft->applyDFT(Py,FY);
}
......@@ -342,15 +349,17 @@ class FUnifM2LHandler<ORDER,NON_HOMOGENEOUS> : FNoCopyable
const unsigned int TreeHeight;
const FReal RootCellWidth;
// for real valued kernel only n/2+1 complex values are stored
// after performing the DFT (the rest is deduced by conjugation)
unsigned int opt_rc;
typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes];
// FDft Dft; // Direct Discrete Fourier Transformator
FFft<1> Dft; // Fast Discrete Fourier Transformator
// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
// FDft Dft; // Direct Discrete Fourier Transformator
typedef FFft<dimfft> DftClass; // Fast Discrete Fourier Transformator
FSmartPointer<DftClass,FSmartPointerMemory> Dft;
const unsigned int opt_rc; // specific to real valued kernel
static const std::string getFileName()
{
......@@ -367,9 +376,12 @@ public:
FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth)
: TreeHeight(inTreeHeight),
RootCellWidth(inRootCellWidth),
opt_rc(rc/2+1),
Dft(rc) // initialize Discrete Fourier Transformator
{
opt_rc(rc/2+1)
{
// init DFT
const int steps[dimfft] = {rc};
Dft = new DftClass(steps);
// initialize root node ids
TensorType::setNodeIdsDiff(node_diff);
......@@ -431,7 +443,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply forward Discrete Fourier Transform
Dft.applyIDFT(FX,Px);
Dft->applyIDFT(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
......@@ -496,7 +508,7 @@ public:
Py[node_diff[i*nnodes]]=y[i];
// Apply forward Discrete Fourier Transform
Dft.applyDFT(Py,FY);
Dft->applyDFT(Py,FY);
}
......
......@@ -60,8 +60,10 @@ static void precompute(const MatrixKernelClass *const MatrixKernel, const FReal
_FC = new FComplexe [rc]; // TODO: do it in the non-sym version!!!
// init Discrete Fourier Transformator
const int dimfft = 1; // unidim FFT since fully circulant embedding
const int steps[dimfft] = {rc};
// FDft Dft(rc);
FFft Dft(rc);
FFft<dimfft> Dft(steps);
// reduce storage if real valued kernel
const unsigned int opt_rc = rc/2+1;
......
......@@ -48,9 +48,9 @@ class FTreeCoordinate;
* In fact, in the ChebyshevSym variant the matrix kernel needs to be
* evaluated compo-by-compo since we currently use a scalar ACA.
*
* 3) We currently use multiple 1D FFT instead of multidim FFT.
* TODO switch to multidim if relevant in considered range of size
* (see testFFTW and testFFTWMultidim).
* 3) We currently use multiple 1D FFT instead of multidim FFT since embedding
* is circulant. Multidim FFT could be used if embedding were block circulant.
* TODO investigate possibility of block circulant embedding
*
* @tparam CellClass Type of cell
* @tparam ContainerClass Type of container to store particles
......
......@@ -75,8 +75,10 @@ static void Compute(const MatrixKernelClass *const MatrixKernel,
TensorType::setNodeIdsPairs(node_ids_pairs);
// init Discrete Fourier Transformator
const int dimfft = 1; // unidim FFT since fully circulant embedding
const int steps[dimfft] = {rc};
// FDft Dft(rc);
FFft<1> Dft(rc);
FFft<dimfft> Dft(steps);
// get first column of K via permutation
unsigned int perm[rc];
......@@ -198,15 +200,17 @@ class FUnifTensorialM2LHandler<ORDER,MatrixKernelClass,HOMOGENEOUS> : FNoCopyabl
// Tensorial MatrixKernel specific
FComplexe** FC;
// for real valued kernel only n/2+1 complex values are stored
// after performing the DFT (the rest is deduced by conjugation)
unsigned int opt_rc;
typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes];
// FDft Dft; // Direct Discrete Fourier Transformator
FFft<1> Dft; // Fast Discrete Fourier Transformator
// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
// FDft Dft; // Direct Discrete Fourier Transformator
typedef FFft<dimfft> DftClass; // Fast Discrete Fourier Transformator
FSmartPointer<DftClass,FSmartPointerMemory> Dft;
const unsigned int opt_rc; // specific to real valued kernel
static const std::string getFileName()
{
......@@ -220,9 +224,12 @@ class FUnifTensorialM2LHandler<ORDER,MatrixKernelClass,HOMOGENEOUS> : FNoCopyabl
public:
FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal)
: opt_rc(rc/2+1),
Dft(rc) // initialize Discrete Fourier Transformator
: opt_rc(rc/2+1)
{
// init DFT
const int steps[dimfft] = {rc};
Dft = new DftClass(steps);
// allocate FC
FC = new FComplexe*[dim];
for (unsigned int d=0; d<dim; ++d)
......@@ -277,7 +284,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply forward Discrete Fourier Transform
Dft.applyIDFT(FX,Px);
Dft->applyIDFT(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
......@@ -344,7 +351,7 @@ public:
Py[node_diff[i*nnodes]]=y[i];
// Apply forward Discrete Fourier Transform
Dft.applyDFT(Py,FY);
Dft->applyDFT(Py,FY);
}
......@@ -369,15 +376,17 @@ class FUnifTensorialM2LHandler<ORDER,MatrixKernelClass,NON_HOMOGENEOUS> : FNoCop
const unsigned int TreeHeight;
const FReal RootCellWidth;
// for real valued kernel only n/2+1 complex values are stored
// after performing the DFT (the rest is deduced by conjugation)
unsigned int opt_rc;
typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes];
// FDft Dft; // Direct Discrete Fourier Transformator
FFft<1> Dft; // Fast Discrete Fourier Transformator
// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
// FDft Dft; // Direct Discrete Fourier Transformator
typedef FFft<dimfft> DftClass; // Fast Discrete Fourier Transformator
FSmartPointer<DftClass,FSmartPointerMemory> Dft;
const unsigned int opt_rc; // specific to real valued kernel
static const std::string getFileName()
{
......@@ -393,9 +402,12 @@ public:
FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth)
: TreeHeight(inTreeHeight),
RootCellWidth(inRootCellWidth),
opt_rc(rc/2+1),
Dft(rc) // initialize Discrete Fourier Transformator
opt_rc(rc/2+1)
{
// init DFT
const int steps[dimfft] = {rc};
Dft = new DftClass(steps);
// allocate FC
FC = new FComplexe**[TreeHeight];
for (unsigned int l=0; l<TreeHeight; ++l){
......@@ -458,7 +470,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply forward Discrete Fourier Transform
Dft.applyIDFT(FX,Px);
Dft->applyIDFT(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
......@@ -524,7 +536,7 @@ public:
Py[node_diff[i*nnodes]]=y[i];
// Apply forward Discrete Fourier Transform
Dft.applyDFT(Py,FY);
Dft->applyDFT(Py,FY);
}
......
......@@ -70,6 +70,7 @@ extern "C"
double*, double*, const unsigned*, int*);
void dorgqr_(const unsigned*, const unsigned*, const unsigned*,
double*, const unsigned*, double*, double*, const unsigned*, int*);
void dpotrf_(const char*, const unsigned*, double*, const unsigned*, int*);
#ifdef ScalFMM_USE_MKL_AS_BLAS
// mkl: hadamard product is not implemented in mkl_blas
......@@ -103,6 +104,7 @@ extern "C"
float*, float*, const unsigned*, int*);
void sorgqr_(const unsigned*, const unsigned*, const unsigned*,
float*, const unsigned*, float*, float*, const unsigned*, int*);
void spotrf_(const char*, const unsigned*, float*, const unsigned*, int*);
// double complex //////////////////////////////////////////////////
// blas 1
......@@ -119,7 +121,7 @@ extern "C"
double*, const unsigned*, const double*, double*, const unsigned*);
void zgeqrf_(const unsigned*, const unsigned*, double*, const unsigned*,
double*, double*, const unsigned*, int*);
void zpotrf_(const char*, const unsigned*, double*, const unsigned*, int*);
// single complex //////////////////////////////////////////////////
// blas 1
......@@ -136,7 +138,7 @@ extern "C"
float*, const unsigned*, const float*, float*, const unsigned*);
void cgeqrf_(const unsigned*, const unsigned*, float*, const unsigned*,
float*, float*, const unsigned*, int*);
void cpotrf_(const char*, const unsigned*, float*, const unsigned*, int*);
}
......@@ -506,6 +508,19 @@ namespace FBlas {
// Cholesky decomposition: A=LL^T (if A is symmetric definite positive)
inline int potrf(const unsigned m, double* A, const unsigned n)
{
int INF;
dpotrf_("L", &m, A, &n, &INF);
return INF;
}
inline int potrf(const unsigned m, float* A, const unsigned n)
{
int INF;
spotrf_("L", &m, A, &n, &INF);
return INF;
}
} // end namespace FCBlas
......
......@@ -36,7 +36,7 @@
/**
* @author Pierre Blanchard (pierre.blanchard@inria.fr)
* @class FDft and @class FFft
* @class FDft, @class FFft and @class FCFft
* Please read the license
*
* These classes handle the forward and backward Discete Fourier Transform
......@@ -45,6 +45,10 @@
* Fourier Transform (FFT). The FFT algorithm can either be provided by the
* FFTW(3) library itself or a version that is wrapped in Intel MKL.
*
* The direct DFT is templatized with the input value type (FReal or FComplexe),
* while 2 distinct classes resp. @class FFft and @class FCFft are defined
* resp. for the real and complex valued FFT.
*
* The aim of writing these specific classes is to allow further customization
* of the DFT such as implementing a tensorial variant, a weighted variant
* or any other feature.
......@@ -59,25 +63,25 @@
*
* @tparam nsteps number of sampled values \f$N\f$
*/
template<typename ValueType>
class FDft
{
FReal* fftR_;
FComplexe* fftC_;
ValueType* data_; //< data in physical space
FComplexe* dataF_; //< data in Fourier space
FReal *cosRS_, *sinRS_;
private:
unsigned int nsteps_;
unsigned int nsteps_; //< number of steps
public:
FDft(const unsigned int nsteps)
: nsteps_(nsteps)
{
fftR_ = new FReal[nsteps_];
fftC_ = new FComplexe[nsteps_];
data_ = new ValueType[nsteps_];
dataF_ = new FComplexe[nsteps_];
// Beware this is extremely HEAVY to store!!! => Use FDft only for debug!
cosRS_ = new FReal[nsteps_*nsteps_];
......@@ -93,67 +97,104 @@ public:
virtual ~FDft()
{
delete [] fftR_;
delete [] fftC_;
delete [] data_;
delete [] dataF_;
delete [] cosRS_;
delete [] sinRS_;
}
/// Forward DFT
// Real valued DFT
void applyDFT(const FReal* sampledData,
FComplexe* transformedData) const
{
// FTic time;
// read sampled data
// std::cout<< "copy(";
// time.tic();
FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(fftC_));
FBlas::copy(nsteps_, sampledData,fftR_);
// std::cout << time.tacAndElapsed() << ")";
// std::cout<< " - exe(";
// time.tic();
FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(dataF_));
FBlas::copy(nsteps_, sampledData,data_);
// perform direct forward transformation
for(unsigned int r=0; r<nsteps_; ++r)
for(unsigned int s=0; s<nsteps_; ++s){
fftC_[r] += FComplexe(fftR_[s]*cosRS_[r*nsteps_+s],
-fftR_[s]*sinRS_[r*nsteps_+s]);
dataF_[r] += FComplexe(data_[s]*cosRS_[r*nsteps_+s],
-data_[s]*sinRS_[r*nsteps_+s]);
}
// std::cout << time.tacAndElapsed() << ")";
// write transformed data
// std::cout<< " - copy(";
// time.tic();
FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(fftC_),
FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(dataF_),
reinterpret_cast<FReal*>(transformedData));
// std::cout << time.tacAndElapsed() << ") ";
}
// Complexe valued DFT
void applyDFT(const FComplexe* sampledData,
FComplexe* transformedData) const
{
// read sampled data
FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(dataF_));
FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(sampledData),
reinterpret_cast<FReal*>(data_));
// perform direct forward transformation
for(unsigned int r=0; r<nsteps_; ++r)
for(unsigned int s=0; s<nsteps_; ++s){
dataF_[r] += FComplexe(data_[s].getReal()*cosRS_[r*nsteps_+s]
+ data_[s].getImag()*sinRS_[r*nsteps_+s],
data_[s].getImag()*cosRS_[r*nsteps_+s]
- data_[s].getReal()*sinRS_[r*nsteps_+s]);
}
// write transformed data
FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(dataF_),
reinterpret_cast<FReal*>(transformedData));
}
/// Backward DFT
// Real valued IDFT
void applyIDFT(const FComplexe* transformedData,
FReal* sampledData) const
{
// read transformed data
FBlas::setzero(nsteps_,fftR_);
FBlas::setzero(nsteps_,data_);
FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(transformedData),
reinterpret_cast<FReal*>(fftC_));
reinterpret_cast<FReal*>(dataF_));
// perform direct backward transformation
for(unsigned int r=0; r<nsteps_; ++r){
for(unsigned int s=0; s<nsteps_; ++s){
fftR_[r] += fftC_[s].getReal()*cosRS_[r*nsteps_+s]
-fftC_[s].getImag()*sinRS_[r*nsteps_+s];
data_[r] += dataF_[s].getReal()*cosRS_[r*nsteps_+s]
+ dataF_[s].getImag()*sinRS_[r*nsteps_+s];
}
fftR_[r]*=1./nsteps_;
data_[r]*=1./nsteps_;
}
// write sampled data
FBlas::copy(nsteps_,fftR_,sampledData);
FBlas::copy(nsteps_,data_,sampledData);
}
// Complexe valued IDFT
void applyIDFT(const FComplexe* transformedData,
FComplexe* sampledData) const
{
// read transformed data
FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(data_));
FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(transformedData),
reinterpret_cast<FReal*>(dataF_));
};
// perform direct backward transformation
for(unsigned int r=0; r<nsteps_; ++r){
for(unsigned int s=0; s<nsteps_; ++s){
data_[r] += FComplexe(dataF_[s].getReal()*cosRS_[r*nsteps_+s]
- dataF_[s].getImag()*sinRS_[r*nsteps_+s],
dataF_[s].getImag()*cosRS_[r*nsteps_+s]
+ dataF_[s].getReal()*sinRS_[r*nsteps_+s]);
}
data_[r]*=1./nsteps_;
}
// write sampled data
FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(data_),
reinterpret_cast<FReal*>(sampledData));
}
};
/**
......@@ -167,50 +208,56 @@ class FFft
enum{dim = DIM};
// arrays
FReal* fftR_;
FComplexe* fftC_;
FReal* data_; //< data in physical space
FComplexe* dataF_; //< data in Fourier space
// plans
fftw_plan plan_c2r_; // backward FFT plan
fftw_plan plan_r2c_; // forward FFT plan
fftw_plan plan_c2r_; //< backward FFT plan
fftw_plan plan_r2c_; //< forward FFT plan
private:
/*unsigned*/ int nsteps_; // all dimensions have same nb of steps
/*unsigned*/ int nsteps_opt_;
int steps_[dim]; //< number of steps per dimension
int nsteps_; //< total number of steps
int nsteps_opt_; //< reduced number of steps for real valued FFT
public:
FFft(const unsigned int nsteps)
: nsteps_(nsteps),
nsteps_opt_(nsteps/2+1) // SPECIFIC TO REAL VALUED FTT
FFft(const int steps[dim])
{
// init number of steps
nsteps_=1; nsteps_opt_=1;
for(int d = 0; d<dim;++d){
steps_[d]=steps[d];
nsteps_*=steps[d];
nsteps_opt_*=steps[d]/2+1; // real valued DFT specific
}
// allocate arrays
fftR_ = (FReal*) fftw_malloc(dim * sizeof(FReal) * dim * nsteps_);
fftC_ = (FComplexe*) fftw_malloc(dim * sizeof(FComplexe) * dim * nsteps_opt_);
data_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_);
dataF_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_opt_);
// // fftw plans
// // unidim fftw plans
// plan_c2r_ =
// fftw_plan_dft_c2r_1d(nsteps_,
// reinterpret_cast<fftw_complex*>(fftC_),
// fftR_,
// reinterpret_cast<fftw_complex*>(dataF_),
// data_,
// FFTW_MEASURE);// TODO: test FFTW_ESTIMATE
// plan_r2c_ =
// fftw_plan_dft_r2c_1d(nsteps_,
// fftR_,
// reinterpret_cast<fftw_complex*>(fftC_),
// data_,
// reinterpret_cast<fftw_complex*>(dataF_),
// FFTW_MEASURE);
// multidim fftw plans
const int steps[2] = {dim, nsteps_};
plan_c2r_ =
fftw_plan_dft_c2r(2, steps,
reinterpret_cast<fftw_complex*>(fftC_),
fftR_,
fftw_plan_dft_c2r(dim, steps_,
reinterpret_cast<fftw_complex*>(dataF_),
data_,
FFTW_MEASURE);// TODO: test FFTW_ESTIMATE
plan_r2c_ =
fftw_plan_dft_r2c(2, steps,
fftR_,
reinterpret_cast<fftw_complex*>(fftC_),
fftw_plan_dft_r2c(dim, steps_,
data_,
reinterpret_cast<fftw_complex*>(dataF_),
FFTW_MEASURE);
}
......@@ -219,38 +266,22 @@ public:
{
fftw_destroy_plan(plan_c2r_);
fftw_destroy_plan(plan_r2c_);
fftw_free(fftR_);
fftw_free(fftC_);
fftw_free(data_);
fftw_free(dataF_);
}
void applyDFT(const FReal* sampledData,
FComplexe* transformedData) const
{
// FTic time;
// read sampled data
// std::cout<< "copy(";
// time.tic();
FBlas::c_setzero(dim*nsteps_opt_,reinterpret_cast<FReal*>(fftC_));
FBlas::copy(dim*nsteps_, sampledData,fftR_);
// std::cout << time.tacAndElapsed() << ")";
FBlas::c_setzero(nsteps_opt_,reinterpret_cast<FReal*>(dataF_));
FBlas::copy(nsteps_, sampledData,data_);