Commit 2e1c9711 authored by BLANCHARD Pierre

Redesigned direct DFT and FFT classes (provided a separate complex valued FFT...

Redesigned direct DFT and FFT classes (provided a separate complex valued FFT and templatized direct DFT with physical value type); Provided test for C-interfaced Lapack functions such as Cholesky Decomposition (TODO add SVD and QRD).
parent fb5cedd0
......@@ -66,8 +66,10 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel
TensorType::setNodeIdsPairs(node_ids_pairs);
// init Discrete Fourier Transformator
const int dimfft = 1; // unidim FFT since fully circulant embedding
const int steps[dimfft] = {rc};
// FDft Dft(rc);
FFft<1> Dft(rc);
FFft<dimfft> Dft(steps);
// get first column of K via permutation
unsigned int perm[rc];
......@@ -177,15 +179,17 @@ class FUnifM2LHandler<ORDER,HOMOGENEOUS> : FNoCopyable
FComplexe *FC;
// for real valued kernel only n/2+1 complex values are stored
// after performing the DFT (the rest is deduced by conjugation)
unsigned int opt_rc;
typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes];
// FDft Dft; // Direct Discrete Fourier Transformator
FFft<1> Dft; // Fast Discrete Fourier Transformator
/// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
// FDft Dft; // Direct Discrete Fourier Transformator
typedef FFft<dimfft> DftClass; // Fast Discrete Fourier Transformator
FSmartPointer<DftClass,FSmartPointerMemory> Dft;
const unsigned int opt_rc; // specific to real valued kernel
static const std::string getFileName()
{
......@@ -201,9 +205,12 @@ public:
template <typename MatrixKernelClass>
FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal)
: FC(NULL),
opt_rc(rc/2+1),
Dft(rc) // initialize Discrete Fourier Transformator
opt_rc(rc/2+1)
{
// init DFT
const int steps[dimfft] = {rc};
Dft = new DftClass(steps);
// initialize root node ids
TensorType::setNodeIdsDiff(node_diff);
......@@ -253,7 +260,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply inverse Discrete Fourier Transform
Dft.applyIDFT(FX,Px);
Dft->applyIDFT(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
......@@ -319,7 +326,7 @@ public:
Py[node_diff[i*nnodes]]=y[i];
// Apply forward Discrete Fourier Transform
Dft.applyDFT(Py,FY);
Dft->applyDFT(Py,FY);
}
......@@ -342,15 +349,17 @@ class FUnifM2LHandler<ORDER,NON_HOMOGENEOUS> : FNoCopyable
const unsigned int TreeHeight;
const FReal RootCellWidth;
// for real valued kernel only n/2+1 complex values are stored
// after performing the DFT (the rest is deduced by conjugation)
unsigned int opt_rc;
typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes];
// FDft Dft; // Direct Discrete Fourier Transformator
FFft<1> Dft; // Fast Discrete Fourier Transformator
// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
// FDft Dft; // Direct Discrete Fourier Transformator
typedef FFft<dimfft> DftClass; // Fast Discrete Fourier Transformator
FSmartPointer<DftClass,FSmartPointerMemory> Dft;
const unsigned int opt_rc; // specific to real valued kernel
static const std::string getFileName()
{
......@@ -367,9 +376,12 @@ public:
FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth)
: TreeHeight(inTreeHeight),
RootCellWidth(inRootCellWidth),
opt_rc(rc/2+1),
Dft(rc) // initialize Discrete Fourier Transformator
{
opt_rc(rc/2+1)
{
// init DFT
const int steps[dimfft] = {rc};
Dft = new DftClass(steps);
// initialize root node ids
TensorType::setNodeIdsDiff(node_diff);
......@@ -431,7 +443,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply inverse Discrete Fourier Transform
Dft.applyIDFT(FX,Px);
Dft->applyIDFT(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
......@@ -496,7 +508,7 @@ public:
Py[node_diff[i*nnodes]]=y[i];
// Apply forward Discrete Fourier Transform
Dft.applyDFT(Py,FY);
Dft->applyDFT(Py,FY);
}
......
......@@ -60,8 +60,10 @@ static void precompute(const MatrixKernelClass *const MatrixKernel, const FReal
_FC = new FComplexe [rc]; // TODO: do it in the non-sym version!!!
// init Discrete Fourier Transformator
const int dimfft = 1; // unidim FFT since fully circulant embedding
const int steps[dimfft] = {rc};
// FDft Dft(rc);
FFft Dft(rc);
FFft<dimfft> Dft(steps);
// reduce storage if real valued kernel
const unsigned int opt_rc = rc/2+1;
......
......@@ -48,9 +48,9 @@ class FTreeCoordinate;
* In fact, in the ChebyshevSym variant the matrix kernel needs to be
* evaluated compo-by-compo since we currently use a scalar ACA.
*
* 3) We currently use multiple 1D FFT instead of multidim FFT.
* TODO switch to multidim if relevant in considered range of size
* (see testFFTW and testFFTWMultidim).
* 3) We currently use multiple 1D FFT instead of multidim FFT since embedding
* is circulant. Multidim FFT could be used if embedding were block circulant.
* TODO investigate possibility of block circulant embedding
*
* @tparam CellClass Type of cell
* @tparam ContainerClass Type of container to store particles
......
......@@ -75,8 +75,10 @@ static void Compute(const MatrixKernelClass *const MatrixKernel,
TensorType::setNodeIdsPairs(node_ids_pairs);
// init Discrete Fourier Transformator
const int dimfft = 1; // unidim FFT since fully circulant embedding
const int steps[dimfft] = {rc};
// FDft Dft(rc);
FFft<1> Dft(rc);
FFft<dimfft> Dft(steps);
// get first column of K via permutation
unsigned int perm[rc];
......@@ -198,15 +200,17 @@ class FUnifTensorialM2LHandler<ORDER,MatrixKernelClass,HOMOGENEOUS> : FNoCopyabl
// Tensorial MatrixKernel specific
FComplexe** FC;
// for real valued kernel only n/2+1 complex values are stored
// after performing the DFT (the rest is deduced by conjugation)
unsigned int opt_rc;
typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes];
// FDft Dft; // Direct Discrete Fourier Transformator
FFft<1> Dft; // Fast Discrete Fourier Transformator
// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
// FDft Dft; // Direct Discrete Fourier Transformator
typedef FFft<dimfft> DftClass; // Fast Discrete Fourier Transformator
FSmartPointer<DftClass,FSmartPointerMemory> Dft;
const unsigned int opt_rc; // specific to real valued kernel
static const std::string getFileName()
{
......@@ -220,9 +224,12 @@ class FUnifTensorialM2LHandler<ORDER,MatrixKernelClass,HOMOGENEOUS> : FNoCopyabl
public:
FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal)
: opt_rc(rc/2+1),
Dft(rc) // initialize Discrete Fourier Transformator
: opt_rc(rc/2+1)
{
// init DFT
const int steps[dimfft] = {rc};
Dft = new DftClass(steps);
// allocate FC
FC = new FComplexe*[dim];
for (unsigned int d=0; d<dim; ++d)
......@@ -277,7 +284,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply inverse Discrete Fourier Transform
Dft.applyIDFT(FX,Px);
Dft->applyIDFT(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
......@@ -344,7 +351,7 @@ public:
Py[node_diff[i*nnodes]]=y[i];
// Apply forward Discrete Fourier Transform
Dft.applyDFT(Py,FY);
Dft->applyDFT(Py,FY);
}
......@@ -369,15 +376,17 @@ class FUnifTensorialM2LHandler<ORDER,MatrixKernelClass,NON_HOMOGENEOUS> : FNoCop
const unsigned int TreeHeight;
const FReal RootCellWidth;
// for real valued kernel only n/2+1 complex values are stored
// after performing the DFT (the rest is deduced by conjugation)
unsigned int opt_rc;
typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes];
// FDft Dft; // Direct Discrete Fourier Transformator
FFft<1> Dft; // Fast Discrete Fourier Transformator
// DFT specific
static const int dimfft = 1; // unidim FFT since fully circulant embedding
// FDft Dft; // Direct Discrete Fourier Transformator
typedef FFft<dimfft> DftClass; // Fast Discrete Fourier Transformator
FSmartPointer<DftClass,FSmartPointerMemory> Dft;
const unsigned int opt_rc; // specific to real valued kernel
static const std::string getFileName()
{
......@@ -393,9 +402,12 @@ public:
FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth)
: TreeHeight(inTreeHeight),
RootCellWidth(inRootCellWidth),
opt_rc(rc/2+1),
Dft(rc) // initialize Discrete Fourier Transformator
opt_rc(rc/2+1)
{
// init DFT
const int steps[dimfft] = {rc};
Dft = new DftClass(steps);
// allocate FC
FC = new FComplexe**[TreeHeight];
for (unsigned int l=0; l<TreeHeight; ++l){
......@@ -458,7 +470,7 @@ public:
FReal Px[rc];
FBlas::setzero(rc,Px);
// Apply inverse Discrete Fourier Transform
Dft.applyIDFT(FX,Px);
Dft->applyIDFT(FX,Px);
// Unapply Zero Padding
for (unsigned int j=0; j<nnodes; ++j)
......@@ -524,7 +536,7 @@ public:
Py[node_diff[i*nnodes]]=y[i];
// Apply forward Discrete Fourier Transform
Dft.applyDFT(Py,FY);
Dft->applyDFT(Py,FY);
}
......
......@@ -70,6 +70,7 @@ extern "C"
double*, double*, const unsigned*, int*);
void dorgqr_(const unsigned*, const unsigned*, const unsigned*,
double*, const unsigned*, double*, double*, const unsigned*, int*);
void dpotrf_(const char*, const unsigned*, double*, const unsigned*, int*);
#ifdef ScalFMM_USE_MKL_AS_BLAS
// mkl: hadamard product is not implemented in mkl_blas
......@@ -103,6 +104,7 @@ extern "C"
float*, float*, const unsigned*, int*);
void sorgqr_(const unsigned*, const unsigned*, const unsigned*,
float*, const unsigned*, float*, float*, const unsigned*, int*);
void spotrf_(const char*, const unsigned*, float*, const unsigned*, int*);
// double complex //////////////////////////////////////////////////
// blas 1
......@@ -119,7 +121,7 @@ extern "C"
double*, const unsigned*, const double*, double*, const unsigned*);
void zgeqrf_(const unsigned*, const unsigned*, double*, const unsigned*,
double*, double*, const unsigned*, int*);
void zpotrf_(const char*, const unsigned*, double*, const unsigned*, int*);
// single complex //////////////////////////////////////////////////
// blas 1
......@@ -136,7 +138,7 @@ extern "C"
float*, const unsigned*, const float*, float*, const unsigned*);
void cgeqrf_(const unsigned*, const unsigned*, float*, const unsigned*,
float*, float*, const unsigned*, int*);
void cpotrf_(const char*, const unsigned*, float*, const unsigned*, int*);
}
......@@ -506,6 +508,19 @@ namespace FBlas {
// Cholesky decomposition: A=LL^T (if A is symmetric positive definite)
/**
 * Cholesky factorization of a double precision matrix via LAPACK dpotrf_.
 * The LAPACK routine is always invoked with uplo = "L".
 * @param m first size argument forwarded to dpotrf_ (matrix order in the LAPACK interface)
 * @param A matrix storage handed to dpotrf_ as is
 * @param n second size argument forwarded to dpotrf_ (leading dimension of A in the LAPACK interface)
 * @return the info status written by dpotrf_
 */
inline int potrf(const unsigned m, double* A, const unsigned n)
{
  const char* const uplo = "L";
  int info;
  dpotrf_(uplo, &m, A, &n, &info);
  return info;
}
/**
 * Cholesky factorization of a single precision matrix via LAPACK spotrf_.
 * The LAPACK routine is always invoked with uplo = "L".
 * @param m first size argument forwarded to spotrf_ (matrix order in the LAPACK interface)
 * @param A matrix storage handed to spotrf_ as is
 * @param n second size argument forwarded to spotrf_ (leading dimension of A in the LAPACK interface)
 * @return the info status written by spotrf_
 */
inline int potrf(const unsigned m, float* A, const unsigned n)
{
  const char* const uplo = "L";
  int info;
  spotrf_(uplo, &m, A, &n, &info);
  return info;
}
} // end namespace FCBlas
......
This diff is collapsed.
......@@ -15,7 +15,7 @@
// ===================================================================================
// ==== CMAKE =====
// @FUSE_FFT
// @FUSE_BLAS
// ================
#include <iostream>
......
......@@ -44,23 +44,24 @@ int main()
//////////////////////////////////////////////////////////////////////////////
// INITIALIZATION
// size (pick a power of 2 for better performance of the FFT algorithm)
unsigned int rank = 2;
unsigned int nsteps_ = 500;
unsigned int dim = 10;
const int steps[2]={static_cast<int>(dim),
static_cast<int>(nsteps_)};
unsigned int size = dim*nsteps_;
const int dim=2;
const int pow_nsteps_=8;
int steps_[dim];
int nsteps_=1;
for(int d=0; d<dim; ++d) {
steps_[d]=FMath::pow(2,pow_nsteps_);
nsteps_*=steps_[d];
}
//////////////////////////////////////////////////////////////////////////////
// Multidimensional FFT PLANS
std::cout<< "Test "<< dim <<"D FFT."<<std::endl;
// fftw arrays
FReal* fftR_;
FComplexe* fftC_;
fftR_ = (FReal*) fftw_malloc(sizeof(FReal) * size );
fftC_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * size );
fftR_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_ );
fftC_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_ );
// fftw plans
// use routine defined in file:
......@@ -72,12 +73,12 @@ int main()
time.tic();
plan_c2r_ =
fftw_plan_dft_c2r(rank, steps,
fftw_plan_dft_c2r(dim, steps_,
reinterpret_cast<fftw_complex*>(fftC_),
fftR_,
FFTW_MEASURE);
plan_r2c_ =
fftw_plan_dft_r2c(rank, steps,
fftw_plan_dft_r2c(dim, steps_,
fftR_,
reinterpret_cast<fftw_complex*>(fftC_),
FFTW_MEASURE);
......@@ -87,20 +88,20 @@ int main()
//////////////////////////////////////////////////////////////////////////////
// EXECUTION
// generate random physical data
for(unsigned int s=0; s<size; ++s)
for(int s=0; s<nsteps_; ++s)
fftR_[s] = FReal(rand())/FRandMax;
// // display data in physical space
// std::cout<< "Physical data: "<<std::endl;
// for(unsigned int d=0; d<dim; ++d){
// for(unsigned int s=0; s<nsteps_; ++s)
// std::cout<< fftR_[s+d*nsteps_] << ", ";
// for( int r=0; r<steps_[0]; ++r) {
// for( int s=0; s<steps_[1]; ++s)
// std::cout<< fftR_[r*steps_[1]+s] << ", ";
// std::cout<<std::endl;
// }
// std::cout<<std::endl;
// perform fft
std::cout<< "Perform Forward FFT: ";
std::cout<< "Perform Forward FFT: ";
time.tic();
fftw_execute( plan_r2c_ );
std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl;
......@@ -108,20 +109,23 @@ int main()
// // display transform in Fourier space
// // beware the real data FFT stores only N/2+1 complex output values
// std::cout<< "Transformed data : "<<std::endl;
// for(unsigned int d=0; d<dim; ++d){
// for(unsigned int s=0; s<nsteps_/2+1; ++s)
// std::cout<< fftC_[s+d*(nsteps_/2+1)] << ", ";
// for( int r=0; r<steps_[0]/2+1; ++r) {
// for( int s=0; s<steps_[1]/2+1; ++s)
// std::cout<< fftC_[r*(steps_[1]/2+1)+s] << ", ";
// std::cout<<std::endl;
// }
// std::cout<<std::endl;
// for(unsigned int s=0; s<nsteps_/2+1; ++s){
// fftC_[nsteps_-s]=FComplexe(fftC_[s].getReal(),-fftC_[s].getImag());
// }
//
//// for( int s=0; s<steps_[1]/2+1; ++s){
//// fftC_[nsteps_-s]=FComplexe(fftC_[s].getReal(),-fftC_[s].getImag());
//// }
// std::cout<< "Full Transformed data : "<<std::endl;
// for(unsigned int s=0; s<nsteps_; ++s)
// std::cout<< fftC_[s] << ", ";
// for( int r=0; r<steps_[0]; ++r){
// for( int s=0; s<steps_[1]; ++s)
// std::cout<< fftC_[r*steps_[1]+s] << ", ";
// std::cout<<std::endl;
// }
// std::cout<<std::endl;
// perform ifft of transformed data (in order to get physical values back)
......@@ -132,9 +136,9 @@ int main()
// // display data in physical space
// std::cout<< "Physical data (from 1/N*IFFT(FFT(Physical data))): "<<std::endl;
// for(unsigned int d=0; d<dim; ++d){
// for(unsigned int s=0; s<nsteps_; ++s)
// std::cout<< fftR_[s+d*nsteps_]/(nsteps_*dim) << ", ";
// for( int r=0; r<steps_[0]; ++r) {
// for( int s=0; s<steps_[1]; ++s)
// std::cout<< fftR_[r*steps_[1]+s]/(nsteps_) << ", ";
// std::cout<<std::endl;
// }
// std::cout<<std::endl;
......
......@@ -106,8 +106,10 @@ int main(int, char **){
// Init DFTor
std::cout<< "Set DFT: ";
time.tic();
const int dim = 1;
const int steps[dim] = {N};
//FDft Dft(N);// direct version
FFft<1> Dft(N);// fast version
FFft<dim> Dft(steps);// fast version
std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl;
// Initialize manually
......@@ -135,7 +137,8 @@ int main(int, char **){
// Transform first column of K
FReal tK[N];
for(unsigned int i=0; i<N; ++i) tK[i]=K[i*N];
for(unsigned int i=0; i<N; ++i) tK[i]=K[i*N]; // first column
// for(unsigned int i=0; i<N; ++i) tK[i]=K[i]; // first row
std::cout<< "Transform tK->FK: ";
time.tic();
Dft.applyDFT(tK,FK);
......
// ===================================================================================
// Ce LOGICIEL "ScalFmm" est couvert par le copyright Inria 20xx-2012.
// Inria détient tous les droits de propriété sur le LOGICIEL, et souhaite que
// la communauté scientifique l'utilise afin de le tester et de l'évaluer.
// Inria donne gracieusement le droit d'utiliser ce LOGICIEL. Toute utilisation
// dans un but lucratif ou à des fins commerciales est interdite sauf autorisation
// expresse et préalable d'Inria.
// Toute utilisation hors des limites précisées ci-dessus et réalisée sans l'accord
// expresse préalable d'Inria constituerait donc le délit de contrefaçon.
// Le LOGICIEL étant un produit en cours de développement, Inria ne saurait assurer
// aucune responsabilité et notamment en aucune manière et en aucun cas, être tenu
// de répondre d'éventuels dommages directs ou indirects subits par l'utilisateur.
// Tout utilisateur du LOGICIEL s'engage à communiquer à Inria ses remarques
// relatives à l'usage du LOGICIEL
// ===================================================================================
// ==== CMAKE =====
// @FUSE_BLAS
// ================
#include <iostream>
#include <stdlib.h>
#include "../../Src/Utils/FBlas.hpp"
#include "../../Src/Utils/FTic.hpp"
/**
* Test functionality of C - interfaced LAPACK functions
*/
/**
 * Exercises FBlas::potrf() on a hard-coded 4x4 symmetric matrix:
 * factorizes A, extracts the triangular factor L, rebuilds L*L^T and
 * prints the difference with the original matrix.
 *
 * All matrices are printed as consecutive runs of n stored entries, i.e.
 * one printed line per run A[i*n] .. A[i*n+n-1].
 *
 * @return always 0; the potrf status is only reported on standard output.
 */
int main()
{
  FTic time; // declared but not used by the Cholesky check below

  /*
   * List of tested functions:
   * Cholesky decomposition: FBlas::potrf()
   * TODO SVD: FBlas::gesvd()
   * TODO QR decomposition: FBlas::geqrf()
   */

  // NOTE: the index arithmetic below mixes m and n and relies on m == n.
  const unsigned int m = 4, n = 4;
  FReal* A = new FReal [m * n]; // matrix: column major ordering

  // A= LL^T ////////////////////////////////////
  // define symmetric positive definite matrix A (all 16 entries are assigned)
  A[0]=5; A[10]=4; A[15]=7;
  A[1]=A[3]=A[4]=A[12]=2;
  A[6]=A[7]=A[9]=A[13]=1;
  A[2]=A[5]=A[8]=3;
  A[11]=A[14]=-1;

  // keep a copy of A in C since potrf is given A itself to work on
  FReal* C = new FReal [m * n]; // matrix: column major ordering
  for (unsigned int ii=0; ii<m; ++ii)
    for (unsigned int jj=0; jj<n; ++jj)
      C[ii*m + jj]=A[ii*m + jj];

  std::cout<<"\nA=["<<std::endl;
  for (unsigned int i=0; i<m; ++i) {
    for (unsigned int j=0; j<n; ++j)
      std::cout << A[i*n+j] << " ";
    std::cout<< std::endl;
  }
  std::cout<<"]"<<std::endl;

  // perform Cholesky decomposition
  std::cout<<"\nCholesky decomposition ";
  int INF = FBlas::potrf(m, A, n);
  if(INF==0) {std::cout<<"succeeded!"<<std::endl;}
  else {std::cout<<"failed!"<<std::endl;}

  std::cout<<"\nA_out=["<<std::endl;
  for (unsigned int i=0; i<m; ++i) {
    for (unsigned int j=0; j<n; ++j)
      std::cout << A[i*n+j] << " ";
    std::cout<<std::endl;
  }
  std::cout<<"]"<<std::endl;

  // build lower matrix: keep entries whose in-run offset jj is >= the run
  // index ii (row >= column under column major ordering), zero the others
  FReal* L = new FReal [m * n]; // matrix: column major ordering
  for (unsigned int ii=0; ii<m; ++ii)
    for (unsigned int jj=0; jj<n; ++jj){
      if(ii<=jj)
        L[ii*m + jj]=A[ii*m + jj];
      else
        L[ii*m + jj]=0.;
    }

  std::cout<<"\nL=["<<std::endl;
  for (unsigned int i=0; i<m; ++i) {
    for (unsigned int j=0; j<n; ++j)
      std::cout << L[i*n+j] << " ";
    std::cout<< std::endl;
  }
  std::cout<<"]"<<std::endl;

  // verify result by computing B=LL^T
  FReal* B = new FReal [m * n]; // matrix: column major ordering
  for (unsigned int ii=0; ii<m; ++ii)
    for (unsigned int jj=0; jj<n; ++jj){
      B[ii*m + jj]=0.;
      for (unsigned int j=0; j<n; ++j)
        B[ii*m + jj]+=L[j*m + ii]*L[j*m + jj];
    }

  std::cout<<"\nA-LL^T=["<<std::endl;
  for (unsigned int i=0; i<m; ++i) {
    for (unsigned int j=0; j<n; ++j)
      std::cout << B[i*n+j]-C[i*n+j] << " ";
    std::cout<< std::endl;
  }
  std::cout<<"]"<<std::endl;

  // release every buffer allocated above (L was previously leaked)
  delete [] A;
  delete [] B;
  delete [] C;
  delete [] L;

  return 0;
}
......@@ -211,14 +211,14 @@ int main(int, char **){
// In order to actually embed K into a circulant matrix C one just
// needs to insert (ORDER-1) extra lines/columns (to each block).
// std::cout<< "K=" <<std::endl;
// for (unsigned int i=0; i<nnodes; ++i){
// for (unsigned int j=0; j<nnodes; ++j){
// std::cout<< K[i*nnodes+j]<<", ";
// }
// std::cout<<std::endl;
// }
// std::cout<<std::endl;
std::cout<< "K=" <<std::endl;
for (unsigned int i=0; i<nnodes; ++i){
for (unsigned int j=0; j<nnodes; ++j){
std::cout<< K[i*nnodes+j]<<", ";
}
std::cout<<std::endl;
}
std::cout<<std::endl;
// Check multi-index
std::cout<< "node_ids=" <<std::endl;
......@@ -324,9 +324,10 @@ int main(int, char **){
FBlas::setzero(rc,PLocalExp);
// Init DFT
// NB: only one FFTor is defined since scalar problems involve scalar r/lhs and a scalar matrix kernel. All dimensions are 1.
const int dimfft = 1;
const int steps[dimfft] = {rc};
//FDft Dft(rc); // direct version
FFft<1> Dft(rc); // fast version
FFft<dimfft> Dft(steps); // fast version
// Get first COLUMN of K and Store in T
FReal T[rc];
......
......@@ -356,10 +356,10 @@ int main(int, char **){
// Efficient application of the Toeplitz system in FOURIER SPACE
// Init DFT
const int dimfft = 1;
const int steps[dimfft] = {rc};
//FDft Dft(rc); // direct version
FFft<1/*(TODO: fix MultidimFFT) nK*/> DftK(rc); // fast version
FFft<1/*nrhs*/> DftRhs(rc);
FFft<1/*nlhs*/> DftLhs(rc);
FFft<dimfft> Dft(steps); // fast version
// Get first COLUMN of K and Store in T
FReal T[dim*rc];
......@@ -393,9 +393,9 @@ int main(int, char **){
// if first COLUMN (T) of C is used
for (unsigned int d=0; d<dim; ++d)
DftK.applyDFT(T+d*rc,FT+d*rc);
Dft.applyDFT(T+d*rc,FT+d*rc);
// // if first ROW of C is used
// DftK.applyDFT(C,FT);
// Dft.applyDFT(C,FT);
FComplexe FPMultExp[nrhs*rc];
FComplexe FPLocalExp[nlhs*rc];
......@@ -428,7 +428,7 @@ int main(int, char **){
// Transform PaddedMultExp
for (unsigned int idxRhs=0; idxRhs<nrhs; ++idxRhs) // apply nrhs 1 dimensionnal FFT
DftRhs.applyDFT(PaddedMultExp+idxRhs*rc,FPMultExp+idxRhs*rc);
Dft.applyDFT(PaddedMultExp+idxRhs*rc,FPMultExp+idxRhs*rc);
std::cout<< "Apply M2L in Fourier space: ";
time.tic();
......@@ -463,7 +463,7 @@ int main(int, char **){
// std::cout<<std::endl;
for (unsigned int idxLhs=0; idxLhs<nlhs; ++idxLhs) // apply nrhs 1 dimensionnal FFT
DftLhs.applyIDFT(FPLocalExp+idxLhs*rc,PLocalExp+idxLhs*rc);
Dft.applyIDFT(FPLocalExp+idxLhs*rc,PLocalExp+idxLhs*rc);
// std::cout<< "Padded LocalExp: "<<std::endl;
// for (unsigned int p=0; p<rc; ++p)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment