Commit 67c21775 authored by BLANCHARD Pierre

Implementation of Symmetric Uniform Kernel, optimized evaluation of derivative...

Implementation of Symmetric Uniform Kernel, optimized evaluation of derivative of Lagrange polynomials.
parent bbeed7db
...@@ -25,7 +25,7 @@ ...@@ -25,7 +25,7 @@
#include "../../Components/FAbstractKernels.hpp" #include "../../Components/FAbstractKernels.hpp"
#include "./FChebInterpolator.hpp" #include "./FChebInterpolator.hpp"
#include "./FChebSymmetries.hpp" #include "../Interpolation/FInterpSymmetries.hpp"
class FTreeCoordinate; class FTreeCoordinate;
...@@ -322,7 +322,7 @@ struct FChebFlopsSymKernel<CellClass, ContainerClass, MatrixKernelClass, ORDER> ...@@ -322,7 +322,7 @@ struct FChebFlopsSymKernel<CellClass, ContainerClass, MatrixKernelClass, ORDER>
} }
// set permutation vector and indices // set permutation vector and indices
const FChebSymmetries<ORDER> Symmetries; const FInterpSymmetries<ORDER> Symmetries;
for (int i=-3; i<=3; ++i) for (int i=-3; i<=3; ++i)
for (int j=-3; j<=3; ++j) for (int j=-3; j<=3; ++j)
for (int k=-3; k<=3; ++k) for (int k=-3; k<=3; ++k)
......
...@@ -21,7 +21,7 @@ ...@@ -21,7 +21,7 @@
#include "../../Utils/FBlas.hpp" #include "../../Utils/FBlas.hpp"
#include "./FChebTensor.hpp" #include "./FChebTensor.hpp"
#include "./FChebSymmetries.hpp" #include "../Interpolation/FInterpSymmetries.hpp"
#include "./FChebM2LHandler.hpp" #include "./FChebM2LHandler.hpp"
/** /**
...@@ -539,7 +539,7 @@ public: ...@@ -539,7 +539,7 @@ public:
} }
// set permutation vector and indices // set permutation vector and indices
const FChebSymmetries<ORDER> Symmetries; const FInterpSymmetries<ORDER> Symmetries;
for (int i=-3; i<=3; ++i) for (int i=-3; i<=3; ++i)
for (int j=-3; j<=3; ++j) for (int j=-3; j<=3; ++j)
for (int k=-3; k<=3; ++k) { for (int k=-3; k<=3; ++k) {
...@@ -620,7 +620,7 @@ public: ...@@ -620,7 +620,7 @@ public:
// set permutation vector and indices // set permutation vector and indices
const FChebSymmetries<ORDER> Symmetries; const FInterpSymmetries<ORDER> Symmetries;
for (int i=-3; i<=3; ++i) for (int i=-3; i<=3; ++i)
for (int j=-3; j<=3; ++j) for (int j=-3; j<=3; ++j)
for (int k=-3; k<=3; ++k) { for (int k=-3; k<=3; ++k) {
......
...@@ -13,8 +13,8 @@ ...@@ -13,8 +13,8 @@
// "http://www.cecill.info". // "http://www.cecill.info".
// "http://www.gnu.org/licenses". // "http://www.gnu.org/licenses".
// =================================================================================== // ===================================================================================
#ifndef FCHEBSYMMETRIES_HPP #ifndef FINTERPSYMMETRIES_HPP
#define FCHEBSYMMETRIES_HPP #define FINTERPSYMMETRIES_HPP
#include <climits> #include <climits>
...@@ -25,12 +25,12 @@ ...@@ -25,12 +25,12 @@
*/ */
/** /**
* @class FChebSymmetries * @class FInterpSymmetries
* *
* The class @p FChebSymmetries exploits all symmetries * The class @p FInterpSymmetries exploits all symmetries
*/ */
template <int ORDER> template <int ORDER>
class FChebSymmetries class FInterpSymmetries
{ {
enum {nnodes = ORDER*ORDER*ORDER}; enum {nnodes = ORDER*ORDER*ORDER};
...@@ -49,11 +49,11 @@ class FChebSymmetries ...@@ -49,11 +49,11 @@ class FChebSymmetries
return (sk | sj | si); return (sk | sj | si);
} }
public: public:
/** Constructor */ /** Constructor */
FChebSymmetries() FInterpSymmetries()
{ {
// permutations for 8 quadrants // permutations for 8 quadrants
unsigned int quads[8][nnodes]; unsigned int quads[8][nnodes];
......
...@@ -45,8 +45,6 @@ class FUnifInterpolator : FNoCopyable ...@@ -45,8 +45,6 @@ class FUnifInterpolator : FNoCopyable
typedef FUnifRoots< ORDER> BasisType; typedef FUnifRoots< ORDER> BasisType;
typedef FUnifTensor<ORDER> TensorType; typedef FUnifTensor<ORDER> TensorType;
// FReal T_of_roots[ORDER][ORDER];
// FReal T[ORDER * (ORDER-1)];
unsigned int node_ids[nnodes][3]; unsigned int node_ids[nnodes][3];
FReal* ChildParentInterpolator[8]; FReal* ChildParentInterpolator[8];
...@@ -56,12 +54,13 @@ class FUnifInterpolator : FNoCopyable ...@@ -56,12 +54,13 @@ class FUnifInterpolator : FNoCopyable
//////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////
// PB: use improved version of M2M/L2L
/** /**
* Initialize the child - parent - interpolator, it is basically the matrix * Initialize the child - parent - interpolator, it is basically the matrix
* S which is precomputed and reused for all M2M and L2L operations, ie for * S which is precomputed and reused for all M2M and L2L operations, ie for
* all non leaf inter/anterpolations. * all non leaf inter/anterpolations.
*/ */
/*
void initM2MandL2L() void initM2MandL2L()
{ {
FPoint ParentRoots[nnodes], ChildRoots[nnodes]; FPoint ParentRoots[nnodes], ChildRoots[nnodes];
...@@ -86,6 +85,7 @@ class FUnifInterpolator : FNoCopyable ...@@ -86,6 +85,7 @@ class FUnifInterpolator : FNoCopyable
assembleInterpolator(nnodes, ChildRoots, ChildParentInterpolator[child]); assembleInterpolator(nnodes, ChildRoots, ChildParentInterpolator[child]);
} }
} }
*/
/** /**
* Initialize the child - parent - interpolator, it is basically the matrix * Initialize the child - parent - interpolator, it is basically the matrix
...@@ -141,22 +141,10 @@ public: ...@@ -141,22 +141,10 @@ public:
*/ */
explicit FUnifInterpolator() explicit FUnifInterpolator()
{ {
// // initialize chebyshev polynomials of root nodes: T_o(x_j)
// for (unsigned int o=1; o<ORDER; ++o)
// for (unsigned int j=0; j<ORDER; ++j)
// T_of_roots[o][j] = FReal(BasisType::T(o, FReal(BasisType::roots[j])));
//
// // initialize chebyshev polynomials of root nodes: T_o(x_j)
// for (unsigned int o=1; o<ORDER; ++o)
// for (unsigned int j=0; j<ORDER; ++j)
// T[(o-1)*ORDER + j] = FReal(BasisType::T(o, FReal(BasisType::roots[j])));
// initialize root node ids // initialize root node ids
TensorType::setNodeIds(node_ids); TensorType::setNodeIds(node_ids);
// initialize interpolation operator for non M2M and L2L (non leaf // initialize interpolation operator for M2M and L2L (non leaf operations)
// operations)
//this -> initM2MandL2L(); // non tensor-product interpolation //this -> initM2MandL2L(); // non tensor-product interpolation
this -> initTensorM2MandL2L(); // tensor-product interpolation this -> initTensorM2MandL2L(); // tensor-product interpolation
} }
...@@ -278,7 +266,7 @@ public: ...@@ -278,7 +266,7 @@ public:
const FReal *const localExpansion, const FReal *const localExpansion,
ContainerClass *const localParticles) const; ContainerClass *const localParticles) const;
// PB: ORDER^6 version of applyM2M/L2L
/* /*
void applyM2M(const unsigned int ChildIndex, void applyM2M(const unsigned int ChildIndex,
const FReal *const ChildExpansion, const FReal *const ChildExpansion,
...@@ -299,9 +287,7 @@ public: ...@@ -299,9 +287,7 @@ public:
} }
*/ */
// PB: improvement of applyM2M/L2L can be preserved (TOFACTO) // PB: improved version of applyM2M/L2L also applies to Lagrange interpolation
// since relative position of child and parent interpolation is remains unchanged
void applyM2M(const unsigned int ChildIndex, void applyM2M(const unsigned int ChildIndex,
const FReal *const ChildExpansion, const FReal *const ChildExpansion,
FReal *const ParentExpansion) const FReal *const ParentExpansion) const
...@@ -506,10 +492,10 @@ inline void FUnifInterpolator<ORDER>::applyL2PGradient(const FPoint& center, ...@@ -506,10 +492,10 @@ inline void FUnifInterpolator<ORDER>::applyL2PGradient(const FPoint& center,
// evaluate Lagrange polynomials of source particle // evaluate Lagrange polynomials of source particle
for (unsigned int o=0; o<ORDER; ++o) { for (unsigned int o=0; o<ORDER; ++o) {
L_of_x[o][0] = BasisType::L(o, localPosition.getX()); // 3 * ORDER*(ORDER-1) flops PB: TODO confirm L_of_x[o][0] = BasisType::L(o, localPosition.getX()); // 3 * ORDER*(ORDER-1) flops
L_of_x[o][1] = BasisType::L(o, localPosition.getY()); // 3 * ORDER*(ORDER-1) flops L_of_x[o][1] = BasisType::L(o, localPosition.getY()); // 3 * ORDER*(ORDER-1) flops
L_of_x[o][2] = BasisType::L(o, localPosition.getZ()); // 3 * ORDER*(ORDER-1) flops L_of_x[o][2] = BasisType::L(o, localPosition.getZ()); // 3 * ORDER*(ORDER-1) flops
dL_of_x[o][0] = BasisType::dL(o, localPosition.getX()); // TODO verify 3 * ORDER*(ORDER-1) flops PB: TODO confirm dL_of_x[o][0] = BasisType::dL(o, localPosition.getX()); // TODO verify 3 * ORDER*(ORDER-1) flops
dL_of_x[o][1] = BasisType::dL(o, localPosition.getY()); // TODO verify 3 * ORDER*(ORDER-1) flops dL_of_x[o][1] = BasisType::dL(o, localPosition.getY()); // TODO verify 3 * ORDER*(ORDER-1) flops
dL_of_x[o][2] = BasisType::dL(o, localPosition.getZ()); // TODO verify 3 * ORDER*(ORDER-1) flops dL_of_x[o][2] = BasisType::dL(o, localPosition.getZ()); // TODO verify 3 * ORDER*(ORDER-1) flops
} }
......
...@@ -96,7 +96,7 @@ public: ...@@ -96,7 +96,7 @@ public:
{ {
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply Sy // 1) apply Sy
FBlas::scal(AbstractBaseClass::nnodes*2, FReal(0.), ParentCell->getMultipole(idxRhs)); FBlas::scal(AbstractBaseClass::nnodes, FReal(0.), ParentCell->getMultipole(idxRhs));
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){ for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
if (ChildCells[ChildIndex]){ if (ChildCells[ChildIndex]){
AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxRhs), AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxRhs),
...@@ -170,7 +170,6 @@ public: ...@@ -170,7 +170,6 @@ public:
const int /*TreeLevel*/) const int /*TreeLevel*/)
{ {
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) Apply Inverse Discete Fourier Transform // 1) Apply Inverse Discete Fourier Transform
M2LHandler->unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxRhs), M2LHandler->unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxRhs),
const_cast<CellClass*>(ParentCell)->getLocal(idxRhs)); const_cast<CellClass*>(ParentCell)->getLocal(idxRhs));
......
...@@ -64,7 +64,7 @@ class FUnifM2LHandler : FNoCopyable ...@@ -64,7 +64,7 @@ class FUnifM2LHandler : FNoCopyable
unsigned int opt_rc; unsigned int opt_rc;
typedef FUnifTensor<ORDER> TensorType; typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes]; // PB: used in applyC to get id=i-j unsigned int node_diff[nnodes*nnodes];
// FDft Dft; // Direct Discrete Fourier Transformator // FDft Dft; // Direct Discrete Fourier Transformator
FFft Dft; // Fast Discrete Fourier Transformator FFft Dft; // Fast Discrete Fourier Transformator
...@@ -222,18 +222,12 @@ public: ...@@ -222,18 +222,12 @@ public:
FReal Py[rc]; FReal Py[rc];
FBlas::setzero(rc,Py); FBlas::setzero(rc,Py);
FComplexe tmpFY[rc]; // not mandatory?
// Apply Zero Padding // Apply Zero Padding
for (unsigned int i=0; i<nnodes; ++i) for (unsigned int i=0; i<nnodes; ++i)
Py[node_diff[i*nnodes]]=y[i]; Py[node_diff[i*nnodes]]=y[i];
// Apply forward Discrete Fourier Transform // Apply forward Discrete Fourier Transform
Dft.applyDFT(Py,tmpFY); Dft.applyDFT(Py,FY);
// not mandatory?
for (unsigned int j=0; j<rc; ++j) // could be opt_rc
FY[j]+=tmpFY[j];
} }
......
...@@ -63,14 +63,14 @@ struct FUnifRoots : FNoCopyable ...@@ -63,14 +63,14 @@ struct FUnifRoots : FNoCopyable
x = (x < FReal(-1.) ? FReal(-1.) : x); x = (x < FReal(-1.) ? FReal(-1.) : x);
} }
FReal tmpL=FReal(1.); FReal L=FReal(1.);
for(unsigned int m=0;m<order;++m){ for(unsigned int m=0;m<order;++m){
if(m!=n) if(m!=n)
tmpL *= (x-FUnifRoots<order>::roots[m])/(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[m]); L *= (x-FUnifRoots<order>::roots[m])/(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[m]);
} }
return FReal(tmpL); return FReal(L);
} }
...@@ -90,26 +90,22 @@ struct FUnifRoots : FNoCopyable ...@@ -90,26 +90,22 @@ struct FUnifRoots : FNoCopyable
x = (x < FReal(-1.) ? FReal(-1.) : x); x = (x < FReal(-1.) ? FReal(-1.) : x);
} }
FReal tmpdL; // optimized variant
FReal dL=FReal(0.); FReal NdL=FReal(0.);// init numerator
FReal DdL=FReal(1.);// init denominator
FReal tmpNdL;
for(unsigned int p=0;p<order;++p){ for(unsigned int p=0;p<order;++p){
if(p!=n){ if(p!=n){
tmpdL=1./(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[p]); tmpNdL=FReal(1.);
for(unsigned int m=0;m<order;++m)
for(unsigned int m=0;m<order;++m){ if(m!=n && m!=p)
if(m!=n && m!=p) tmpNdL*=x-FUnifRoots<order>::roots[m];
tmpdL *= (x-FUnifRoots<order>::roots[m])/(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[m]); NdL+=tmpNdL;
DdL*=FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[p];
}// m
dL+=tmpdL;
}//endif }//endif
}// p }// p
return FReal(dL); return FReal(NdL/DdL);
} }
}; };
......
This diff is collapsed.
This diff is collapsed.
...@@ -172,15 +172,17 @@ class FFft ...@@ -172,15 +172,17 @@ class FFft
private: private:
unsigned int nsteps_; unsigned int nsteps_;
unsigned int nsteps_opt_;
public: public:
FFft(const unsigned int nsteps) FFft(const unsigned int nsteps)
: nsteps_(nsteps) : nsteps_(nsteps),
nsteps_opt_(nsteps/2+1) // SPECIFIC TO FTT FOR REAL VALUES
{ {
// allocate arrays // allocate arrays
fftR_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_); fftR_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_);
fftC_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_); fftC_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_opt_);
// fftw plans // fftw plans
plan_c2r_ = plan_c2r_ =
...@@ -213,7 +215,7 @@ public: ...@@ -213,7 +215,7 @@ public:
// read sampled data // read sampled data
// std::cout<< "copy("; // std::cout<< "copy(";
// time.tic(); // time.tic();
FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(fftC_)); FBlas::c_setzero(nsteps_opt_,reinterpret_cast<FReal*>(fftC_));
FBlas::copy(nsteps_, sampledData,fftR_); FBlas::copy(nsteps_, sampledData,fftR_);
// std::cout << time.tacAndElapsed() << ")"; // std::cout << time.tacAndElapsed() << ")";
...@@ -226,11 +228,10 @@ public: ...@@ -226,11 +228,10 @@ public:
// write transformed data // write transformed data
// std::cout<< " - copy("; // std::cout<< " - copy(";
// time.tic(); // time.tic();
// FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(fftC_), FBlas::c_copy(nsteps_opt_,reinterpret_cast<FReal*>(fftC_),
// reinterpret_cast<FReal*>(transformedData)); reinterpret_cast<FReal*>(transformedData));
// for(unsigned int s=0; s<nsteps_opt_; ++s)
for(unsigned int s=0; s<nsteps_; ++s) // transformedData[s]=fftC_[s];
transformedData[s]=fftC_[s];
// std::cout << time.tacAndElapsed() << ") "; // std::cout << time.tacAndElapsed() << ") ";
...@@ -242,7 +243,7 @@ public: ...@@ -242,7 +243,7 @@ public:
{ {
// read transformed data // read transformed data
FBlas::setzero(nsteps_,fftR_); FBlas::setzero(nsteps_,fftR_);
FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(transformedData), FBlas::c_copy(nsteps_opt_,reinterpret_cast<const FReal*>(transformedData),
reinterpret_cast<FReal*>(fftC_)); reinterpret_cast<FReal*>(fftC_));
// perform ifft // perform ifft
......
...@@ -29,7 +29,8 @@ ...@@ -29,7 +29,8 @@
#include "../../Src/Kernels/Uniform/FUnifCell.hpp" #include "../../Src/Kernels/Uniform/FUnifCell.hpp"
#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp"
#include "../../Src/Kernels/Uniform/FUnifKernel.hpp" //#include "../../Src/Kernels/Uniform/FUnifKernel.hpp"
#include "../../Src/Kernels/Uniform/FUnifSymKernel.hpp"
#include "../../Src/Components/FSimpleLeaf.hpp" #include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Kernels/P2P/FP2PParticleContainerIndexed.hpp" #include "../../Src/Kernels/P2P/FP2PParticleContainerIndexed.hpp"
...@@ -51,7 +52,7 @@ ...@@ -51,7 +52,7 @@
int main(int argc, char* argv[]) int main(int argc, char* argv[])
{ {
const char* const filename = FParameters::getStr(argc,argv,"-f", "../Data/test20k.fma"); const char* const filename = FParameters::getStr(argc,argv,"-f", "../Data/test20k.fma");
const unsigned int TreeHeight = FParameters::getValue(argc, argv, "-h", 5); const unsigned int TreeHeight = FParameters::getValue(argc, argv, "-h", 3);
const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, "-sh", 2); const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, "-sh", 2);
const unsigned int NbThreads = FParameters::getValue(argc, argv, "-t", 1); const unsigned int NbThreads = FParameters::getValue(argc, argv, "-t", 1);
...@@ -121,7 +122,7 @@ int main(int argc, char* argv[]) ...@@ -121,7 +122,7 @@ int main(int argc, char* argv[])
{ // begin Lagrange kernel { // begin Lagrange kernel
// accuracy // accuracy
const unsigned int ORDER = 7; const unsigned int ORDER = 5;
// typedefs // typedefs
typedef FP2PParticleContainerIndexed ContainerClass; typedef FP2PParticleContainerIndexed ContainerClass;
typedef FSimpleLeaf< ContainerClass > LeafClass; typedef FSimpleLeaf< ContainerClass > LeafClass;
...@@ -129,7 +130,8 @@ int main(int argc, char* argv[]) ...@@ -129,7 +130,8 @@ int main(int argc, char* argv[])
typedef FInterpMatrixKernelR MatrixKernelClass; typedef FInterpMatrixKernelR MatrixKernelClass;
typedef FUnifCell<ORDER> CellClass; typedef FUnifCell<ORDER> CellClass;
typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass;
typedef FUnifKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; //typedef FUnifKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
typedef FUnifSymKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
// typedef FFmmAlgorithmThread<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; // typedef FFmmAlgorithmThread<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
......
...@@ -31,7 +31,7 @@ ...@@ -31,7 +31,7 @@
#include "../../Src/Kernels/Chebyshev/FChebTensor.hpp" #include "../../Src/Kernels/Chebyshev/FChebTensor.hpp"
#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp"
#include "../../Src/Kernels/Chebyshev/FChebSymmetries.hpp" #include "../../Src/Kernels/Interpolation/FInterpSymmetries.hpp"
...@@ -108,7 +108,7 @@ int main(int argc, char* argv[]) ...@@ -108,7 +108,7 @@ int main(int argc, char* argv[])
FReal maxdiff(0.); FReal maxdiff(0.);
// permuter // permuter
FChebSymmetries<order> permuter; FInterpSymmetries<order> permuter;
// permutation vector // permutation vector
unsigned int perm[nnodes]; unsigned int perm[nnodes];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment