Commit 67c21775 authored by BLANCHARD Pierre

Implementation of Symmetric Uniform Kernel, optimized evaluation of derivative...

Implementation of Symmetric Uniform Kernel, optimized evaluation of derivative of Lagrange polynomials.
parent bbeed7db
......@@ -25,7 +25,7 @@
#include "../../Components/FAbstractKernels.hpp"
#include "./FChebInterpolator.hpp"
#include "./FChebSymmetries.hpp"
#include "../Interpolation/FInterpSymmetries.hpp"
class FTreeCoordinate;
......@@ -322,7 +322,7 @@ struct FChebFlopsSymKernel<CellClass, ContainerClass, MatrixKernelClass, ORDER>
}
// set permutation vector and indices
const FChebSymmetries<ORDER> Symmetries;
const FInterpSymmetries<ORDER> Symmetries;
for (int i=-3; i<=3; ++i)
for (int j=-3; j<=3; ++j)
for (int k=-3; k<=3; ++k)
......
......@@ -21,7 +21,7 @@
#include "../../Utils/FBlas.hpp"
#include "./FChebTensor.hpp"
#include "./FChebSymmetries.hpp"
#include "../Interpolation/FInterpSymmetries.hpp"
#include "./FChebM2LHandler.hpp"
/**
......@@ -539,7 +539,7 @@ public:
}
// set permutation vector and indices
const FChebSymmetries<ORDER> Symmetries;
const FInterpSymmetries<ORDER> Symmetries;
for (int i=-3; i<=3; ++i)
for (int j=-3; j<=3; ++j)
for (int k=-3; k<=3; ++k) {
......@@ -620,7 +620,7 @@ public:
// set permutation vector and indices
const FChebSymmetries<ORDER> Symmetries;
const FInterpSymmetries<ORDER> Symmetries;
for (int i=-3; i<=3; ++i)
for (int j=-3; j<=3; ++j)
for (int k=-3; k<=3; ++k) {
......
......@@ -13,8 +13,8 @@
// "http://www.cecill.info".
// "http://www.gnu.org/licenses".
// ===================================================================================
#ifndef FCHEBSYMMETRIES_HPP
#define FCHEBSYMMETRIES_HPP
#ifndef FINTERPSYMMETRIES_HPP
#define FINTERPSYMMETRIES_HPP
#include <climits>
......@@ -25,12 +25,12 @@
*/
/**
* @class FChebSymmetries
* @class FInterpSymmetries
*
* The class @p FChebSymmetries exploits all symmetries
* The class @p FInterpSymmetries exploits all symmetries
*/
template <int ORDER>
class FChebSymmetries
class FInterpSymmetries
{
enum {nnodes = ORDER*ORDER*ORDER};
......@@ -53,7 +53,7 @@ class FChebSymmetries
public:
/** Constructor */
FChebSymmetries()
FInterpSymmetries()
{
// permutations for 8 quadrants
unsigned int quads[8][nnodes];
......
......@@ -45,8 +45,6 @@ class FUnifInterpolator : FNoCopyable
typedef FUnifRoots< ORDER> BasisType;
typedef FUnifTensor<ORDER> TensorType;
// FReal T_of_roots[ORDER][ORDER];
// FReal T[ORDER * (ORDER-1)];
unsigned int node_ids[nnodes][3];
FReal* ChildParentInterpolator[8];
......@@ -56,12 +54,13 @@ class FUnifInterpolator : FNoCopyable
////////////////////////////////////////////////////////////////////
// PB: use improved version of M2M/L2L
/**
* Initialize the child - parent - interpolator, it is basically the matrix
* S which is precomputed and reused for all M2M and L2L operations, ie for
* all non leaf inter/anterpolations.
*/
/*
void initM2MandL2L()
{
FPoint ParentRoots[nnodes], ChildRoots[nnodes];
......@@ -86,6 +85,7 @@ class FUnifInterpolator : FNoCopyable
assembleInterpolator(nnodes, ChildRoots, ChildParentInterpolator[child]);
}
}
*/
/**
* Initialize the child - parent - interpolator, it is basically the matrix
......@@ -141,22 +141,10 @@ public:
*/
explicit FUnifInterpolator()
{
// // initialize chebyshev polynomials of root nodes: T_o(x_j)
// for (unsigned int o=1; o<ORDER; ++o)
// for (unsigned int j=0; j<ORDER; ++j)
// T_of_roots[o][j] = FReal(BasisType::T(o, FReal(BasisType::roots[j])));
//
// // initialize chebyshev polynomials of root nodes: T_o(x_j)
// for (unsigned int o=1; o<ORDER; ++o)
// for (unsigned int j=0; j<ORDER; ++j)
// T[(o-1)*ORDER + j] = FReal(BasisType::T(o, FReal(BasisType::roots[j])));
// initialize root node ids
TensorType::setNodeIds(node_ids);
// initialize interpolation operator for non M2M and L2L (non leaf
// operations)
// initialize interpolation operator for M2M and L2L (non leaf operations)
//this -> initM2MandL2L(); // non tensor-product interpolation
this -> initTensorM2MandL2L(); // tensor-product interpolation
}
......@@ -278,7 +266,7 @@ public:
const FReal *const localExpansion,
ContainerClass *const localParticles) const;
// PB: ORDER^6 version of applyM2M/L2L
/*
void applyM2M(const unsigned int ChildIndex,
const FReal *const ChildExpansion,
......@@ -299,9 +287,7 @@ public:
}
*/
// PB: improvement of applyM2M/L2L can be preserved (TOFACTO)
// since the relative position of child and parent interpolation remains unchanged
// PB: improved version of applyM2M/L2L also applies to Lagrange interpolation
void applyM2M(const unsigned int ChildIndex,
const FReal *const ChildExpansion,
FReal *const ParentExpansion) const
......@@ -506,10 +492,10 @@ inline void FUnifInterpolator<ORDER>::applyL2PGradient(const FPoint& center,
// evaluate Lagrange polynomials of source particle
for (unsigned int o=0; o<ORDER; ++o) {
L_of_x[o][0] = BasisType::L(o, localPosition.getX()); // 3 * ORDER*(ORDER-1) flops PB: TODO confirm
L_of_x[o][0] = BasisType::L(o, localPosition.getX()); // 3 * ORDER*(ORDER-1) flops
L_of_x[o][1] = BasisType::L(o, localPosition.getY()); // 3 * ORDER*(ORDER-1) flops
L_of_x[o][2] = BasisType::L(o, localPosition.getZ()); // 3 * ORDER*(ORDER-1) flops
dL_of_x[o][0] = BasisType::dL(o, localPosition.getX()); // TODO verify 3 * ORDER*(ORDER-1) flops PB: TODO confirm
dL_of_x[o][0] = BasisType::dL(o, localPosition.getX()); // TODO verify 3 * ORDER*(ORDER-1) flops
dL_of_x[o][1] = BasisType::dL(o, localPosition.getY()); // TODO verify 3 * ORDER*(ORDER-1) flops
dL_of_x[o][2] = BasisType::dL(o, localPosition.getZ()); // TODO verify 3 * ORDER*(ORDER-1) flops
}
......
......@@ -96,7 +96,7 @@ public:
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) apply Sy
FBlas::scal(AbstractBaseClass::nnodes*2, FReal(0.), ParentCell->getMultipole(idxRhs));
FBlas::scal(AbstractBaseClass::nnodes, FReal(0.), ParentCell->getMultipole(idxRhs));
for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){
if (ChildCells[ChildIndex]){
AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxRhs),
......@@ -170,7 +170,6 @@ public:
const int /*TreeLevel*/)
{
for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){
// 1) Apply Inverse Discrete Fourier Transform
M2LHandler->unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxRhs),
const_cast<CellClass*>(ParentCell)->getLocal(idxRhs));
......
......@@ -64,7 +64,7 @@ class FUnifM2LHandler : FNoCopyable
unsigned int opt_rc;
typedef FUnifTensor<ORDER> TensorType;
unsigned int node_diff[nnodes*nnodes]; // PB: used in applyC to get id=i-j
unsigned int node_diff[nnodes*nnodes];
// FDft Dft; // Direct Discrete Fourier Transformator
FFft Dft; // Fast Discrete Fourier Transformator
......@@ -222,18 +222,12 @@ public:
FReal Py[rc];
FBlas::setzero(rc,Py);
FComplexe tmpFY[rc]; // not mandatory?
// Apply Zero Padding
for (unsigned int i=0; i<nnodes; ++i)
Py[node_diff[i*nnodes]]=y[i];
// Apply forward Discrete Fourier Transform
Dft.applyDFT(Py,tmpFY);
// not mandatory?
for (unsigned int j=0; j<rc; ++j) // could be opt_rc
FY[j]+=tmpFY[j];
Dft.applyDFT(Py,FY);
}
......
......@@ -63,14 +63,14 @@ struct FUnifRoots : FNoCopyable
x = (x < FReal(-1.) ? FReal(-1.) : x);
}
FReal tmpL=FReal(1.);
FReal L=FReal(1.);
for(unsigned int m=0;m<order;++m){
if(m!=n)
tmpL *= (x-FUnifRoots<order>::roots[m])/(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[m]);
L *= (x-FUnifRoots<order>::roots[m])/(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[m]);
}
return FReal(tmpL);
return FReal(L);
}
......@@ -90,26 +90,22 @@ struct FUnifRoots : FNoCopyable
x = (x < FReal(-1.) ? FReal(-1.) : x);
}
FReal tmpdL;
FReal dL=FReal(0.);
// optimized variant
FReal NdL=FReal(0.);// init numerator
FReal DdL=FReal(1.);// init denominator
FReal tmpNdL;
for(unsigned int p=0;p<order;++p){
if(p!=n){
tmpdL=1./(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[p]);
for(unsigned int m=0;m<order;++m){
tmpNdL=FReal(1.);
for(unsigned int m=0;m<order;++m)
if(m!=n && m!=p)
tmpdL *= (x-FUnifRoots<order>::roots[m])/(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[m]);
}// m
dL+=tmpdL;
tmpNdL*=x-FUnifRoots<order>::roots[m];
NdL+=tmpNdL;
DdL*=FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[p];
}//endif
}// p
return FReal(dL);
return FReal(NdL/DdL);
}
};
......
This diff is collapsed.
This diff is collapsed.
......@@ -172,15 +172,17 @@ class FFft
private:
unsigned int nsteps_;
unsigned int nsteps_opt_;
public:
FFft(const unsigned int nsteps)
: nsteps_(nsteps)
: nsteps_(nsteps),
nsteps_opt_(nsteps/2+1) // SPECIFIC TO FTT FOR REAL VALUES
{
// allocate arrays
fftR_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_);
fftC_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_);
fftC_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_opt_);
// fftw plans
plan_c2r_ =
......@@ -213,7 +215,7 @@ public:
// read sampled data
// std::cout<< "copy(";
// time.tic();
FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(fftC_));
FBlas::c_setzero(nsteps_opt_,reinterpret_cast<FReal*>(fftC_));
FBlas::copy(nsteps_, sampledData,fftR_);
// std::cout << time.tacAndElapsed() << ")";
......@@ -226,11 +228,10 @@ public:
// write transformed data
// std::cout<< " - copy(";
// time.tic();
// FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(fftC_),
// reinterpret_cast<FReal*>(transformedData));
for(unsigned int s=0; s<nsteps_; ++s)
transformedData[s]=fftC_[s];
FBlas::c_copy(nsteps_opt_,reinterpret_cast<FReal*>(fftC_),
reinterpret_cast<FReal*>(transformedData));
// for(unsigned int s=0; s<nsteps_opt_; ++s)
// transformedData[s]=fftC_[s];
// std::cout << time.tacAndElapsed() << ") ";
......@@ -242,7 +243,7 @@ public:
{
// read transformed data
FBlas::setzero(nsteps_,fftR_);
FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(transformedData),
FBlas::c_copy(nsteps_opt_,reinterpret_cast<const FReal*>(transformedData),
reinterpret_cast<FReal*>(fftC_));
// perform ifft
......
......@@ -29,7 +29,8 @@
#include "../../Src/Kernels/Uniform/FUnifCell.hpp"
#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp"
#include "../../Src/Kernels/Uniform/FUnifKernel.hpp"
//#include "../../Src/Kernels/Uniform/FUnifKernel.hpp"
#include "../../Src/Kernels/Uniform/FUnifSymKernel.hpp"
#include "../../Src/Components/FSimpleLeaf.hpp"
#include "../../Src/Kernels/P2P/FP2PParticleContainerIndexed.hpp"
......@@ -51,7 +52,7 @@
int main(int argc, char* argv[])
{
const char* const filename = FParameters::getStr(argc,argv,"-f", "../Data/test20k.fma");
const unsigned int TreeHeight = FParameters::getValue(argc, argv, "-h", 5);
const unsigned int TreeHeight = FParameters::getValue(argc, argv, "-h", 3);
const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, "-sh", 2);
const unsigned int NbThreads = FParameters::getValue(argc, argv, "-t", 1);
......@@ -121,7 +122,7 @@ int main(int argc, char* argv[])
{ // begin Lagrange kernel
// accuracy
const unsigned int ORDER = 7;
const unsigned int ORDER = 5;
// typedefs
typedef FP2PParticleContainerIndexed ContainerClass;
typedef FSimpleLeaf< ContainerClass > LeafClass;
......@@ -129,7 +130,8 @@ int main(int argc, char* argv[])
typedef FInterpMatrixKernelR MatrixKernelClass;
typedef FUnifCell<ORDER> CellClass;
typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass;
typedef FUnifKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
//typedef FUnifKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
typedef FUnifSymKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass;
typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
// typedef FFmmAlgorithmThread<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass;
......
......@@ -31,7 +31,7 @@
#include "../../Src/Kernels/Chebyshev/FChebTensor.hpp"
#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp"
#include "../../Src/Kernels/Chebyshev/FChebSymmetries.hpp"
#include "../../Src/Kernels/Interpolation/FInterpSymmetries.hpp"
......@@ -108,7 +108,7 @@ int main(int argc, char* argv[])
FReal maxdiff(0.);
// permuter
FChebSymmetries<order> permuter;
FInterpSymmetries<order> permuter;
// permutation vector
unsigned int perm[nnodes];
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment