diff --git a/Src/Kernels/Chebyshev/FChebFlopsSymKernel.hpp b/Src/Kernels/Chebyshev/FChebFlopsSymKernel.hpp index 58dc8d5459c47ab8a8af8bae08609eefbec83aaa..cf61e82f910b58058dbe9589f834631de798ff64 100755 --- a/Src/Kernels/Chebyshev/FChebFlopsSymKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebFlopsSymKernel.hpp @@ -25,7 +25,7 @@ #include "../../Components/FAbstractKernels.hpp" #include "./FChebInterpolator.hpp" -#include "./FChebSymmetries.hpp" +#include "../Interpolation/FInterpSymmetries.hpp" class FTreeCoordinate; @@ -322,7 +322,7 @@ struct FChebFlopsSymKernel<CellClass, ContainerClass, MatrixKernelClass, ORDER> } // set permutation vector and indices - const FChebSymmetries<ORDER> Symmetries; + const FInterpSymmetries<ORDER> Symmetries; for (int i=-3; i<=3; ++i) for (int j=-3; j<=3; ++j) for (int k=-3; k<=3; ++k) diff --git a/Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp b/Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp index bd205555f55635dd4e85c142cdd4bb9667d1b36a..d3f46e29d109a5e43d27e792c251bf0262a475d3 100755 --- a/Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp +++ b/Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp @@ -21,7 +21,7 @@ #include "../../Utils/FBlas.hpp" #include "./FChebTensor.hpp" -#include "./FChebSymmetries.hpp" +#include "../Interpolation/FInterpSymmetries.hpp" #include "./FChebM2LHandler.hpp" /** @@ -539,7 +539,7 @@ public: } // set permutation vector and indices - const FChebSymmetries<ORDER> Symmetries; + const FInterpSymmetries<ORDER> Symmetries; for (int i=-3; i<=3; ++i) for (int j=-3; j<=3; ++j) for (int k=-3; k<=3; ++k) { @@ -620,7 +620,7 @@ public: // set permutation vector and indices - const FChebSymmetries<ORDER> Symmetries; + const FInterpSymmetries<ORDER> Symmetries; for (int i=-3; i<=3; ++i) for (int j=-3; j<=3; ++j) for (int k=-3; k<=3; ++k) { diff --git a/Src/Kernels/Chebyshev/FChebSymmetries.hpp b/Src/Kernels/Interpolation/FInterpSymmetries.hpp similarity index 96% rename from Src/Kernels/Chebyshev/FChebSymmetries.hpp rename to Src/Kernels/Interpolation/FInterpSymmetries.hpp index 699514f3c6bbe9fdc310e184dcda8c0dbefd43bd..24693aead9a3da01feca2316603b16220b951cc0 100755 --- a/Src/Kernels/Chebyshev/FChebSymmetries.hpp +++ b/Src/Kernels/Interpolation/FInterpSymmetries.hpp @@ -13,8 +13,8 @@ // "http://www.cecill.info". // "http://www.gnu.org/licenses". // =================================================================================== -#ifndef FCHEBSYMMETRIES_HPP -#define FCHEBSYMMETRIES_HPP +#ifndef FINTERPSYMMETRIES_HPP +#define FINTERPSYMMETRIES_HPP #include <climits> @@ -25,12 +25,12 @@ */ /** - * @class FChebSymmetries + * @class FInterpSymmetries * - * The class @p FChebSymmetries exploits all symmetries + * The class @p FInterpSymmetries exploits all symmetries */ template <int ORDER> -class FChebSymmetries +class FInterpSymmetries { enum {nnodes = ORDER*ORDER*ORDER}; @@ -49,11 +49,11 @@ class FChebSymmetries return (sk | sj | si); } - + public: /** Constructor */ - FChebSymmetries() + FInterpSymmetries() { // permutations for 8 quadrants unsigned int quads[8][nnodes]; diff --git a/Src/Kernels/Uniform/FUnifInterpolator.hpp b/Src/Kernels/Uniform/FUnifInterpolator.hpp index 4f7acccbf064c7643a97c12e3436148f56835b3a..c4c4f669bfc0ca586553440e728122fee625094a 100755 --- a/Src/Kernels/Uniform/FUnifInterpolator.hpp +++ b/Src/Kernels/Uniform/FUnifInterpolator.hpp @@ -45,8 +45,6 @@ class FUnifInterpolator : FNoCopyable typedef FUnifRoots< ORDER> BasisType; typedef FUnifTensor<ORDER> TensorType; - // FReal T_of_roots[ORDER][ORDER]; - // FReal T[ORDER * (ORDER-1)]; unsigned int node_ids[nnodes][3]; FReal* ChildParentInterpolator[8]; @@ -56,12 +54,13 @@ class FUnifInterpolator : FNoCopyable //////////////////////////////////////////////////////////////////// - + // PB: use improved version of M2M/L2L /** * Initialize the child - parent - interpolator, it is basically the matrix * S which is precomputed and reused for all M2M and L2L operations, ie for * all non leaf inter/anterpolations. */ +/* void initM2MandL2L() { FPoint ParentRoots[nnodes], ChildRoots[nnodes]; @@ -86,6 +85,7 @@ class FUnifInterpolator : FNoCopyable assembleInterpolator(nnodes, ChildRoots, ChildParentInterpolator[child]); } } +*/ /** * Initialize the child - parent - interpolator, it is basically the matrix @@ -141,22 +141,10 @@ public: */ explicit FUnifInterpolator() { - // // initialize chebyshev polynomials of root nodes: T_o(x_j) - // for (unsigned int o=1; o<ORDER; ++o) - // for (unsigned int j=0; j<ORDER; ++j) - // T_of_roots[o][j] = FReal(BasisType::T(o, FReal(BasisType::roots[j]))); - // - // // initialize chebyshev polynomials of root nodes: T_o(x_j) - // for (unsigned int o=1; o<ORDER; ++o) - // for (unsigned int j=0; j<ORDER; ++j) - // T[(o-1)*ORDER + j] = FReal(BasisType::T(o, FReal(BasisType::roots[j]))); - - // initialize root node ids TensorType::setNodeIds(node_ids); - // initialize interpolation operator for non M2M and L2L (non leaf - // operations) + // initialize interpolation operator for M2M and L2L (non leaf operations) //this -> initM2MandL2L(); // non tensor-product interpolation this -> initTensorM2MandL2L(); // tensor-product interpolation } @@ -278,7 +266,7 @@ public: const FReal *const localExpansion, ContainerClass *const localParticles) const; - + // PB: ORDER^6 version of applyM2M/L2L /* void applyM2M(const unsigned int ChildIndex, const FReal *const ChildExpansion, @@ -299,9 +287,7 @@ public: } */ - // PB: improvement of applyM2M/L2L can be preserved (TOFACTO) - // since relative position of child and parent interpolation is remains unchanged - + // PB: improved version of applyM2M/L2L also applies to Lagrange interpolation void applyM2M(const unsigned int ChildIndex, const FReal *const ChildExpansion, FReal *const ParentExpansion) const @@ -506,10 +492,10 @@ inline void FUnifInterpolator<ORDER>::applyL2PGradient(const FPoint& center, // evaluate Lagrange polynomials of source particle for (unsigned int o=0; o<ORDER; ++o) { - L_of_x[o][0] = BasisType::L(o, localPosition.getX()); // 3 * ORDER*(ORDER-1) flops PB: TODO confirm + L_of_x[o][0] = BasisType::L(o, localPosition.getX()); // 3 * ORDER*(ORDER-1) flops L_of_x[o][1] = BasisType::L(o, localPosition.getY()); // 3 * ORDER*(ORDER-1) flops L_of_x[o][2] = BasisType::L(o, localPosition.getZ()); // 3 * ORDER*(ORDER-1) flops - dL_of_x[o][0] = BasisType::dL(o, localPosition.getX()); // TODO verify 3 * ORDER*(ORDER-1) flops PB: TODO confirm + dL_of_x[o][0] = BasisType::dL(o, localPosition.getX()); // TODO verify 3 * ORDER*(ORDER-1) flops dL_of_x[o][1] = BasisType::dL(o, localPosition.getY()); // TODO verify 3 * ORDER*(ORDER-1) flops dL_of_x[o][2] = BasisType::dL(o, localPosition.getZ()); // TODO verify 3 * ORDER*(ORDER-1) flops } diff --git a/Src/Kernels/Uniform/FUnifKernel.hpp b/Src/Kernels/Uniform/FUnifKernel.hpp index 4bd37ed90025df313ee5f8bdeb02e42974fc27bf..2b4f2f1c6bdd936f2b7b9e231690ec02a02c5f9c 100644 --- a/Src/Kernels/Uniform/FUnifKernel.hpp +++ b/Src/Kernels/Uniform/FUnifKernel.hpp @@ -96,7 +96,7 @@ public: { for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ // 1) apply Sy - FBlas::scal(AbstractBaseClass::nnodes*2, FReal(0.), ParentCell->getMultipole(idxRhs)); + FBlas::scal(AbstractBaseClass::nnodes, FReal(0.), ParentCell->getMultipole(idxRhs)); for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){ if (ChildCells[ChildIndex]){ AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxRhs), @@ -170,7 +170,6 @@ public: const int /*TreeLevel*/) { for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - // 1) Apply Inverse Discete Fourier Transform M2LHandler->unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxRhs), const_cast<CellClass*>(ParentCell)->getLocal(idxRhs)); diff --git a/Src/Kernels/Uniform/FUnifM2LHandler.hpp b/Src/Kernels/Uniform/FUnifM2LHandler.hpp index 8a6c2fee98bfbca66077d1e28c412dbcf07769cc..20a2f4a8e186dd1d4f8117a0f7309477b4d9df8e 100644 --- a/Src/Kernels/Uniform/FUnifM2LHandler.hpp +++ b/Src/Kernels/Uniform/FUnifM2LHandler.hpp @@ -64,7 +64,7 @@ class FUnifM2LHandler : FNoCopyable unsigned int opt_rc; typedef FUnifTensor<ORDER> TensorType; - unsigned int node_diff[nnodes*nnodes]; // PB: used in applyC to get id=i-j + unsigned int node_diff[nnodes*nnodes]; // FDft Dft; // Direct Discrete Fourier Transformator FFft Dft; // Fast Discrete Fourier Transformator @@ -222,18 +222,12 @@ public: FReal Py[rc]; FBlas::setzero(rc,Py); - FComplexe tmpFY[rc]; // not mandatory? - // Apply Zero Padding for (unsigned int i=0; i<nnodes; ++i) Py[node_diff[i*nnodes]]=y[i]; // Apply forward Discrete Fourier Transform - Dft.applyDFT(Py,tmpFY); - - // not mandatory? - for (unsigned int j=0; j<rc; ++j) // could be opt_rc - FY[j]+=tmpFY[j]; + Dft.applyDFT(Py,FY); } diff --git a/Src/Kernels/Uniform/FUnifRoots.hpp b/Src/Kernels/Uniform/FUnifRoots.hpp index b3897e8efe5e9976b8577c8139993059ecd4a2ea..15f1b9f57249df8de7e9633a4bea2b56fb3112e3 100644 --- a/Src/Kernels/Uniform/FUnifRoots.hpp +++ b/Src/Kernels/Uniform/FUnifRoots.hpp @@ -63,14 +63,14 @@ struct FUnifRoots : FNoCopyable x = (x < FReal(-1.) ? FReal(-1.) : x); } - FReal tmpL=FReal(1.); + FReal L=FReal(1.); for(unsigned int m=0;m<order;++m){ if(m!=n) - tmpL *= (x-FUnifRoots<order>::roots[m])/(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[m]); + L *= (x-FUnifRoots<order>::roots[m])/(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[m]); } - return FReal(tmpL); + return FReal(L); } @@ -90,26 +90,22 @@ struct FUnifRoots : FNoCopyable x = (x < FReal(-1.) ? FReal(-1.) : x); } - FReal tmpdL; - FReal dL=FReal(0.); - + // optimized variant + FReal NdL=FReal(0.);// init numerator + FReal DdL=FReal(1.);// init denominator + FReal tmpNdL; for(unsigned int p=0;p<order;++p){ if(p!=n){ - tmpdL=1./(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[p]); - - for(unsigned int m=0;m<order;++m){ - if(m!=n && m!=p) - tmpdL *= (x-FUnifRoots<order>::roots[m])/(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[m]); - - }// m - - dL+=tmpdL; - + tmpNdL=FReal(1.); + for(unsigned int m=0;m<order;++m) + if(m!=n && m!=p) + tmpNdL*=x-FUnifRoots<order>::roots[m]; + NdL+=tmpNdL; + DdL*=FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[p]; }//endif - }// p - return FReal(dL); + return FReal(NdL/DdL); } }; diff --git a/Src/Kernels/Uniform/FUnifSymKernel.hpp b/Src/Kernels/Uniform/FUnifSymKernel.hpp new file mode 100755 index 0000000000000000000000000000000000000000..509585a6325c38dad226e9116781eb9a89d1b9c3 --- /dev/null +++ b/Src/Kernels/Uniform/FUnifSymKernel.hpp @@ -0,0 +1,471 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FUNIFSYMKERNEL_HPP +#define FUNIFSYMKERNEL_HPP + +#include "../../Utils/FGlobal.hpp" +#include "../../Utils/FTrace.hpp" +#include "../../Utils/FSmartPointer.hpp" + +// Originally in M2LHandler but transferred to the kernel for the symmetric version +#include "../../Utils/FDft.hpp" // PB: for FFT +#include "../../Utils/FComplexe.hpp" +#include "./FUnifTensor.hpp" // PB: for node_diff +// + +#include "./FAbstractUnifKernel.hpp" +#include "./FUnifInterpolator.hpp" + +#include "./FUnifSymM2LHandler.hpp" + +class FTreeCoordinate; + + +// for verbosity only!!! +//#define COUNT_BLOCKED_INTERACTIONS + +// if timings should be logged +#define LOG_TIMINGS + +/** + * @author Pierre Blanchard (pierre.blanchard@inria.fr) + * @class FUnifSymKernel + * @brief + * Please read the license + * + * This kernels implement the Lagrange interpolation based FMM operators + * exploiting the symmetries in the far-field. It implements all interfaces + * (P2P, P2M, M2M, M2L, L2L, L2P) which are required by the FFmmAlgorithm and + * FFmmAlgorithmThread. + * + * @tparam CellClass Type of cell + * @tparam ContainerClass Type of container to store particles + * @tparam MatrixKernelClass Type of matrix kernel function + * @tparam ORDER interpolation order + */ +template < class CellClass, class ContainerClass, class MatrixKernelClass, int ORDER, int NVALS = 1> +class FUnifSymKernel + : public FAbstractUnifKernel<CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS> +{ + typedef FAbstractUnifKernel<CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS> AbstractBaseClass; + typedef FUnifSymM2LHandler<ORDER, MatrixKernelClass::Type> SymM2LHandlerClass; + enum {nnodes = AbstractBaseClass::nnodes}; + + /// Needed for handling all symmetries + const FSmartPointer<SymM2LHandlerClass,FSmartPointerMemory> SymM2LHandler; + + // permuted local and multipole expansions + FReal** Loc; + FReal** Mul; + unsigned int* countExp; + + // transformed expansions + FComplexe** TLoc; + FComplexe** TMul; + + static const unsigned int rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1); + static const unsigned int opt_rc = rc/2+1; + + typedef FUnifTensor<ORDER> TensorType; + unsigned int node_diff[nnodes*nnodes]; + + // FDft Dft; // Direct Discrete Fourier Transformator + FFft Dft; // Fast Discrete Fourier Transformator + + /** + * Allocate memory for storing locally permuted mulipole and local expansions + */ + void allocateMemoryForPermutedExpansions() + { + assert(Loc==NULL && Mul==NULL && countExp==NULL); + Loc = new FReal* [343]; + Mul = new FReal* [343]; + TLoc = new FComplexe* [343]; + TMul = new FComplexe* [343]; + countExp = new unsigned int [343]; + + // set all 343 to NULL + for (unsigned int idx=0; idx<343; ++idx) { + Mul[idx] = Loc[idx] = NULL; + TMul[idx] = TLoc[idx] = NULL; + } + + // init only 16 of 343 possible translations due to symmetries + for (int i=2; i<=3; ++i) + for (int j=0; j<=i; ++j) + for (int k=0; k<=j; ++k) { + const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3); + assert(Mul[idx]==NULL || Loc[idx]==NULL); + Mul[idx] = new FReal [24 * nnodes]; + Loc[idx] = new FReal [24 * nnodes]; + TMul[idx] = new FComplexe [24 * rc]; + TLoc[idx] = new FComplexe [24 * rc]; + } + } + + +#ifdef LOG_TIMINGS + FTic time; + FReal t_m2l_1, t_m2l_2, t_m2l_3; +#endif + +public: + /** + * The constructor initializes all constant attributes and it reads the + * precomputed and compressed M2L operators from a binary file (an + * runtime_error is thrown if the required file is not valid). + */ + FUnifSymKernel(const int inTreeHeight, + const FPoint& inBoxCenter, + const FReal inBoxWidth) + : AbstractBaseClass(inTreeHeight, inBoxCenter, inBoxWidth), + SymM2LHandler(new SymM2LHandlerClass(AbstractBaseClass::MatrixKernel.getPtr(), inBoxWidth, inTreeHeight)), + Loc(NULL), Mul(NULL), countExp(NULL), + TLoc(NULL), TMul(NULL), + Dft(rc) // initialize Discrete Fourier Transformator, + { + this->allocateMemoryForPermutedExpansions(); + + // initialize root node ids + TensorType::setNodeIdsDiff(node_diff); + +#ifdef LOG_TIMINGS + t_m2l_1 = FReal(0.); + t_m2l_2 = FReal(0.); + t_m2l_3 = FReal(0.); +#endif + } + + + + /** Copy constructor */ + FUnifSymKernel(const FUnifSymKernel& other) + : AbstractBaseClass(other), + SymM2LHandler(other.SymM2LHandler), + Loc(NULL), Mul(NULL), countExp(NULL), + TLoc(NULL), TMul(NULL) + { + this->allocateMemoryForPermutedExpansions(); + } + + + + /** Destructor */ + ~FUnifSymKernel() + { + for (unsigned int t=0; t<343; ++t) { + if (Loc[t]!=NULL) delete [] Loc[t]; + if (Mul[t]!=NULL) delete [] Mul[t]; + if (TLoc[t]!=NULL) delete [] TLoc[t]; + if (TMul[t]!=NULL) delete [] TMul[t]; + } + if (Loc!=NULL) delete [] Loc; + if (Mul!=NULL) delete [] Mul; + if (countExp!=NULL) delete [] countExp; + if (TLoc!=NULL) delete [] TLoc; + if (TMul!=NULL) delete [] TMul; + +#ifdef LOG_TIMINGS + std::cout << "- Permutation+Pad+Transfo took " << t_m2l_1 << "s" + << "\n- Apply M2L in Fourier space took " << t_m2l_2 << "s" + << "\n- Unpermutation+Untransfo+Unpad took " << t_m2l_3 << "s" + << std::endl; +#endif + } + + + const SymM2LHandlerClass *const getPtrToSymM2LHandler() const + { return SymM2LHandler.getPtr(); } + + + + void P2M(CellClass* const LeafCell, + const ContainerClass* const SourceParticles) + { + const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate())); + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + // 1) apply Sy + AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf, + LeafCell->getMultipole(idxRhs), SourceParticles); +// // 2) apply Discrete Fourier Transform +// SymM2LHandler->applyZeroPaddingAndDFT(LeafCell->getMultipole(idxRhs), +// LeafCell->getTransformedMultipole(idxRhs)); + } + } + + + + void M2M(CellClass* const FRestrict ParentCell, + const CellClass*const FRestrict *const FRestrict ChildCells, + const int /*TreeLevel*/) + { + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + // apply Sy + FBlas::scal(nnodes, FReal(0.), ParentCell->getMultipole(idxRhs)); + for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){ + if (ChildCells[ChildIndex]){ + AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxRhs), ParentCell->getMultipole(idxRhs)); + } + } +// // 2) Apply Discete Fourier Transform +// SymM2LHandler->applyZeroPaddingAndDFT(ParentCell->getMultipole(idxRhs), +// ParentCell->getTransformedMultipole(idxRhs)); + } + } + + + + void M2L(CellClass* const FRestrict TargetCell, + const CellClass* SourceCells[343], + const int /*NumSourceCells*/, + const int TreeLevel) + { +#ifdef LOG_TIMINGS + time.tic(); +#endif + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + + // permute and copy multipole expansion + memset(countExp, 0, sizeof(int) * 343); + for (unsigned int idx=0; idx<343; ++idx) { + if (SourceCells[idx]) { + const unsigned int pidx = SymM2LHandler->pindices[idx]; + const unsigned int count = (countExp[pidx])++; + FReal *const mul = Mul[pidx] + count*nnodes; + const unsigned int *const pvec = SymM2LHandler->pvectors[idx]; + const FReal *const MultiExp = SourceCells[idx]->getMultipole(idxRhs); + + // explicit loop unrolling + for (unsigned int n=0; n<nnodes/4 * 4; n+=4) { + mul[pvec[n ]] = MultiExp[n]; + mul[pvec[n+1]] = MultiExp[n+1]; + mul[pvec[n+2]] = MultiExp[n+2]; + mul[pvec[n+3]] = MultiExp[n+3]; + } + for (unsigned int n=nnodes/4 * 4; n<nnodes; ++n){ + mul[pvec[n]] = MultiExp[n]; + } + + // transform permuted expansion + FComplexe *const tmul = TMul[pidx] + count*rc; + + /////////////////////////////////////////// + FReal pmul[rc]; + FBlas::setzero(rc,pmul); + + // Apply Zero Padding + for (unsigned int i=0; i<nnodes; ++i) + pmul[node_diff[i*nnodes]]=mul[i]; + + // Apply forward Discrete Fourier Transform + Dft.applyDFT(pmul,tmul); + /////////////////////////////////////////// + + + } + } + +#ifdef LOG_TIMINGS + t_m2l_1 += time.tacAndElapsed(); +#endif + +#ifdef COUNT_BLOCKED_INTERACTIONS //////////////////////////////////// + unsigned int count_lidx = 0; + unsigned int count_interactions = 0; + for (unsigned int idx=0; idx<343; ++idx) + count_interactions += countExp[idx]; + if (count_interactions==189) { + for (unsigned int idx=0; idx<343; ++idx) { + if (countExp[idx]) + std::cout << "gidx = " << idx << " gives lidx = " << count_lidx++ << " and has " + << countExp[idx] << " interactions" << std::endl; + } + std::cout << std::endl; + } +#endif /////////////////////////////////////////////////////////////// + + +#ifdef LOG_TIMINGS + time.tic(); +#endif + + // multiply (mat-mat-mul) + const FReal scale = AbstractBaseClass::MatrixKernel->getScaleFactor(AbstractBaseClass::BoxWidth, TreeLevel); + + FComplexe tmpTLoc; + + for (unsigned int pidx=0; pidx<343; ++pidx) { + const unsigned int count = countExp[pidx]; + if (count) { + + FComplexe *const tmpK=const_cast<FComplexe*>(SymM2LHandler->getK(TreeLevel, pidx)); + + for (unsigned int i=0; i<count; ++i){ + //unsigned int i=count; + FComplexe *const ttmul = TMul[pidx] + i*rc; + FComplexe *const ttloc = TLoc[pidx] + i*rc; + + // Perform entrywise product manually + for (unsigned int j=0; j<opt_rc; ++j){ + tmpTLoc=tmpK[j]; + tmpTLoc*=ttmul[j]; + tmpTLoc*=scale; + ttloc[j]=tmpTLoc; + } + } + +// // rank * count * (2*nnodes-1) flops +// FBlas::gemtm(nnodes, rank, count, FReal(1.), +// const_cast<FReal*>(SymM2LHandler->getK(TreeLevel, pidx))+rank*nnodes, +// nnodes, Mul[pidx], nnodes, Compressed, rank); +// // nnodes *count * (2*rank-1) flops +// FBlas::gemm( nnodes, rank, count, scale, +// const_cast<FReal*>(SymM2LHandler->getK(TreeLevel, pidx)), +// nnodes, Compressed, rank, Loc[pidx], nnodes); + } + } + + + +#ifdef LOG_TIMINGS + t_m2l_2 += time.tacAndElapsed(); +#endif + +#ifdef LOG_TIMINGS + time.tic(); +#endif + + // permute and add contribution to local expansions + FReal *const LocalExpansion = TargetCell->getLocal(idxRhs); + memset(countExp, 0, sizeof(int) * 343); + for (unsigned int idx=0; idx<343; ++idx) { + if (SourceCells[idx]) { + const unsigned int pidx = SymM2LHandler->pindices[idx]; + const unsigned int count = (countExp[pidx])++; + + FReal *const loc = Loc[pidx] + count*nnodes; + const FComplexe *const tloc = TLoc[pidx] + count*rc; + + /////////////////////////////////////////// + FReal ploc[rc]; + FBlas::setzero(rc,ploc); + // Apply forward Discrete Fourier Transform + Dft.applyIDFT(tloc,ploc); + + // Unapply Zero Padding + for (unsigned int j=0; j<nnodes; ++j) + loc[j]=ploc[node_diff[nnodes-j-1]]; + //loc[j]+=ploc[node_diff[nnodes-j-1]]; + /////////////////////////////////////////// + + const unsigned int *const pvec = SymM2LHandler->pvectors[idx]; + + /* + // no loop unrolling + for (unsigned int n=0; n<nnodes; ++n) + LocalExpansion[n] += loc[pvec[n]]; + */ + + // explicit loop unrolling + for (unsigned int n=0; n<nnodes/4 * 4; n+=4) { + LocalExpansion[n ] += loc[pvec[n ]]; + LocalExpansion[n+1] += loc[pvec[n+1]]; + LocalExpansion[n+2] += loc[pvec[n+2]]; + LocalExpansion[n+3] += loc[pvec[n+3]]; + } + for (unsigned int n=nnodes/4 * 4; n<nnodes; ++n) + LocalExpansion[n] += loc[pvec[n]]; + + } + } + +#ifdef LOG_TIMINGS + t_m2l_3 += time.tacAndElapsed(); +#endif + } + + } + + + + void L2L(const CellClass* const FRestrict ParentCell, + CellClass* FRestrict *const FRestrict ChildCells, + const int /*TreeLevel*/) + { + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ +// // 1) Apply Inverse Discete Fourier Transform +// SymM2LHandler->unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxRhs), +// const_cast<CellClass*>(ParentCell)->getLocal(idxRhs)); + // 2) apply Sx + for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){ + if (ChildCells[ChildIndex]){ + AbstractBaseClass::Interpolator->applyL2L(ChildIndex, ParentCell->getLocal(idxRhs), ChildCells[ChildIndex]->getLocal(idxRhs)); + } + } + } + } + + + + void L2P(const CellClass* const LeafCell, + ContainerClass* const TargetParticles) + { + const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate())); + + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + +// // 1) Apply Inverse Discete Fourier Transform +// SymM2LHandler->unapplyZeroPaddingAndDFT(LeafCell->getTransformedLocal(idxRhs), +// const_cast<CellClass*>(LeafCell)->getLocal(idxRhs)); +// + // 2.a) apply Sx + AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf, + LeafCell->getLocal(idxRhs), TargetParticles); + + // 2.b) apply Px (grad Sx) + AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf, + LeafCell->getLocal(idxRhs), TargetParticles); + } + } + + void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions + ContainerClass* const FRestrict TargetParticles, + const ContainerClass* const FRestrict /*SourceParticles*/, + ContainerClass* const NeighborSourceParticles[27], + const int /* size */) + { + DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles); + } + + + void P2PRemote(const FTreeCoordinate& /*inPosition*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const inNeighbors[27], const int /*inSize*/){ + DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27); + } + +}; + + + + + + + + +#endif //FUNIFSYMKERNELS_HPP + +// [--END--] diff --git a/Src/Kernels/Uniform/FUnifSymM2LHandler.hpp b/Src/Kernels/Uniform/FUnifSymM2LHandler.hpp new file mode 100755 index 0000000000000000000000000000000000000000..a7e4843b651eadebe7ae35815815213ffee0f982 --- /dev/null +++ b/Src/Kernels/Uniform/FUnifSymM2LHandler.hpp @@ -0,0 +1,428 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FUNIFSYMM2LHANDLER_HPP +#define FUNIFSYMM2LHANDLER_HPP + +#include <climits> + +#include "../../Utils/FBlas.hpp" +#include "../../Utils/FDft.hpp" + +#include "../../Utils/FComplexe.hpp" + +#include "./FUnifTensor.hpp" +#include "../Interpolation/FInterpSymmetries.hpp" +#include "./FUnifM2LHandler.hpp" + +/** + * @author Pierre Blanchard (pierre.blanchard@inria.fr) + * Please read the license + */ + + +/*! Precomputes the 16 far-field interactions (due to symmetries in their + arrangement all 316 far-field interactions can be represented by + permutations of the 16 we compute in this function). + */ +template <int ORDER, typename MatrixKernelClass> +static void precompute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, + FComplexe* FC[343]) +{ + // std::cout << "\nComputing 16 far-field interactions (l=" << ORDER << ", eps=" << Epsilon + // << ") for cells of width w = " << CellWidth << std::endl; + + static const unsigned int nnodes = ORDER*ORDER*ORDER; + + // interpolation points of source (Y) and target (X) cell + FPoint X[nnodes], Y[nnodes]; + // set roots of target cell (X) + FUnifTensor<ORDER>::setRoots(FPoint(0.,0.,0.), CellWidth, X); + // temporary matrices + FReal *_C; + FComplexe *_FC; + + // reduce storage from nnodes^2=order^6 to (2order-1)^3 + const unsigned int rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1); + _C = new FReal [rc]; + _FC = new FComplexe [rc]; // TODO: do it in the non-sym version!!! + + // init Discrete Fourier Transformator +// FDft Dft(rc); + FFft Dft(rc); + + // reduce storage if real valued kernel + const unsigned int opt_rc = rc/2+1; + + // get first column of K via permutation + unsigned int perm[rc]; + for(unsigned int p=0; p<rc; ++p){ + if(p<rc-1) perm[p]=p+1; + else perm[p]=p+1-rc; + } + unsigned int li,lj, mi,mj, ni,nj; + unsigned int idi, idj, ido; + + // initialize timer + FTic time; + double overall_time(0.); + double elapsed_time(0.); + + unsigned int counter = 0; + for (int i=2; i<=3; ++i) { + for (int j=0; j<=i; ++j) { + for (int k=0; k<=j; ++k) { + + // set roots of source cell (Y) + const FPoint cy(CellWidth*FReal(i), CellWidth*FReal(j), CellWidth*FReal(k)); + FUnifTensor<ORDER>::setRoots(cy, CellWidth, Y); + + // start timer + time.tic(); + + // evaluate m2l operator + ido=0; + for(unsigned int l=0; l<2*ORDER-1; ++l) + for(unsigned int m=0; m<2*ORDER-1; ++m) + for(unsigned int n=0; n<2*ORDER-1; ++n){ + + // l=0:(2*ORDER-1) => li-lj=-(ORDER-1):(ORDER-1) + // Convention: + // lj=ORDER-1 & li=0:ORDER-1 => li-lj=1-ORDER:0 + // lj=1 & li=0:ORDER-1 => li-lj=1:ORDER-1 + if(l<ORDER-1) lj=ORDER-1; else lj=0; + if(m<ORDER-1) mj=ORDER-1; else mj=0; + if(n<ORDER-1) nj=ORDER-1; else nj=0; + li=(l-(ORDER-1))+lj; mi=(m-(ORDER-1))+mj; ni=(n-(ORDER-1))+nj; + // deduce corresponding index of K[nnodes x nnodes] + idi=li*ORDER*ORDER + mi*ORDER + ni; + idj=lj*ORDER*ORDER + mj*ORDER + nj; + + // store value at current position in C + // use permutation if DFT is used because + // the storage of the first column is required + // i.e. C[0] C[rc-1] C[rc-2] .. C[1] < WRONG! + // i.e. C[rc-1] C[0] C[1] .. C[rc-2] < RIGHT! +// _C[counter*rc + ido] + _C[perm[ido]] + = MatrixKernel->evaluate(X[idi], Y[idj]); + ido++; + } + + // Apply Discrete Fourier Transformation + Dft.applyDFT(_C,_FC); + + // determine new index + const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3); + + // store + { + // allocate + assert(FC[idx]==NULL); + FC[idx] = new FComplexe[opt_rc]; + FBlas::c_copy(opt_rc, reinterpret_cast<FReal*>(_FC), + reinterpret_cast<FReal*>(FC[idx])); + } + + elapsed_time = time.tacAndElapsed(); + overall_time += elapsed_time; + + counter++; + } + } + } + + std::cout << "The approximation of the " << counter + << " far-field interactions (sizeM2L= " + << counter*opt_rc*sizeof(FComplexe) << " B" + << ") took " << overall_time << "s\n" << std::endl; + + // Free _C & _FC + delete [] _C; + delete [] _FC; +} + + + + + + + + + +/*! \class FUnifSymM2LHandler + + \brief Deals with all the symmetries in the arrangement of the far-field interactions + + Stores permutation indices and permutation vectors to reduce 316 (7^3-3^3) + different far-field interactions to 16 only. We use the number 343 (7^3) + because it allows us to use to associate the far-field interactions based on + the index \f$t = 7^2(i+3) + 7(j+3) + (k+3)\f$ where \f$(i,j,k)\f$ denotes + the relative position of the source cell to the target cell. */ +template <int ORDER, KERNEL_FUNCTION_TYPE TYPE> class FUnifSymM2LHandler; + +/*! Specialization for homogeneous kernel functions */ +template <int ORDER> +class FUnifSymM2LHandler<ORDER, HOMOGENEOUS> +{ + static const unsigned int nnodes = ORDER*ORDER*ORDER; + + // M2L operators + FComplexe* K[343]; + +public: + + // permutation vectors and permutated indices + unsigned int pvectors[343][nnodes]; + unsigned int pindices[343]; + + + /** Constructor: with 16 small SVDs */ + template <typename MatrixKernelClass> + FUnifSymM2LHandler(const MatrixKernelClass *const MatrixKernel, + const FReal, const unsigned int) + { + // init all 343 item to zero, because effectively only 16 exist + for (unsigned int t=0; t<343; ++t) + K[t] = NULL; + + // set permutation vector and indices + const FInterpSymmetries<ORDER> Symmetries; + for (int i=-3; i<=3; ++i) + for (int j=-3; j<=3; ++j) + for (int k=-3; k<=3; ++k) { + const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3); + pindices[idx] = 0; + if (abs(i)>1 || abs(j)>1 || abs(k)>1) + pindices[idx] = Symmetries.getPermutationArrayAndIndex(i,j,k, pvectors[idx]); + } + + // precompute 16 M2L operators + const FReal ReferenceCellWidth = FReal(2.); + precompute<ORDER>(MatrixKernel, ReferenceCellWidth, K); + } + + + + /** Destructor */ + ~FUnifSymM2LHandler() + { + for (unsigned int t=0; t<343; ++t) if (K[t]!=NULL) delete [] K[t]; + } + + + /*! return the t-th approximated far-field interactions*/ + const FComplexe *const getK(const unsigned int, const unsigned int t) const + { return K[t]; } + +}; + + + + + + +/*! Specialization for non-homogeneous kernel functions */ +template <int ORDER> +class FUnifSymM2LHandler<ORDER, NON_HOMOGENEOUS> +{ + static const unsigned int nnodes = ORDER*ORDER*ORDER; + + // Height of octree; needed only in the case of non-homogeneous kernel functions + const unsigned int TreeHeight; + + // M2L operators for all levels in the octree + FComplexe*** K; + +public: + + // permutation vectors and permutated indices + unsigned int pvectors[343][nnodes]; + unsigned int pindices[343]; + + + /** Constructor: with 16 small SVDs */ + template <typename MatrixKernelClass> + FUnifSymM2LHandler(const MatrixKernelClass *const MatrixKernel, + const FReal RootCellWidth, const unsigned int inTreeHeight) + : TreeHeight(inTreeHeight) + { + // init all 343 item to zero, because effectively only 16 exist + K = new FComplexe** [TreeHeight]; + K[0] = NULL; K[1] = NULL; + for (unsigned int l=2; l<TreeHeight; ++l) { + K[l] = new FComplexe* [343]; + for (unsigned int t=0; t<343; ++t) + K[l][t] = NULL; + } + + + // set permutation vector and indices + const FInterpSymmetries<ORDER> Symmetries; + for (int i=-3; i<=3; ++i) + for (int j=-3; j<=3; ++j) + for (int k=-3; k<=3; ++k) { + const unsigned int idx = ((i+3) * 7 + (j+3)) * 7 + (k+3); + pindices[idx] = 0; + if (abs(i)>1 || abs(j)>1 || abs(k)>1) + pindices[idx] = Symmetries.getPermutationArrayAndIndex(i,j,k, pvectors[idx]); + } + + // precompute 16 M2L operators at all levels having far-field interactions + FReal CellWidth = RootCellWidth / FReal(2.); // at level 1 + CellWidth /= FReal(2.); // at level 2 + for (unsigned int l=2; l<TreeHeight; ++l) { + precompute<ORDER>(MatrixKernel, CellWidth, K[l]); + CellWidth /= FReal(2.); // at level l+1 + } + } + + + + /** Destructor */ + ~FUnifSymM2LHandler() + { + for (unsigned int l=0; l<TreeHeight; ++l) { + if (K[l]!=NULL) { + for (unsigned int t=0; t<343; ++t) if (K[l][t]!=NULL) delete [] K[l][t]; + delete [] K[l]; + } + } + delete [] K; + } + + /*! return the t-th approximated far-field interactions*/ + const FComplexe *const getK(const unsigned int l, const unsigned int t) const + { return K[l][t]; } + +}; + + + + + + + + +//#include <fstream> +//#include <sstream> +// +// +///** +// * Computes, compresses and stores the 16 M2L kernels in a binary file. +// */ +//template <int ORDER, typename MatrixKernelClass> +//static void ComputeAndCompressAndStoreInBinaryFile(const MatrixKernelClass *const MatrixKernel, const FReal Epsilon) +//{ +// static const unsigned int nnodes = ORDER*ORDER*ORDER; +// +// // compute and compress //////////// +// FReal* K[343]; +// int LowRank[343]; +// for (unsigned int idx=0; idx<343; ++idx) { K[idx] = NULL; LowRank[idx] = 0; } +// precompute<ORDER>(MatrixKernel, FReal(2.), Epsilon, K, LowRank); +// +// // write to binary file //////////// +// FTic time; time.tic(); +// // start computing process +// const char precision = (typeid(FReal)==typeid(double) ? 'd' : 'f'); +// std::stringstream sstream; +// sstream << "sym2l_" << precision << "_o" << ORDER << "_e" << Epsilon << ".bin"; +// const std::string filename(sstream.str()); +// std::ofstream stream(filename.c_str(), +// std::ios::out | std::ios::binary | std::ios::trunc); +// if (stream.good()) { +// stream.seekp(0); +// for (unsigned int idx=0; idx<343; ++idx) +// if (K[idx]!=NULL) { +// // 1) write index +// stream.write(reinterpret_cast<char*>(&idx), sizeof(int)); +// // 2) write low rank (int) +// int rank = LowRank[idx]; +// stream.write(reinterpret_cast<char*>(&rank), sizeof(int)); +// // 3) write U and V (both: rank*nnodes * FReal) +// FReal *const U = K[idx]; +// FReal *const V = K[idx] + rank*nnodes; +// stream.write(reinterpret_cast<char*>(U), sizeof(FReal)*rank*nnodes); +// stream.write(reinterpret_cast<char*>(V), sizeof(FReal)*rank*nnodes); +// } +// } else throw std::runtime_error("File could not be opened to write"); +// stream.close(); +// // write info +// // std::cout << "Compressed M2L operators stored in binary file " << filename +// // << " in " << time.tacAndElapsed() << "sec." << std::endl; +// +// // free memory ///////////////////// +// for (unsigned int t=0; t<343; ++t) if (K[t]!=NULL) delete [] K[t]; +//} +// +// +///** +// * Reads the 16 compressed M2L kernels from the binary files and writes them +// * in K and the respective low-rank in LowRank. +// */ +//template <int ORDER> +//void ReadFromBinaryFile(const FReal Epsilon, FReal* K[343], int LowRank[343]) +//{ +// // compile time constants +// const unsigned int nnodes = ORDER*ORDER*ORDER; +// +// // find filename +// const char precision = (typeid(FReal)==typeid(double) ? 'd' : 'f'); +// std::stringstream sstream; +// sstream << "sym2l_" << precision << "_o" << ORDER << "_e" << Epsilon << ".bin"; +// const std::string filename(sstream.str()); +// +// // read binary file +// std::ifstream istream(filename.c_str(), +// std::ios::in | std::ios::binary | std::ios::ate); +// const std::ifstream::pos_type size = istream.tellg(); +// if (size<=0) throw std::runtime_error("The requested binary file does not yet exist. Exit."); +// +// if (istream.good()) { +// istream.seekg(0); +// // 1) read index (int) +// int _idx; +// istream.read(reinterpret_cast<char*>(&_idx), sizeof(int)); +// // loop to find 16 compressed m2l operators +// for (int idx=0; idx<343; ++idx) { +// K[idx] = NULL; +// LowRank[idx] = 0; +// // if it exists +// if (idx == _idx) { +// // 2) read low rank (int) +// int rank; +// istream.read(reinterpret_cast<char*>(&rank), sizeof(int)); +// LowRank[idx] = rank; +// // 3) read U and V (both: rank*nnodes * FReal) +// K[idx] = new FReal [2*rank*nnodes]; +// FReal *const U = K[idx]; +// FReal *const V = K[idx] + rank*nnodes; +// istream.read(reinterpret_cast<char*>(U), sizeof(FReal)*rank*nnodes); +// istream.read(reinterpret_cast<char*>(V), sizeof(FReal)*rank*nnodes); +// +// // 1) read next index +// istream.read(reinterpret_cast<char*>(&_idx), sizeof(int)); +// } +// } +// } else throw std::runtime_error("File could not be opened to read"); +// istream.close(); +//} + + + + + +#endif diff --git a/Src/Utils/FDft.hpp b/Src/Utils/FDft.hpp index fa61ade230f1aa6cfad0b00f92312ee9f0ef2615..7a6b3f62f48675746cc3580c69935f10e73cb0d3 100644 --- a/Src/Utils/FDft.hpp +++ b/Src/Utils/FDft.hpp @@ -172,15 +172,17 @@ class FFft private: unsigned int nsteps_; + unsigned int nsteps_opt_; public: FFft(const unsigned int nsteps) - : nsteps_(nsteps) + : nsteps_(nsteps), + nsteps_opt_(nsteps/2+1) // SPECIFIC TO FTT FOR REAL VALUES { // allocate arrays fftR_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_); - fftC_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_); + fftC_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_opt_); // fftw plans plan_c2r_ = @@ -213,7 +215,7 @@ public: // read sampled data // std::cout<< "copy("; // time.tic(); - FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(fftC_)); + FBlas::c_setzero(nsteps_opt_,reinterpret_cast<FReal*>(fftC_)); FBlas::copy(nsteps_, sampledData,fftR_); // std::cout << time.tacAndElapsed() << ")"; @@ -226,11 +228,10 @@ public: // write transformed data // std::cout<< " - copy("; // time.tic(); -// FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(fftC_), -// reinterpret_cast<FReal*>(transformedData)); - - for(unsigned int s=0; s<nsteps_; ++s) - transformedData[s]=fftC_[s]; + FBlas::c_copy(nsteps_opt_,reinterpret_cast<FReal*>(fftC_), + reinterpret_cast<FReal*>(transformedData)); +// for(unsigned int s=0; s<nsteps_opt_; ++s) +// transformedData[s]=fftC_[s]; // std::cout << time.tacAndElapsed() << ") "; @@ -242,7 +243,7 @@ public: { // read transformed data FBlas::setzero(nsteps_,fftR_); - FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(transformedData), + FBlas::c_copy(nsteps_opt_,reinterpret_cast<const FReal*>(transformedData), reinterpret_cast<FReal*>(fftC_)); // perform ifft diff --git a/Tests/Kernels/testUnifAlgorithm.cpp b/Tests/Kernels/testUnifAlgorithm.cpp index 22379a0c9f31cfd88231fdb94746d04445ba79f6..bf04ddaad767d41b6aff15fe219cab32e49b8e1e 100644 --- a/Tests/Kernels/testUnifAlgorithm.cpp +++ b/Tests/Kernels/testUnifAlgorithm.cpp @@ -29,7 +29,8 @@ #include "../../Src/Kernels/Uniform/FUnifCell.hpp" #include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" -#include "../../Src/Kernels/Uniform/FUnifKernel.hpp" +//#include "../../Src/Kernels/Uniform/FUnifKernel.hpp" +#include "../../Src/Kernels/Uniform/FUnifSymKernel.hpp" #include "../../Src/Components/FSimpleLeaf.hpp" #include "../../Src/Kernels/P2P/FP2PParticleContainerIndexed.hpp" @@ -51,7 +52,7 @@ int main(int argc, char* argv[]) { const char* const filename = FParameters::getStr(argc,argv,"-f", "../Data/test20k.fma"); - const unsigned int TreeHeight = FParameters::getValue(argc, argv, "-h", 5); + const unsigned int TreeHeight = FParameters::getValue(argc, argv, "-h", 3); const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, "-sh", 2); const unsigned int NbThreads = FParameters::getValue(argc, argv, "-t", 1); @@ -121,7 +122,7 @@ int main(int argc, char* argv[]) { // begin Lagrange kernel // accuracy - const unsigned int ORDER = 7; + const unsigned int ORDER = 5; // typedefs typedef FP2PParticleContainerIndexed ContainerClass; typedef FSimpleLeaf< ContainerClass > LeafClass; @@ -129,7 +130,8 @@ int main(int argc, char* argv[]) typedef FInterpMatrixKernelR MatrixKernelClass; typedef FUnifCell<ORDER> CellClass; typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; - typedef FUnifKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; + //typedef FUnifKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; + typedef FUnifSymKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; // typedef FFmmAlgorithmThread<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; diff --git a/Tests/Utils/testChebSymmetries.cpp b/Tests/Utils/testChebSymmetries.cpp index f506ca106be7e76490a98673881f5ee6216f275c..94886d04115c666130f5fe5227ea62470c861982 100755 --- a/Tests/Utils/testChebSymmetries.cpp +++ b/Tests/Utils/testChebSymmetries.cpp @@ -31,7 +31,7 @@ #include "../../Src/Kernels/Chebyshev/FChebTensor.hpp" #include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" -#include "../../Src/Kernels/Chebyshev/FChebSymmetries.hpp" +#include "../../Src/Kernels/Interpolation/FInterpSymmetries.hpp" @@ -108,7 +108,7 @@ int main(int argc, char* argv[]) FReal maxdiff(0.); // permuter - FChebSymmetries<order> permuter; + FInterpSymmetries<order> permuter; // permutation vector unsigned int perm[nnodes];