diff --git a/CMakeLists.txt b/CMakeLists.txt index fe411a702260b870c5d5e0ca8acbef65a400a54e..db2983fd84758c9add26c19804f1c7df89be10e7 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -42,6 +42,7 @@ MESSAGE(STATUS " CXX ${CMAKE_CXX_COMPILER_ID}" ) # # Options OPTION( ScalFMM_USE_BLAS "Set to ON to build ScaFMM with BLAS" OFF ) +OPTION( ScalFMM_USE_FFTW "Set to ON to build ScaFMM with FFTW" OFF ) OPTION( ScalFMM_USE_TRACE "Set to ON to print trace or use itac trace" OFF ) OPTION( ScalFMM_BUILD_TESTS "Set to ON to build fonctionnalities Tests" OFF ) OPTION( ScalFMM_BUILD_UTESTS "Set to ON to build UTests" OFF ) @@ -138,7 +139,8 @@ endif() if( ScalFMM_USE_BLAS ) OPTION( ScalFMM_USE_MKL_AS_BLAS "Set to ON to use MKL CBLAS" OFF ) if( ScalFMM_USE_MKL_AS_BLAS ) - SET(BLAS_LIBRARIES "-L$ENV{MKLROOT}/lib;-lmkl_intel_lp64;-lmkl_sequential;-lmkl_core" CACHE STRING "Set your MKL flags") + SET(BLAS_LIBRARIES + "-L$ENV{MKLROOT}/lib;-lmkl_intel_lp64;-lmkl_sequential;-lmkl_core" CACHE STRING "Set your MKL flags") SET(LAPACK_LIBRARIES "") elseif(ScalFMM_USE_EXTERNAL_BLAS) MESSAGE(STATUS "BLAS SET BY EXTERNAL PROGRAM = ${BLAS_LIBRARIES}") @@ -150,6 +152,18 @@ if( ScalFMM_USE_BLAS ) MESSAGE(STATUS "SCALFMM_LIBRARIES = ${SCALFMM_LIBRARIES}") endif(ScalFMM_USE_BLAS) +# FFTW option +if( ScalFMM_USE_FFTW ) + OPTION( ScalFMM_USE_MKL_AS_FFTW "Set to ON to use MKL FFTW" ON ) + if( ScalFMM_USE_MKL_AS_FFTW ) + SET(FFTW_LIBRARIES "-I$ENV{MKLROOT}/include/fftw; -L$ENV{MKLROOT}/lib; -lmkl_intel_lp64; -lmkl_sequential; -lmkl_core; -lpthread; -lm" CACHE STRING "Set your MKL flags") + else() + SET(FFTW_LIBRARIES "-lfftw3" CACHE STRING "Use LIBFFTW") + endif() + SET(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES}; ${FFTW_LIBRARIES}") + MESSAGE(STATUS "SCALFMM_LIBRARIES = ${SCALFMM_LIBRARIES}") +endif(ScalFMM_USE_FFTW) + # Compile option #ADD_DEFINITIONS(-Wall -Wshadow -Wpointer-arith -Wcast-qual -Wconversion -fpic ) # diff --git a/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp b/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp index c65e4a598d7fdde90ad9a09456c723d7fd587ea9..ecf9ba5d87aa7d1641c43ead36e057ca8ecfc3bf 100755 --- a/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp +++ b/Src/Kernels/Chebyshev/FAbstractChebKernel.hpp @@ -22,7 +22,7 @@ #include "../../Components/FAbstractKernels.hpp" -#include "FChebP2PKernels.hpp" +#include "../Interpolation/FInterpP2PKernels.hpp" #include "./FChebInterpolator.hpp" #include "../../Containers/FTreeCoordinate.hpp" diff --git a/Src/Kernels/Chebyshev/FChebInterpolator.hpp b/Src/Kernels/Chebyshev/FChebInterpolator.hpp index ff3133670738108e6ead437f52e3a9c7f4ec302d..71c1b2263c40b9cb8cb2a6541874f9333b15216b 100755 --- a/Src/Kernels/Chebyshev/FChebInterpolator.hpp +++ b/Src/Kernels/Chebyshev/FChebInterpolator.hpp @@ -17,7 +17,7 @@ #define FCHEBINTERPOLATOR_HPP -#include "./FChebMapping.hpp" +#include "./../Interpolation/FInterpMapping.hpp" #include "./FChebTensor.hpp" #include "./FChebRoots.hpp" @@ -296,7 +296,7 @@ class FChebInterpolator : FNoCopyable // set child info FChebTensor<ORDER>::setRelativeChildCenter(child, ChildCenter); - FChebTensor<ORDER>::setChebyshevRoots(ChildCenter, ChildWidth, ChildCoords); + FChebTensor<ORDER>::setPolynomialsRoots(ChildCenter, ChildWidth, ChildCoords); // allocate memory ChildParentInterpolator[child] = new FReal [3 * ORDER*ORDER]; diff --git a/Src/Kernels/Chebyshev/FChebKernel.hpp b/Src/Kernels/Chebyshev/FChebKernel.hpp index c064d568d882628154dd3862d7327b5e386abc15..2c169262dccee226664a81bf6f2d7846239669d8 100755 --- a/Src/Kernels/Chebyshev/FChebKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebKernel.hpp @@ -70,8 +70,8 @@ public: M2LHandler(new M2LHandlerClass(Epsilon)) { // read precomputed compressed m2l operators from binary file - M2LHandler->ReadFromBinaryFileAndSet(); - //M2LHandler->ComputeAndCompressAndSet(); + //M2LHandler->ReadFromBinaryFileAndSet(); + M2LHandler->ComputeAndCompressAndSet(); } @@ -209,14 +209,14 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteactionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles); + DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles); } void P2PRemote(const FTreeCoordinate& /*inPosition*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/){ - DirectInteactionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27); + DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27); } }; diff --git a/Src/Kernels/Chebyshev/FChebM2LHandler.hpp b/Src/Kernels/Chebyshev/FChebM2LHandler.hpp index fe33b955491ab569a26736370ca3ed54478060f2..4c84f77d0b09f69896a7885dc40266fbbd488a3c 100755 --- a/Src/Kernels/Chebyshev/FChebM2LHandler.hpp +++ b/Src/Kernels/Chebyshev/FChebM2LHandler.hpp @@ -99,8 +99,12 @@ public: // check if aready set if (U||C||B) throw std::runtime_error("Compressed M2L operator already set"); rank = ComputeAndCompress(epsilon, U, C, B); + + unsigned long sizeM2L = 343*rank*rank*sizeof(FReal); + + // write info - std::cout << "Compressed and set M2L operators (" << rank << ") in " + std::cout << "Compressed and set M2L operators (" << long(sizeM2L) << " B) in " << time.tacAndElapsed() << "sec." << std::endl; } diff --git a/Src/Kernels/Chebyshev/FChebMatrixKernel.hpp b/Src/Kernels/Chebyshev/FChebMatrixKernel.hpp deleted file mode 100755 index bbbbc5d3a49982e6ddff5c6602f04d473d385191..0000000000000000000000000000000000000000 --- a/Src/Kernels/Chebyshev/FChebMatrixKernel.hpp +++ /dev/null @@ -1,215 +0,0 @@ -// =================================================================================== -// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner -// olivier.coulaud@inria.fr, berenger.bramas@inria.fr -// This software is a computer program whose purpose is to compute the FMM. -// -// This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// -// This program is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". -// "http://www.gnu.org/licenses". -// =================================================================================== -#ifndef FCHEBMATRIXKERNEL_HPP -#define FCHEBMATRIXKERNEL_HPP - -#include "../../Utils/FPoint.hpp" -#include "../../Utils/FNoCopyable.hpp" -#include "../../Utils/FMath.hpp" -#include "../../Utils/FBlas.hpp" - - -// extendable -enum KERNEL_FUNCCTION_IDENTIFIER {ONE_OVER_R, - ONE_OVER_R_SQUARED, - LEONARD_JONES_POTENTIAL}; - -// probably not extedable :) -enum KERNEL_FUNCTION_TYPE {HOMOGENEOUS, NON_HOMOGENEOUS}; - - -/** - * @author Matthias Messner (matthias.messner@inria.fr) - * @class FChebMatrixKernels - * Please read the license - */ -struct FChebAbstractMatrixKernel : FNoCopyable -{ - virtual ~FChebAbstractMatrixKernel(){} // to remove warning - virtual FReal evaluate(const FPoint&, const FPoint&) const = 0; - // I need both functions because required arguments are not always given - virtual FReal getScaleFactor(const FReal, const int) const = 0; - virtual FReal getScaleFactor(const FReal) const = 0; -}; - - - -/// One over r -struct FChebMatrixKernelR : FChebAbstractMatrixKernel -{ - static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; - static const KERNEL_FUNCCTION_IDENTIFIER Identifier = ONE_OVER_R; - - FChebMatrixKernelR() {} - - FReal evaluate(const FPoint& x, const FPoint& y) const - { - const FPoint xy(x-y); - return FReal(1.) / FMath::Sqrt(xy.getX()*xy.getX() + - xy.getY()*xy.getY() + - xy.getZ()*xy.getZ()); - } - - FReal getScaleFactor(const FReal RootCellWidth, const int TreeLevel) const - { - const FReal CellWidth(RootCellWidth / FReal(FMath::pow(2, TreeLevel))); - return getScaleFactor(CellWidth); - } - - FReal getScaleFactor(const FReal CellWidth) const - { - return FReal(2.) / CellWidth; - } -}; - - - -/// One over r^2 -struct FChebMatrixKernelRR : FChebAbstractMatrixKernel -{ - static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; - static const KERNEL_FUNCCTION_IDENTIFIER Identifier = ONE_OVER_R_SQUARED; - - FChebMatrixKernelRR() {} - - FReal evaluate(const FPoint& x, const FPoint& y) const - { - const FPoint xy(x-y); - return FReal(1.) / FReal(xy.getX()*xy.getX() + - xy.getY()*xy.getY() + - xy.getZ()*xy.getZ()); - } - - FReal getScaleFactor(const FReal RootCellWidth, const int TreeLevel) const - { - const FReal CellWidth(RootCellWidth / FReal(FMath::pow(2, TreeLevel))); - return getScaleFactor(CellWidth); - } - - FReal getScaleFactor(const FReal CellWidth) const - { - return FReal(4.) / CellWidth; - } -}; - - - -/// One over r^12 - One over r^6 -struct FChebMatrixKernelLJ : FChebAbstractMatrixKernel -{ - static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; - static const KERNEL_FUNCCTION_IDENTIFIER Identifier = LEONARD_JONES_POTENTIAL; - - FChebMatrixKernelLJ() {} - - FReal evaluate(const FPoint& x, const FPoint& y) const - { - const FPoint xy(x-y); - const FReal r = xy.norm(); - const FReal r3 = r*r*r; - const FReal one_over_r6 = FReal(1.) / (r3*r3); - //return one_over_r6 * one_over_r6; - //return one_over_r6; - return one_over_r6 * one_over_r6 - one_over_r6; - } - - FReal getScaleFactor(const FReal, const int) const - { - // return 1 because non homogeneous kernel functions cannot be scaled!!! - return FReal(1.); - } - - FReal getScaleFactor(const FReal) const - { - // return 1 because non homogeneous kernel functions cannot be scaled!!! - return FReal(1.); - } -}; - - - - - -/*! Functor which provides the interface to assemble a matrix based on the - number of rows and cols and on the coordinates x and y and the type of the - generating matrix-kernel function. - */ -template <typename MatrixKernel> -class EntryComputer -{ - const MatrixKernel Kernel; - - const unsigned int nx, ny; - const FPoint *const px, *const py; - - const FReal *const weights; - -public: - explicit EntryComputer(const unsigned int _nx, const FPoint *const _px, - const unsigned int _ny, const FPoint *const _py, - const FReal *const _weights = NULL) - : Kernel(), nx(_nx), ny(_ny), px(_px), py(_py), weights(_weights) {} - -// template <typename Point> -// void operator()(const unsigned int nx, const Point *const px, -// const unsigned int ny, const Point *const py, -// FReal *const data) const -// { -// for (unsigned int j=0; j<ny; ++j) -// for (unsigned int i=0; i<nx; ++i) -// data[j*nx + i] = Kernel.evaluate(px[i], py[j]); -// } - - void operator()(const unsigned int xbeg, const unsigned int xend, - const unsigned int ybeg, const unsigned int yend, - FReal *const data) const - { - unsigned int idx = 0; - if (weights) { - for (unsigned int j=ybeg; j<yend; ++j) - for (unsigned int i=xbeg; i<xend; ++i) - data[idx++] = weights[i] * weights[j] * Kernel.evaluate(px[i], py[j]); - } else { - for (unsigned int j=ybeg; j<yend; ++j) - for (unsigned int i=xbeg; i<xend; ++i) - data[idx++] = Kernel.evaluate(px[i], py[j]); - } - - /* - // apply weighting matrices - if (weights) { - if ((xend-xbeg) == (yend-ybeg) && (xend-xbeg) == nx) - for (unsigned int n=0; n<nx; ++n) { - FBlas::scal(nx, weights[n], data + n, nx); // scale rows - FBlas::scal(nx, weights[n], data + n * nx); // scale cols - } - else if ((xend-xbeg) == 1 && (yend-ybeg) == ny) - for (unsigned int j=0; j<ny; ++j) data[j] *= weights[j]; - else if ((yend-ybeg) == 1 && (xend-xbeg) == nx) - for (unsigned int i=0; i<nx; ++i) data[i] *= weights[i]; - } - */ - - } -}; - - - - - -#endif // FCHEBMATRIXKERNEL_HPP - -// [--END--] diff --git a/Src/Kernels/Chebyshev/FChebP2PKernels.hpp b/Src/Kernels/Chebyshev/FChebP2PKernels.hpp deleted file mode 100644 index f54b41c669e098477460e2388b83d6ad6a47c340..0000000000000000000000000000000000000000 --- a/Src/Kernels/Chebyshev/FChebP2PKernels.hpp +++ /dev/null @@ -1,101 +0,0 @@ -#ifndef FCHEBP2PKERNELS_HPP -#define FCHEBP2PKERNELS_HPP - - -#include "../P2P/FP2P.hpp" - -template <KERNEL_FUNCCTION_IDENTIFIER Identifier, int NVALS> -struct DirectInteactionComputer; - -/////////////////////////////////////////////////////// -// P2P Wrappers -/////////////////////////////////////////////////////// - -/*! Specialization for Laplace potential */ -template <> -struct DirectInteactionComputer<ONE_OVER_R, 1> -{ - template <typename ContainerClass> - static void P2P( ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27]){ - FP2P::FullMutual(TargetParticles,NeighborSourceParticles,14); - } - - template <typename ContainerClass> - static void P2PRemote( ContainerClass* const FRestrict inTargets, - ContainerClass* const inNeighbors[27], - const int inSize){ - FP2P::FullRemote(inTargets,inNeighbors,inSize); - } -}; - - -/*! Specialization for Leonard-Jones potential */ -template <> -struct DirectInteactionComputer<LEONARD_JONES_POTENTIAL, 1> -{ - template <typename ContainerClass> - static void P2P( ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27]){ - FP2P::FullMutualLJ(TargetParticles,NeighborSourceParticles,14); - } - - template <typename ContainerClass> - static void P2PRemote( ContainerClass* const FRestrict inTargets, - ContainerClass* const inNeighbors[27], - const int inSize){ - FP2P::FullRemoteLJ(inTargets,inNeighbors,inSize); - } -}; - -/////////////////////////////////////////////////////// -// In case of multi right hand side -/////////////////////////////////////////////////////// - - - -template <int NVALS> -struct DirectInteactionComputer<ONE_OVER_R, NVALS> -{ - template <typename ContainerClass> - static void P2P( ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27]){ - for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2P::FullMutual(TargetParticles,NeighborSourceParticles,14); - } - } - - template <typename ContainerClass> - static void P2PRemote( ContainerClass* const FRestrict inTargets, - ContainerClass* const inNeighbors[27], - const int inSize){ - for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2P::FullRemote(inTargets,inNeighbors,inSize); - } - } -}; - - -/*! Specialization for Leonard-Jones potential */ -template <int NVALS> -struct DirectInteactionComputer<LEONARD_JONES_POTENTIAL, NVALS> -{ - template <typename ContainerClass> - static void P2P( ContainerClass* const FRestrict TargetParticles, - ContainerClass* const NeighborSourceParticles[27]){ - for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2P::FullMutualLJ(TargetParticles,NeighborSourceParticles,14); - } - } - - template <typename ContainerClass> - static void P2PRemote( ContainerClass* const FRestrict inTargets, - ContainerClass* const inNeighbors[27], - const int inSize){ - for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ - FP2P::FullRemoteLJ(inTargets,inNeighbors,inSize); - } - } -}; - -#endif // FCHEBP2PKERNELS_HPP diff --git a/Src/Kernels/Chebyshev/FChebSymKernel.hpp b/Src/Kernels/Chebyshev/FChebSymKernel.hpp index b54556a86e095806d7084380a59a5e63671f0a24..9143c569d3df31db88f6bedca3e98c498b4db249 100755 --- a/Src/Kernels/Chebyshev/FChebSymKernel.hpp +++ b/Src/Kernels/Chebyshev/FChebSymKernel.hpp @@ -454,14 +454,14 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteactionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles); + DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles); } void P2PRemote(const FTreeCoordinate& /*inPosition*/, ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/){ - DirectInteactionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27); + DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27); } }; diff --git a/Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp b/Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp index c3595be8b987bfb8814a9167e7cefdb75abc85f1..bd205555f55635dd4e85c142cdd4bb9667d1b36a 100755 --- a/Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp +++ b/Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp @@ -479,9 +479,12 @@ static void precompute(const MatrixKernelClass *const MatrixKernel, const FReal } } } - // std::cout << "The approximation of the " << counter - // << " far-field interactions (overall rank " << overall_rank << ") took " - // << overall_time << "s\n" << std::endl; + std::cout << "The approximation of the " << counter + << " far-field interactions (overall rank " << overall_rank + << " / " << 16*nnodes + << " , sizeM2L= " << 2*overall_rank*nnodes*sizeof(FReal) << "" + << " / " << 16*nnodes*nnodes*sizeof(FReal) << " B" + << ") took " << overall_time << "s\n" << std::endl; delete [] U; delete [] WORK; delete [] VT; diff --git a/Src/Kernels/Chebyshev/FChebTensor.hpp b/Src/Kernels/Chebyshev/FChebTensor.hpp index 37af0d4c520e84bc90f778b79dae89f1eafe743b..627f09c10ef5f923b069e027875290d0b5820527 100755 --- a/Src/Kernels/Chebyshev/FChebTensor.hpp +++ b/Src/Kernels/Chebyshev/FChebTensor.hpp @@ -4,13 +4,13 @@ // This software is a computer program whose purpose is to compute the FMM. // // This software is governed by the CeCILL-C and LGPL licenses and -// abiding by the rules of distribution of free software. -// +// abiding by the rules of distribution of free software. +// // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public and CeCILL-C Licenses for more details. -// "http://www.cecill.info". +// "http://www.cecill.info". // "http://www.gnu.org/licenses". // =================================================================================== #ifndef FCHEBTENSOR_HPP @@ -19,7 +19,7 @@ #include "../../Utils/FMath.hpp" #include "./FChebRoots.hpp" -#include "./FChebMapping.hpp" +#include "./../Interpolation/FInterpTensor.hpp" /** @@ -27,19 +27,6 @@ * Please read the license */ -/** - * @class TensorTraits - * - * The class @p TensorTraits gives the number of interpolation nodes per - * cluster in 3D, depending on the interpolation order. - * - * @tparam ORDER interpolation order - */ -template <int ORDER> struct TensorTraits -{ - enum {nnodes = ORDER*ORDER*ORDER}; -}; - /** @@ -51,123 +38,37 @@ template <int ORDER> struct TensorTraits * @tparam ORDER interpolation order \f$\ell\f$ */ template <int ORDER> -class FChebTensor : FNoCopyable +class FChebTensor : public FInterpTensor<ORDER,FChebRoots<ORDER>> { - enum {nnodes = TensorTraits<ORDER>::nnodes}; - typedef FChebRoots<ORDER> BasisType; - -public: - /** - * Sets the ids of the coordinates of all \f$\ell^3\f$ interpolation - * nodes - * - * @param[out] NodeIds ids of coordinates of interpolation nodes - */ - static - void setNodeIds(unsigned int NodeIds[nnodes][3]) - { - for (unsigned int n=0; n<nnodes; ++n) { - NodeIds[n][0] = n % ORDER; - NodeIds[n][1] = (n/ ORDER) % ORDER; - NodeIds[n][2] = n/(ORDER * ORDER); - } - } - - - /** - * Sets the roots of the Chebyshev quadrature weights defined as \f$w_i = - * \frac{\pi}{\ell}\sqrt{1-\bar x_i^2}\f$ with the Chebyshev roots \f$\bar - * x\f$. - * - * @param weights[out] the root of the weights \f$\sqrt{w_i}\f$ - */ - static - void setRootOfWeights(FReal weights[nnodes]) - { - // weights in 1d - FReal weights_1d[ORDER]; - for (unsigned int o=0; o<ORDER; ++o) - weights_1d[o] = FMath::FPi/ORDER * FMath::Sqrt(FReal(1.)-FReal(BasisType::roots[o])*FReal(BasisType::roots[o])); - // weights in 3d (tensor structure) - unsigned int node_ids[nnodes][3]; - setNodeIds(node_ids); - for (unsigned int n=0; n<nnodes; ++n) { - weights[n] = FMath::Sqrt(weights_1d[node_ids[n][0]]*weights_1d[node_ids[n][1]]*weights_1d[node_ids[n][2]]); - } - } - - - /** - * Sets the interpolation points in the cluster with @p center and @p width - * - * @param[in] center of cluster - * @param[in] width of cluster - * @param[out] rootPositions coordinates of interpolation points - */ - static - void setRoots(const FPoint& center, const FReal width, FPoint rootPositions[nnodes]) - { - unsigned int node_ids[nnodes][3]; - setNodeIds(node_ids); - const map_loc_glob map(center, width); - FPoint localPosition; + enum {nnodes = TensorTraits<ORDER>::nnodes}; + typedef FChebRoots<ORDER> BasisType; + typedef FInterpTensor<ORDER,BasisType> ParentTensor; + + public: + + /** + * Sets the roots of the Chebyshev quadrature weights defined as \f$w_i = + * \frac{\pi}{\ell}\sqrt{1-\bar x_i^2}\f$ with the Chebyshev roots \f$\bar + * x\f$. + * + * @param weights[out] the root of the weights \f$\sqrt{w_i}\f$ + */ + static + void setRootOfWeights(FReal weights[nnodes]) + { + // weights in 1d + FReal weights_1d[ORDER]; + for (unsigned int o=0; o<ORDER; ++o) + weights_1d[o] = FMath::FPi/ORDER * FMath::Sqrt(FReal(1.)-FReal(BasisType::roots[o])*FReal(BasisType::roots[o])); + // weights in 3d (tensor structure) + unsigned int node_ids[nnodes][3]; + ParentTensor::setNodeIds(node_ids); for (unsigned int n=0; n<nnodes; ++n) { - localPosition.setX(FReal(BasisType::roots[node_ids[n][0]])); - localPosition.setY(FReal(BasisType::roots[node_ids[n][1]])); - localPosition.setZ(FReal(BasisType::roots[node_ids[n][2]])); - map(localPosition, rootPositions[n]); - } - } + weights[n] = FMath::Sqrt(weights_1d[node_ids[n][0]]*weights_1d[node_ids[n][1]]*weights_1d[node_ids[n][2]]); + } + } - /** - * Sets the Chebyshev roots in the cluster with @p center and @p width - * - * @param[in] center of cluster - * @param[in] width of cluster - * @param[out] roots coordinates of Chebyshev roots - */ - static - void setChebyshevRoots(const FPoint& center, const FReal width, FReal roots[3][ORDER]) - { - const map_loc_glob map(center, width); - FPoint lPos, gPos; - for (unsigned int n=0; n<ORDER; ++n) { - lPos.setX(FReal(BasisType::roots[n])); - lPos.setY(FReal(BasisType::roots[n])); - lPos.setZ(FReal(BasisType::roots[n])); - map(lPos, gPos); - roots[0][n] = gPos.getX(); - roots[1][n] = gPos.getY(); - roots[2][n] = gPos.getZ(); - } - } - - /** - * Set the relative child (width = 1) center according to the Morton index. - * - * @param[in] ChildIndex index of child according to Morton index - * @param[out] center - */ - static - void setRelativeChildCenter(const unsigned int ChildIndex, - FPoint& ChildCenter) - { - const int RelativeChildPositions[][3] = { {-1, -1, -1}, - {-1, -1, 1}, - {-1, 1, -1}, - {-1, 1, 1}, - { 1, -1, -1}, - { 1, -1, 1}, - { 1, 1, -1}, - { 1, 1, 1} }; - ChildCenter.setX(FReal(RelativeChildPositions[ChildIndex][0]) / FReal(2.)); - ChildCenter.setY(FReal(RelativeChildPositions[ChildIndex][1]) / FReal(2.)); - ChildCenter.setZ(FReal(RelativeChildPositions[ChildIndex][2]) / FReal(2.)); - } }; - - - #endif diff --git a/Src/Kernels/Chebyshev/FChebMapping.hpp b/Src/Kernels/Interpolation/FInterpMapping.hpp similarity index 91% rename from Src/Kernels/Chebyshev/FChebMapping.hpp rename to Src/Kernels/Interpolation/FInterpMapping.hpp index 43b13ff4b53625c0e48e35915ba4849905eb610f..21a4b6b25302636c0dfa9737ee9401a73ad8ea42 100755 --- a/Src/Kernels/Chebyshev/FChebMapping.hpp +++ b/Src/Kernels/Interpolation/FInterpMapping.hpp @@ -13,8 +13,8 @@ // "http://www.cecill.info". // "http://www.gnu.org/licenses". // =================================================================================== -#ifndef FCHEBMAPPING_HPP -#define FCHEBMAPPING_HPP +#ifndef FINTERPMAPPING_HPP +#define FINTERPMAPPING_HPP #include <limits> @@ -27,19 +27,19 @@ */ /** - * @class FChebMapping + * @class FInterpMapping * - * The class @p FChebMapping is the base class for the affine mapping + * The class @p FInterpMapping is the base class for the affine mapping * \f$\Phi:[-1,1]\rightarrow[a,b] \f$ and the inverse affine mapping * \f$\Phi^{-1}:[a,b]\rightarrow[-1,1]\f$. */ -class FChebMapping : FNoCopyable +class FInterpMapping : FNoCopyable { protected: FPoint a; FPoint b; - explicit FChebMapping(const FPoint& center, + explicit FInterpMapping(const FPoint& center, const FReal width) : a(center.getX() - width / FReal(2.), center.getY() - width / FReal(2.), @@ -94,11 +94,11 @@ public: * \f$\Phi^{-1}:[a,b]\rightarrow[-1,1]\f$. It maps from global coordinates to * local ones. */ -class map_glob_loc : public FChebMapping +class map_glob_loc : public FInterpMapping { public: explicit map_glob_loc(const FPoint& center, const FReal width) - : FChebMapping(center, width) {} + : FInterpMapping(center, width) {} /** * Maps from a global position to its local position: \f$\Phi^{-1}(x) = @@ -130,11 +130,11 @@ public: * This class defines the affine mapping \f$\Phi:[-1,1]\rightarrow[a,b]\f$. It * maps from local coordinates to global ones. */ -class map_loc_glob : public FChebMapping +class map_loc_glob : public FInterpMapping { public: explicit map_loc_glob(const FPoint& center, const FReal width) - : FChebMapping(center, width) {} + : FInterpMapping(center, width) {} // globPos = (a + b) / 2 + (b - a) * loclPos / 2; /** @@ -157,4 +157,4 @@ public: -#endif +#endif /*FUNIFTENSOR_HPP*/ diff --git a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp new file mode 100755 index 0000000000000000000000000000000000000000..fa61c83aada050e9112f2fc602464c6abc08480c --- /dev/null +++ b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp @@ -0,0 +1,215 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FINTERPMATRIXKERNEL_HPP +#define FINTERPMATRIXKERNEL_HPP + +#include "../../Utils/FPoint.hpp" +#include "../../Utils/FNoCopyable.hpp" +#include "../../Utils/FMath.hpp" +#include "../../Utils/FBlas.hpp" + + +// extendable +enum KERNEL_FUNCTION_IDENTIFIER {ONE_OVER_R, + ONE_OVER_R_SQUARED, + LENNARD_JONES_POTENTIAL}; + +// probably not extedable :) +enum KERNEL_FUNCTION_TYPE {HOMOGENEOUS, NON_HOMOGENEOUS}; + + +/** + * @author Matthias Messner (matthias.messner@inria.fr) + * @class FInterpMatrixKernels + * Please read the license + */ +struct FInterpAbstractMatrixKernel : FNoCopyable +{ + virtual ~FInterpAbstractMatrixKernel(){} // to remove warning + virtual FReal evaluate(const FPoint&, const FPoint&) const = 0; + // I need both functions because required arguments are not always given + virtual FReal getScaleFactor(const FReal, const int) const = 0; + virtual FReal getScaleFactor(const FReal) const = 0; +}; + + + +/// One over r +struct FInterpMatrixKernelR : FInterpAbstractMatrixKernel +{ + static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; + static const KERNEL_FUNCTION_IDENTIFIER Identifier = ONE_OVER_R; + + FInterpMatrixKernelR() {} + + FReal evaluate(const FPoint& x, const FPoint& y) const + { + const FPoint xy(x-y); + return FReal(1.) / FMath::Sqrt(xy.getX()*xy.getX() + + xy.getY()*xy.getY() + + xy.getZ()*xy.getZ()); + } + + FReal getScaleFactor(const FReal RootCellWidth, const int TreeLevel) const + { + const FReal CellWidth(RootCellWidth / FReal(FMath::pow(2, TreeLevel))); + return getScaleFactor(CellWidth); + } + + FReal getScaleFactor(const FReal CellWidth) const + { + return FReal(2.) / CellWidth; + } +}; + + + +/// One over r^2 +struct FInterpMatrixKernelRR : FInterpAbstractMatrixKernel +{ + static const KERNEL_FUNCTION_TYPE Type = HOMOGENEOUS; + static const KERNEL_FUNCTION_IDENTIFIER Identifier = ONE_OVER_R_SQUARED; + + FInterpMatrixKernelRR() {} + + FReal evaluate(const FPoint& x, const FPoint& y) const + { + const FPoint xy(x-y); + return FReal(1.) / FReal(xy.getX()*xy.getX() + + xy.getY()*xy.getY() + + xy.getZ()*xy.getZ()); + } + + FReal getScaleFactor(const FReal RootCellWidth, const int TreeLevel) const + { + const FReal CellWidth(RootCellWidth / FReal(FMath::pow(2, TreeLevel))); + return getScaleFactor(CellWidth); + } + + FReal getScaleFactor(const FReal CellWidth) const + { + return FReal(4.) / CellWidth; + } +}; + + + +/// One over r^12 - One over r^6 +struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel +{ + static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; + static const KERNEL_FUNCTION_IDENTIFIER Identifier = LENNARD_JONES_POTENTIAL; + + FInterpMatrixKernelLJ() {} + + FReal evaluate(const FPoint& x, const FPoint& y) const + { + const FPoint xy(x-y); + const FReal r = xy.norm(); + const FReal r3 = r*r*r; + const FReal one_over_r6 = FReal(1.) / (r3*r3); + //return one_over_r6 * one_over_r6; + //return one_over_r6; + return one_over_r6 * one_over_r6 - one_over_r6; + } + + FReal getScaleFactor(const FReal, const int) const + { + // return 1 because non homogeneous kernel functions cannot be scaled!!! + return FReal(1.); + } + + FReal getScaleFactor(const FReal) const + { + // return 1 because non homogeneous kernel functions cannot be scaled!!! + return FReal(1.); + } +}; + + + + + +/*! Functor which provides the interface to assemble a matrix based on the + number of rows and cols and on the coordinates x and y and the type of the + generating matrix-kernel function. +*/ +template <typename MatrixKernel> +class EntryComputer +{ + const MatrixKernel Kernel; + + const unsigned int nx, ny; + const FPoint *const px, *const py; + + const FReal *const weights; + +public: + explicit EntryComputer(const unsigned int _nx, const FPoint *const _px, + const unsigned int _ny, const FPoint *const _py, + const FReal *const _weights = NULL) + : Kernel(), nx(_nx), ny(_ny), px(_px), py(_py), weights(_weights) {} + + // template <typename Point> + // void operator()(const unsigned int nx, const Point *const px, + // const unsigned int ny, const Point *const py, + // FReal *const data) const + // { + // for (unsigned int j=0; j<ny; ++j) + // for (unsigned int i=0; i<nx; ++i) + // data[j*nx + i] = Kernel.evaluate(px[i], py[j]); + // } + + void operator()(const unsigned int xbeg, const unsigned int xend, + const unsigned int ybeg, const unsigned int yend, + FReal *const data) const + { + unsigned int idx = 0; + if (weights) { + for (unsigned int j=ybeg; j<yend; ++j) + for (unsigned int i=xbeg; i<xend; ++i) + data[idx++] = weights[i] * weights[j] * Kernel.evaluate(px[i], py[j]); + } else { + for (unsigned int j=ybeg; j<yend; ++j) + for (unsigned int i=xbeg; i<xend; ++i) + data[idx++] = Kernel.evaluate(px[i], py[j]); + } + + /* + // apply weighting matrices + if (weights) { + if ((xend-xbeg) == (yend-ybeg) && (xend-xbeg) == nx) + for (unsigned int n=0; n<nx; ++n) { + FBlas::scal(nx, weights[n], data + n, nx); // scale rows + FBlas::scal(nx, weights[n], data + n * nx); // scale cols + } + else if ((xend-xbeg) == 1 && (yend-ybeg) == ny) + for (unsigned int j=0; j<ny; ++j) data[j] *= weights[j]; + else if ((yend-ybeg) == 1 && (xend-xbeg) == nx) + for (unsigned int i=0; i<nx; ++i) data[i] *= weights[i]; + } + */ + + } +}; + + + + + +#endif // FINTERPMATRIXKERNEL_HPP + +// [--END--] diff --git a/Src/Kernels/Interpolation/FInterpP2PKernels.hpp b/Src/Kernels/Interpolation/FInterpP2PKernels.hpp new file mode 100644 index 0000000000000000000000000000000000000000..476d42d7863f8058b017dc62ee8f78258420c68c --- /dev/null +++ b/Src/Kernels/Interpolation/FInterpP2PKernels.hpp @@ -0,0 +1,101 @@ +#ifndef FINTERPP2PKERNELS_HPP +#define FINTERPP2PKERNELS_HPP + + +#include "../P2P/FP2P.hpp" + +template <KERNEL_FUNCTION_IDENTIFIER Identifier, int NVALS> +struct DirectInteractionComputer; + +/////////////////////////////////////////////////////// +// P2P Wrappers +/////////////////////////////////////////////////////// + +/*! Specialization for Laplace potential */ +template <> +struct DirectInteractionComputer<ONE_OVER_R, 1> +{ + template <typename ContainerClass> + static void P2P( ContainerClass* const FRestrict TargetParticles, + ContainerClass* const NeighborSourceParticles[27]){ + FP2P::FullMutual(TargetParticles,NeighborSourceParticles,14); + } + + template <typename ContainerClass> + static void P2PRemote( ContainerClass* const FRestrict inTargets, + ContainerClass* const inNeighbors[27], + const int inSize){ + FP2P::FullRemote(inTargets,inNeighbors,inSize); + } +}; + + +/*! Specialization for Lennard-Jones potential */ +template <> +struct DirectInteractionComputer<LENNARD_JONES_POTENTIAL, 1> +{ + template <typename ContainerClass> + static void P2P(ContainerClass* const FRestrict TargetParticles, + ContainerClass* const NeighborSourceParticles[27]){ + FP2P::FullMutualLJ(TargetParticles,NeighborSourceParticles,14); + } + + template <typename ContainerClass> + static void P2PRemote(ContainerClass* const FRestrict inTargets, + ContainerClass* const inNeighbors[27], + const int inSize){ + FP2P::FullRemoteLJ(inTargets,inNeighbors,inSize); + } +}; + +/////////////////////////////////////////////////////// +// In case of multi right hand side +/////////////////////////////////////////////////////// + + + +template <int NVALS> +struct DirectInteractionComputer<ONE_OVER_R, NVALS> +{ + template <typename ContainerClass> + static void P2P(ContainerClass* const FRestrict TargetParticles, + ContainerClass* const NeighborSourceParticles[27]){ + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + FP2P::FullMutual(TargetParticles,NeighborSourceParticles,14); + } + } + + template <typename ContainerClass> + static void P2PRemote(ContainerClass* const FRestrict inTargets, + ContainerClass* const inNeighbors[27], + const int inSize){ + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + FP2P::FullRemote(inTargets,inNeighbors,inSize); + } + } +}; + + +/*! Specialization for Lennard-Jones potential */ +template <int NVALS> +struct DirectInteractionComputer<LENNARD_JONES_POTENTIAL, NVALS> +{ + template <typename ContainerClass> + static void P2P(ContainerClass* const FRestrict TargetParticles, + ContainerClass* const NeighborSourceParticles[27]){ + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + FP2P::FullMutualLJ(TargetParticles,NeighborSourceParticles,14); + } + } + + template <typename ContainerClass> + static void P2PRemote(ContainerClass* const FRestrict inTargets, + ContainerClass* const inNeighbors[27], + const int inSize){ + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + FP2P::FullRemoteLJ(inTargets,inNeighbors,inSize); + } + } +}; + +#endif // FINTERPP2PKERNELS_HPP diff --git a/Src/Kernels/Interpolation/FInterpTensor.hpp b/Src/Kernels/Interpolation/FInterpTensor.hpp new file mode 100755 index 0000000000000000000000000000000000000000..38ee2a2f4f9f57b5c3746a32019bb3449d90cd3d --- /dev/null +++ b/Src/Kernels/Interpolation/FInterpTensor.hpp @@ -0,0 +1,154 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FINTERPTENSOR_HPP +#define FINTERPTENSOR_HPP + +#include "../../Utils/FMath.hpp" + +#include "./FInterpMapping.hpp" + + +/** + * @author Pierre Blanchard (pierre.blanchard@inria.fr) + * Please read the license + */ + +/** + * @class TensorTraits + * + * The class @p TensorTraits gives the number of interpolation nodes per + * cluster in 3D, depending on the interpolation order. + * + * @tparam ORDER interpolation order + */ +template <int ORDER> struct TensorTraits +{ + enum {nnodes = ORDER*ORDER*ORDER}; +}; + + +/** + * @class FInterpTensor + * + * The class FInterpTensor provides function considering the tensor product + * interpolation. + * + * @tparam ORDER interpolation order \f$\ell\f$ + * @tparam RootsClass class containing the roots choosen for the interpolation + * (e.g. FChebRoots, FUnifRoots...) + + */ +template <int ORDER, typename RootsClass> +class FInterpTensor : FNoCopyable +{ + enum {nnodes = TensorTraits<ORDER>::nnodes}; + typedef RootsClass BasisType; + +public: + + /** + * Sets the ids of the coordinates of all \f$\ell^3\f$ interpolation + * nodes + * + * @param[out] NodeIds ids of coordinates of interpolation nodes + */ + static + void setNodeIds(unsigned int NodeIds[nnodes][3]) + { + for (unsigned int n=0; n<nnodes; ++n) { + NodeIds[n][0] = n % ORDER; + NodeIds[n][1] = (n/ ORDER) % ORDER; + NodeIds[n][2] = n/(ORDER * ORDER); + } + } + + + /** + * Sets the interpolation points in the cluster with @p center and @p width + * + * PB: tensorial version + * + * @param[in] center of cluster + * @param[in] width of cluster + * @param[out] rootPositions coordinates of interpolation points + */ + static + void setRoots(const FPoint& center, const FReal width, FPoint rootPositions[nnodes]) + { + unsigned int node_ids[nnodes][3]; + setNodeIds(node_ids); + const map_loc_glob map(center, width); + FPoint localPosition; + for (unsigned int n=0; n<nnodes; ++n) { + localPosition.setX(FReal(BasisType::roots[node_ids[n][0]])); + localPosition.setY(FReal(BasisType::roots[node_ids[n][1]])); + localPosition.setZ(FReal(BasisType::roots[node_ids[n][2]])); + map(localPosition, rootPositions[n]); + } + } + + /** + * Sets the equispaced roots in the cluster with @p center and @p width + * + * @param[in] center of cluster + * @param[in] width of cluster + * @param[out] roots coordinates of equispaced roots + */ + static + void setPolynomialsRoots(const FPoint& center, const FReal width, FReal roots[3][ORDER]) + { + const map_loc_glob map(center, width); + FPoint lPos, gPos; + for (unsigned int n=0; n<ORDER; ++n) { + lPos.setX(FReal(BasisType::roots[n])); + lPos.setY(FReal(BasisType::roots[n])); + lPos.setZ(FReal(BasisType::roots[n])); + map(lPos, gPos); + roots[0][n] = gPos.getX(); + roots[1][n] = gPos.getY(); + roots[2][n] = gPos.getZ(); + } + } + + /** + * Set the relative child (width = 1) center according to the Morton index. + * + * @param[in] ChildIndex index of child according to Morton index + * @param[out] center + */ + static + void setRelativeChildCenter(const unsigned int ChildIndex, + FPoint& ChildCenter) + { + const int RelativeChildPositions[][3] = { {-1, -1, -1}, + {-1, -1, 1}, + {-1, 1, -1}, + {-1, 1, 1}, + { 1, -1, -1}, + { 1, -1, 1}, + { 1, 1, -1}, + { 1, 1, 1} }; + ChildCenter.setX(FReal(RelativeChildPositions[ChildIndex][0]) / FReal(2.)); + ChildCenter.setY(FReal(RelativeChildPositions[ChildIndex][1]) / FReal(2.)); + ChildCenter.setZ(FReal(RelativeChildPositions[ChildIndex][2]) / FReal(2.)); + } +}; + + + + + +#endif /*FINTERPTENSOR_HPP*/ diff --git a/Src/Kernels/Uniform/FAbstractUnifKernel.hpp b/Src/Kernels/Uniform/FAbstractUnifKernel.hpp new file mode 100644 index 0000000000000000000000000000000000000000..18bcf6242ff89cf7bc5d9a9614f4c184d7843efb --- /dev/null +++ b/Src/Kernels/Uniform/FAbstractUnifKernel.hpp @@ -0,0 +1,146 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FABSTRACTUNIFKERNEL_HPP +#define FABSTRACTUNIFKERNEL_HPP + +#include "../../Utils/FGlobal.hpp" +#include "../../Utils/FTrace.hpp" +#include "../../Utils/FSmartPointer.hpp" + +#include "../../Components/FAbstractKernels.hpp" + +#include "../Interpolation/FInterpP2PKernels.hpp" +#include "./FUnifInterpolator.hpp" + +#include "../../Containers/FTreeCoordinate.hpp" + +/** + * @author Pierre Blanchard (pierre.blanchard@inria.fr) + * @class FAbstractUnifKernel + * @brief + * This kernels implement the Lagrange interpolation based FMM operators. It + * implements all interfaces (P2P, P2M, M2M, M2L, L2L, L2P) which are required by + * the FFmmAlgorithm and FFmmAlgorithmThread. + * + * @tparam CellClass Type of cell + * @tparam ContainerClass Type of container to store particles + * @tparam MatrixKernelClass Type of matrix kernel function + * @tparam ORDER Lagrange interpolation order + */ +template < class CellClass, class ContainerClass, class MatrixKernelClass, int ORDER, int NVALS = 1> +class FAbstractUnifKernel : public FAbstractKernels< CellClass, ContainerClass> +{ +protected: + enum {nnodes = TensorTraits<ORDER>::nnodes}; + typedef FUnifInterpolator<ORDER> InterpolatorClass; + + /// Needed for P2M, M2M, L2L and L2P operators + const FSmartPointer<InterpolatorClass,FSmartPointerMemory> Interpolator; + /// Needed for P2P operator + const FSmartPointer<MatrixKernelClass,FSmartPointerMemory> MatrixKernel; + /// Height of the entire oct-tree + const unsigned int TreeHeight; + /// Corner of oct-tree box + const FPoint BoxCorner; + /// Width of oct-tree box + const FReal BoxWidth; + /// Width of a leaf cell box + const FReal BoxWidthLeaf; + + /** + * Compute center of leaf cell from its tree coordinate. + * @param[in] Coordinate tree coordinate + * @return center of leaf cell + */ + const FPoint getLeafCellCenter(const FTreeCoordinate& Coordinate) const + { + return FPoint(BoxCorner.getX() + (FReal(Coordinate.getX()) + FReal(.5)) * BoxWidthLeaf, + BoxCorner.getY() + (FReal(Coordinate.getY()) + FReal(.5)) * BoxWidthLeaf, + BoxCorner.getZ() + (FReal(Coordinate.getZ()) + FReal(.5)) * BoxWidthLeaf); + } + +public: + /** + * The constructor initializes all constant attributes and it reads the + * precomputed and compressed M2L operators from a binary file (an + * runtime_error is thrown if the required file is not valid). + */ + FAbstractUnifKernel(const int inTreeHeight, + const FPoint& inBoxCenter, + const FReal inBoxWidth) + : Interpolator(new InterpolatorClass()), + MatrixKernel(new MatrixKernelClass()), + TreeHeight(inTreeHeight), + BoxCorner(inBoxCenter - inBoxWidth / FReal(2.)), + BoxWidth(inBoxWidth), + BoxWidthLeaf(BoxWidth / FReal(FMath::pow(2, inTreeHeight - 1))) + { + /* empty */ + } + + virtual ~FAbstractUnifKernel(){ + // should not be used + } + + const InterpolatorClass *const getPtrToInterpolator() const + { return Interpolator.getPtr(); } + + + virtual void P2M(CellClass* const LeafCell, + const ContainerClass* const SourceParticles) = 0; + + + virtual void M2M(CellClass* const FRestrict ParentCell, + const CellClass*const FRestrict *const FRestrict ChildCells, + const int TreeLevel) = 0; + + + virtual void M2L(CellClass* const FRestrict TargetCell, + const CellClass* SourceCells[343], + const int NumSourceCells, + const int TreeLevel) = 0; + + + virtual void L2L(const CellClass* const FRestrict ParentCell, + CellClass* FRestrict *const FRestrict ChildCells, + const int TreeLevel) = 0; + + + virtual void L2P(const CellClass* const LeafCell, + ContainerClass* const TargetParticles) = 0; + + + + virtual void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions + ContainerClass* const FRestrict TargetParticles, + const ContainerClass* const FRestrict /*SourceParticles*/, + ContainerClass* const NeighborSourceParticles[27], + const int /* size */) = 0; + + + virtual void P2PRemote(const FTreeCoordinate& /*inPosition*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const inNeighbors[27], const int /*inSize*/) = 0; + +}; + + + + + +#endif //FABSTRACTUNIFKERNEL_HPP + +// [--END--] diff --git a/Src/Kernels/Uniform/FUnifCell.hpp b/Src/Kernels/Uniform/FUnifCell.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7584f857572cff4619668ac6537f398bd053ea84 --- /dev/null +++ b/Src/Kernels/Uniform/FUnifCell.hpp @@ -0,0 +1,142 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FUNIFCELL_HPP +#define FUNIFCELL_HPP + + +#include "../../Extensions/FExtendMortonIndex.hpp" +#include "../../Extensions/FExtendCoordinate.hpp" + +#include "./FUnifTensor.hpp" + +#include "../../Extensions/FExtendCellType.hpp" + +#include "../../Utils/FComplexe.hpp" + +/** + * @author Pierre Blanchard (pierre.blanchard@inria.fr) + * @class FUnifCell + * Please read the license + * + * This class defines a cell used in the Lagrange based FMM. + * + * PB: !!! exactly the same as FChebCell except the TensorTraits used is + * the one from FUnifTensor. This is a quick work around to avoid multiple + * definition of TensorTraits until it is factorized for all interpolations. + * + * PB: This class also contains the storage and accessors for the tranformed + * expansion (in Fourier space, i.e. complex valued). + * + * @param NVALS is the number of right hand side. + */ +template <int ORDER, int NVALS = 1> +class FUnifCell : public FExtendMortonIndex, public FExtendCoordinate +{ + static const int VectorSize = TensorTraits<ORDER>::nnodes; + static const int TransformedVectorSize = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1); + + FReal multipole_exp[NVALS * VectorSize]; //< Multipole expansion + FReal local_exp[NVALS * VectorSize]; //< Local expansion + + // PB: Store multipole and local expansion in Fourier space + FComplexe transformed_multipole_exp[NVALS * TransformedVectorSize]; + FComplexe transformed_local_exp[NVALS * TransformedVectorSize]; + +public: + FUnifCell(){ + memset(multipole_exp, 0, sizeof(FReal) * NVALS * VectorSize); + memset(local_exp, 0, sizeof(FReal) * NVALS * VectorSize); + memset(transformed_multipole_exp, 0, + sizeof(FComplexe) * NVALS * TransformedVectorSize); + memset(transformed_local_exp, 0, + sizeof(FComplexe) * NVALS * TransformedVectorSize); + } + + ~FUnifCell() {} + + /** Get Multipole */ + const FReal* getMultipole(const int inRhs) const + { return this->multipole_exp + inRhs*VectorSize; + } + /** Get Local */ + const FReal* getLocal(const int inRhs) const{ + return this->local_exp + inRhs*VectorSize; + } + + /** Get Multipole */ + FReal* getMultipole(const int inRhs){ + return this->multipole_exp + inRhs*VectorSize; + } + /** Get Local */ + FReal* getLocal(const int inRhs){ + return this->local_exp + inRhs*VectorSize; + } + + /** To get the leading dim of a vec */ + int getVectorSize() const{ + return VectorSize; + } + + /** Make it like the begining */ + void resetToInitialState(){ + memset(multipole_exp, 0, sizeof(FReal) * NVALS * VectorSize); + memset(local_exp, 0, sizeof(FReal) * NVALS * VectorSize); + memset(transformed_multipole_exp, 0, + sizeof(FComplexe) * NVALS * TransformedVectorSize); + memset(transformed_local_exp, 0, + sizeof(FComplexe) * NVALS * TransformedVectorSize); + } + + /** Get Transformed Multipole */ + const FComplexe* getTransformedMultipole(const int inRhs) const + { return this->transformed_multipole_exp + inRhs*TransformedVectorSize; + } + /** Get Transformed Local */ + const FComplexe* getTransformedLocal(const int inRhs) const{ + return this->transformed_local_exp + inRhs*TransformedVectorSize; + } + + /** Get Transformed Multipole */ + FComplexe* getTransformedMultipole(const int inRhs){ + return this->transformed_multipole_exp + inRhs*TransformedVectorSize; + } + /** Get Transformed Local */ + FComplexe* getTransformedLocal(const int inRhs){ + return this->transformed_local_exp + inRhs*TransformedVectorSize; + } + +}; + +template <int ORDER, int NVALS = 1> +class FTypedUnifCell : public FUnifCell<ORDER,NVALS>, public FExtendCellType { +public: + template <class BufferWriterClass> + void save(BufferWriterClass& buffer) const{ + FUnifCell<ORDER,NVALS>::save(buffer); + FExtendCellType::save(buffer); + } + template <class BufferReaderClass> + void restore(BufferReaderClass& buffer){ + FUnifCell<ORDER,NVALS>::restore(buffer); + FExtendCellType::restore(buffer); + } + void resetToInitialState(){ + FUnifCell<ORDER,NVALS>::resetToInitialState(); + FExtendCellType::resetToInitialState(); + } +}; + +#endif //FUNIFCELL_HPP diff --git a/Src/Kernels/Uniform/FUnifInterpolator.hpp b/Src/Kernels/Uniform/FUnifInterpolator.hpp new file mode 100755 index 0000000000000000000000000000000000000000..4f7acccbf064c7643a97c12e3436148f56835b3a --- /dev/null +++ b/Src/Kernels/Uniform/FUnifInterpolator.hpp @@ -0,0 +1,550 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FUNIFINTERPOLATOR_HPP +#define FUNIFINTERPOLATOR_HPP + + +#include "./../Interpolation/FInterpMapping.hpp" + +#include "./FUnifTensor.hpp" +#include "./FUnifRoots.hpp" + +#include "../../Utils/FBlas.hpp" + + + +/** + * @author Pierre Blanchard (pierre.blanchard@inria.fr) + * Please read the license + */ + +/** + * @class FUnifInterpolator + * + * The class @p FUnifInterpolator defines the anterpolation (M2M) and + * interpolation (L2L) concerning operations. + */ +template <int ORDER> +class FUnifInterpolator : FNoCopyable +{ + // compile time constants and types + enum {nnodes = TensorTraits<ORDER>::nnodes}; + typedef FUnifRoots< ORDER> BasisType; + typedef FUnifTensor<ORDER> TensorType; + + // FReal T_of_roots[ORDER][ORDER]; + // FReal T[ORDER * (ORDER-1)]; + unsigned int node_ids[nnodes][3]; + FReal* ChildParentInterpolator[8]; + + // permutations (only needed in the tensor product interpolation case) + unsigned int perm[3][nnodes]; + + //////////////////////////////////////////////////////////////////// + + + + /** + * Initialize the child - parent - interpolator, it is basically the matrix + * S which is precomputed and reused for all M2M and L2L operations, ie for + * all non leaf inter/anterpolations. + */ + void initM2MandL2L() + { + FPoint ParentRoots[nnodes], ChildRoots[nnodes]; + const FReal ParentWidth(2.); + const FPoint ParentCenter(0., 0., 0.); + FUnifTensor<ORDER>::setRoots(ParentCenter, ParentWidth, ParentRoots); + + FPoint ChildCenter; + const FReal ChildWidth(1.); + + // loop: child cells + for (unsigned int child=0; child<8; ++child) { + + // allocate memory + ChildParentInterpolator[child] = new FReal [nnodes * nnodes]; + + // set child info + FUnifTensor<ORDER>::setRelativeChildCenter(child, ChildCenter); + FUnifTensor<ORDER>::setRoots(ChildCenter, ChildWidth, ChildRoots); + + // assemble child - parent - interpolator + assembleInterpolator(nnodes, ChildRoots, ChildParentInterpolator[child]); + } + } + + /** + * Initialize the child - parent - interpolator, it is basically the matrix + * S which is precomputed and reused for all M2M and L2L operations, ie for + * all non leaf inter/anterpolations. + */ + void initTensorM2MandL2L() + { + FPoint ParentRoots[nnodes]; + FReal ChildCoords[3][ORDER]; + const FReal ParentWidth(2.); + const FPoint ParentCenter(0., 0., 0.); + FUnifTensor<ORDER>::setRoots(ParentCenter, ParentWidth, ParentRoots); + + FPoint ChildCenter; + const FReal ChildWidth(1.); + + // loop: child cells + for (unsigned int child=0; child<8; ++child) { + + // set child info + FUnifTensor<ORDER>::setRelativeChildCenter(child, ChildCenter); + FUnifTensor<ORDER>::setPolynomialsRoots(ChildCenter, ChildWidth, ChildCoords); + + // allocate memory + ChildParentInterpolator[child] = new FReal [3 * ORDER*ORDER]; + assembleInterpolator(ORDER, ChildCoords[0], ChildParentInterpolator[child]); + assembleInterpolator(ORDER, ChildCoords[1], ChildParentInterpolator[child] + 1 * ORDER*ORDER); + assembleInterpolator(ORDER, ChildCoords[2], ChildParentInterpolator[child] + 2 * ORDER*ORDER); + } + + + // init permutations + for (unsigned int i=0; i<ORDER; ++i) { + for (unsigned int j=0; j<ORDER; ++j) { + for (unsigned int k=0; k<ORDER; ++k) { + const unsigned int index = k*ORDER*ORDER + j*ORDER + i; + perm[0][index] = k*ORDER*ORDER + j*ORDER + i; + perm[1][index] = i*ORDER*ORDER + k*ORDER + j; + perm[2][index] = j*ORDER*ORDER + i*ORDER + k; + } + } + } + + } + + + +public: + /** + * Constructor: Initialize the Lagrange polynomials at the equispaced + * roots/interpolation point + */ + explicit FUnifInterpolator() + { + // // initialize chebyshev polynomials of root nodes: T_o(x_j) + // for (unsigned int o=1; o<ORDER; ++o) + // for (unsigned int j=0; j<ORDER; ++j) + // T_of_roots[o][j] = FReal(BasisType::T(o, FReal(BasisType::roots[j]))); + // + // // initialize chebyshev polynomials of root nodes: T_o(x_j) + // for (unsigned int o=1; o<ORDER; ++o) + // for (unsigned int j=0; j<ORDER; ++j) + // T[(o-1)*ORDER + j] = FReal(BasisType::T(o, FReal(BasisType::roots[j]))); + + + // initialize root node ids + TensorType::setNodeIds(node_ids); + + // initialize interpolation operator for non M2M and L2L (non leaf + // operations) + //this -> initM2MandL2L(); // non tensor-product interpolation + this -> initTensorM2MandL2L(); // tensor-product interpolation + } + + + /** + * Destructor: Delete dynamically allocated memory for M2M and L2L operator + */ + ~FUnifInterpolator() + { + for (unsigned int child=0; child<8; ++child) + delete [] ChildParentInterpolator[child]; + } + + + /** + * Assembles the interpolator \f$S_\ell\f$ of size \f$N\times + * \ell^3\f$. Here local points is meant as points whose global coordinates + * have already been mapped to the reference interval [-1,1]. + * + * @param[in] NumberOfLocalPoints + * @param[in] LocalPoints + * @param[out] Interpolator + */ + void assembleInterpolator(const unsigned int NumberOfLocalPoints, + const FPoint *const LocalPoints, + FReal *const Interpolator) const + { + // values of Lagrange polynomials of source particle: L_o(x_i) + FReal L_of_x[ORDER][3]; + // loop: local points (mapped in [-1,1]) + for (unsigned int m=0; m<NumberOfLocalPoints; ++m) { + // evaluate Lagrange polynomials at local points + for (unsigned int o=0; o<ORDER; ++o) { + L_of_x[o][0] = BasisType::L(o, LocalPoints[m].getX()); + L_of_x[o][1] = BasisType::L(o, LocalPoints[m].getY()); + L_of_x[o][2] = BasisType::L(o, LocalPoints[m].getZ()); + } + + // assemble interpolator + for (unsigned int n=0; n<nnodes; ++n) { + Interpolator[n*NumberOfLocalPoints + m] = FReal(1.); + for (unsigned int d=0; d<3; ++d) { + const unsigned int j = node_ids[n][d]; + // The Lagrange case is much simpler than the Chebyshev case + // as no summation is required + Interpolator[n*NumberOfLocalPoints + m] *= L_of_x[j][d]; + } + + } + + } + + } + + + void assembleInterpolator(const unsigned int M, const FReal *const x, FReal *const S) const + { + // loop: local points (mapped in [-1,1]) + for (unsigned int m=0; m<M; ++m) { + // evaluate Lagrange polynomials at local points + for (unsigned int o=0; o<ORDER; ++o) + S[o*M + m] = BasisType::L(o, x[m]); + + } + + } + + + + const FReal *const *const getChildParentInterpolator() const + { return ChildParentInterpolator; } + const unsigned int *const getPermutationsM2ML2L(unsigned int i) const + { return perm[i]; } + + + + + + + /** + * Particle to moment: application of \f$S_\ell(y,\bar y_n)\f$ + * (anterpolation, it is the transposed interpolation) + */ + template <class ContainerClass> + void applyP2M(const FPoint& center, + const FReal width, + FReal *const multipoleExpansion, + const ContainerClass *const sourceParticles) const; + + + + /** + * Local to particle operation: application of \f$S_\ell(x,\bar x_m)\f$ (interpolation) + */ + template <class ContainerClass> + void applyL2P(const FPoint& center, + const FReal width, + const FReal *const localExpansion, + ContainerClass *const localParticles) const; + + + /** + * Local to particle operation: application of \f$\nabla_x S_\ell(x,\bar x_m)\f$ (interpolation) + */ + template <class ContainerClass> + void applyL2PGradient(const FPoint& center, + const FReal width, + const FReal *const localExpansion, + ContainerClass *const localParticles) const; + + /** + * Local to particle operation: application of \f$S_\ell(x,\bar x_m)\f$ and + * \f$\nabla_x S_\ell(x,\bar x_m)\f$ (interpolation) + */ + template <class ContainerClass> + void applyL2PTotal(const FPoint& center, + const FReal width, + const FReal *const localExpansion, + ContainerClass *const localParticles) const; + + + /* + void applyM2M(const unsigned int ChildIndex, + const FReal *const ChildExpansion, + FReal *const ParentExpansion) const + { + FBlas::gemtva(nnodes, nnodes, FReal(1.), + ChildParentInterpolator[ChildIndex], + const_cast<FReal*>(ChildExpansion), ParentExpansion); + } + + void applyL2L(const unsigned int ChildIndex, + const FReal *const ParentExpansion, + FReal *const ChildExpansion) const + { + FBlas::gemva(nnodes, nnodes, FReal(1.), + ChildParentInterpolator[ChildIndex], + const_cast<FReal*>(ParentExpansion), ChildExpansion); + } + */ + + // PB: improvement of applyM2M/L2L can be preserved (TOFACTO) + // since relative position of child and parent interpolation is remains unchanged + + void applyM2M(const unsigned int ChildIndex, + const FReal *const ChildExpansion, + FReal *const ParentExpansion) const + { + FReal Exp[nnodes], PermExp[nnodes]; + // ORDER*ORDER*ORDER * (2*ORDER-1) + FBlas::gemtm(ORDER, ORDER, ORDER*ORDER, FReal(1.), + ChildParentInterpolator[ChildIndex], ORDER, + const_cast<FReal*>(ChildExpansion), ORDER, PermExp, ORDER); + + for (unsigned int n=0; n<nnodes; ++n) Exp[n] = PermExp[perm[1][n]]; + // ORDER*ORDER*ORDER * (2*ORDER-1) + FBlas::gemtm(ORDER, ORDER, ORDER*ORDER, FReal(1.), + ChildParentInterpolator[ChildIndex] + 2 * ORDER*ORDER, ORDER, + Exp, ORDER, PermExp, ORDER); + + for (unsigned int n=0; n<nnodes; ++n) Exp[perm[1][n]] = PermExp[perm[2][n]]; + // ORDER*ORDER*ORDER * (2*ORDER-1) + FBlas::gemtm(ORDER, ORDER, ORDER*ORDER, FReal(1.), + ChildParentInterpolator[ChildIndex] + 1 * ORDER*ORDER, ORDER, + Exp, ORDER, PermExp, ORDER); + + for (unsigned int n=0; n<nnodes; ++n) ParentExpansion[perm[2][n]] += PermExp[n]; + } + + + void applyL2L(const unsigned int ChildIndex, + const FReal *const ParentExpansion, + FReal *const ChildExpansion) const + { + FReal Exp[nnodes], PermExp[nnodes]; + // ORDER*ORDER*ORDER * (2*ORDER-1) + FBlas::gemm(ORDER, ORDER, ORDER*ORDER, FReal(1.), + ChildParentInterpolator[ChildIndex], ORDER, + const_cast<FReal*>(ParentExpansion), ORDER, PermExp, ORDER); + + for (unsigned int n=0; n<nnodes; ++n) Exp[n] = PermExp[perm[1][n]]; + // ORDER*ORDER*ORDER * (2*ORDER-1) + FBlas::gemm(ORDER, ORDER, ORDER*ORDER, FReal(1.), + ChildParentInterpolator[ChildIndex] + 2 * ORDER*ORDER, ORDER, + Exp, ORDER, PermExp, ORDER); + + for (unsigned int n=0; n<nnodes; ++n) Exp[perm[1][n]] = PermExp[perm[2][n]]; + // ORDER*ORDER*ORDER * (2*ORDER-1) + FBlas::gemm(ORDER, ORDER, ORDER*ORDER, FReal(1.), + ChildParentInterpolator[ChildIndex] + 1 * ORDER*ORDER, ORDER, + Exp, ORDER, PermExp, ORDER); + + for (unsigned int n=0; n<nnodes; ++n) ChildExpansion[perm[2][n]] += PermExp[n]; + } + // total flops count: 3 * ORDER*ORDER*ORDER * (2*ORDER-1) +}; + + + + + + + +/** + * Particle to moment: application of \f$S_\ell(y,\bar y_n)\f$ + * (anterpolation, it is the transposed interpolation) + */ +template <int ORDER> +template <class ContainerClass> +inline void FUnifInterpolator<ORDER>::applyP2M(const FPoint& center, + const FReal width, + FReal *const multipoleExpansion, + const ContainerClass *const inParticles) const +{ + // set all multipole expansions to zero + FBlas::setzero(nnodes, multipoleExpansion); + + // allocate stuff + const map_glob_loc map(center, width); + FPoint localPosition; + + // loop over source particles + const FReal*const physicalValues = inParticles->getPhysicalValues(); + const FReal*const positionsX = inParticles->getPositions()[0]; + const FReal*const positionsY = inParticles->getPositions()[1]; + const FReal*const positionsZ = inParticles->getPositions()[2]; + + for(int idxPart = 0 ; idxPart < inParticles->getNbParticles() ; ++idxPart){ + // map global position to [-1,1] + map(FPoint(positionsX[idxPart],positionsY[idxPart],positionsZ[idxPart]), localPosition); // 15 flops + // evaluate Lagrange polynomial at local position + FReal L_of_x[ORDER][3]; + for (unsigned int o=0; o<ORDER; ++o) { + L_of_x[o][0] = BasisType::L(o, localPosition.getX()); // 3 * ORDER*(ORDER-1) flops PB: TODO confirm + L_of_x[o][1] = BasisType::L(o, localPosition.getY()); // 3 * ORDER*(ORDER-1) flops + L_of_x[o][2] = BasisType::L(o, localPosition.getZ()); // 3 * ORDER*(ORDER-1) flops + } + // read physicalValue + const FReal weight = physicalValues[idxPart]; + + // assemble multipole expansions + for (unsigned int i=0; i<ORDER; ++i) { + for (unsigned int j=0; j<ORDER; ++j) { + for (unsigned int k=0; k<ORDER; ++k) { + const unsigned int idx = k*ORDER*ORDER + j*ORDER + i; + multipoleExpansion[idx] += L_of_x[i][0] * L_of_x[j][1] * L_of_x[k][2] * weight; // 3 * ORDER*ORDER*ORDER flops + } + } + } + + } // flops: N * (3 * ORDER*ORDER*ORDER + 3 * 3 * ORDER*(ORDER-1)) flops + +} + + +/** + * Local to particle operation: application of \f$S_\ell(x,\bar x_m)\f$ (interpolation) + */ +template <int ORDER> +template <class ContainerClass> +inline void FUnifInterpolator<ORDER>::applyL2P(const FPoint& center, + const FReal width, + const FReal *const localExpansion, + ContainerClass *const inParticles) const +{ + // loop over particles + const map_glob_loc map(center, width); + FPoint localPosition; + + //const FReal*const physicalValues = inParticles->getPhysicalValues(); + const FReal*const positionsX = inParticles->getPositions()[0]; + const FReal*const positionsY = inParticles->getPositions()[1]; + const FReal*const positionsZ = inParticles->getPositions()[2]; + FReal*const potentials = inParticles->getPotentials(); + + for(int idxPart = 0 ; idxPart < inParticles->getNbParticles() ; ++ idxPart){ + + // map global position to [-1,1] + map(FPoint(positionsX[idxPart],positionsY[idxPart],positionsZ[idxPart]), localPosition); // 15 flops + + // evaluate Lagrange polynomial at local position + FReal L_of_x[ORDER][3]; + for (unsigned int o=0; o<ORDER; ++o) { + L_of_x[o][0] = BasisType::L(o, localPosition.getX()); // 3 * ORDER*(ORDER-1) flops PB: TODO confirm + L_of_x[o][1] = BasisType::L(o, localPosition.getY()); // 3 * ORDER*(ORDER-1) flops + L_of_x[o][2] = BasisType::L(o, localPosition.getZ()); // 3 * ORDER*(ORDER-1) flops + } + + // interpolate and increment target value + FReal targetValue=0.; + { + for (unsigned int l=0; l<ORDER; ++l) { + for (unsigned int m=0; m<ORDER; ++m) { + for (unsigned int n=0; n<ORDER; ++n) { + const unsigned int idx = n*ORDER*ORDER + m*ORDER + l; + targetValue += + L_of_x[l][0] * L_of_x[m][1] * L_of_x[n][2] * localExpansion[idx]; + } // ORDER * 4 flops + } // ORDER * ORDER * 4 flops + } // ORDER * ORDER * ORDER * 4 flops + } + + // set potential + potentials[idxPart] += (targetValue); + } // N * (4 * ORDER * ORDER * ORDER + 9 * ORDER*(ORDER-1) ) flops +} + + + +/** + * Local to particle operation: application of \f$\nabla_x S_\ell(x,\bar x_m)\f$ (interpolation) + */ +template <int ORDER> +template <class ContainerClass> +inline void FUnifInterpolator<ORDER>::applyL2PGradient(const FPoint& center, + const FReal width, + const FReal *const localExpansion, + ContainerClass *const inParticles) const +{ + //////////////////////////////////////////////////////////////////// + // TENSOR-PRODUCT INTERPOLUTION NOT IMPLEMENTED YET HERE!!! //////// + //////////////////////////////////////////////////////////////////// + + // setup local to global mapping + const map_glob_loc map(center, width); + FPoint Jacobian; + map.computeJacobian(Jacobian); + const FReal jacobian[3] = {Jacobian.getX(), Jacobian.getY(), Jacobian.getZ()}; + FPoint localPosition; + FReal L_of_x[ORDER][3]; + FReal dL_of_x[ORDER][3]; + + const FReal*const physicalValues = inParticles->getPhysicalValues(); + const FReal*const positionsX = inParticles->getPositions()[0]; + const FReal*const positionsY = inParticles->getPositions()[1]; + const FReal*const positionsZ = inParticles->getPositions()[2]; + FReal*const forcesX = inParticles->getForcesX(); + FReal*const forcesY = inParticles->getForcesY(); + FReal*const forcesZ = inParticles->getForcesZ(); + //FReal*const potentials = inParticles->getPotentials(); + + for(int idxPart = 0 ; idxPart < inParticles->getNbParticles() ; ++ idxPart){ + + // map global position to [-1,1] + map(FPoint(positionsX[idxPart],positionsY[idxPart],positionsZ[idxPart]), localPosition); + + // evaluate Lagrange polynomials of source particle + for (unsigned int o=0; o<ORDER; ++o) { + L_of_x[o][0] = BasisType::L(o, localPosition.getX()); // 3 * ORDER*(ORDER-1) flops PB: TODO confirm + L_of_x[o][1] = BasisType::L(o, localPosition.getY()); // 3 * ORDER*(ORDER-1) flops + L_of_x[o][2] = BasisType::L(o, localPosition.getZ()); // 3 * ORDER*(ORDER-1) flops + dL_of_x[o][0] = BasisType::dL(o, localPosition.getX()); // TODO verify 3 * ORDER*(ORDER-1) flops PB: TODO confirm + dL_of_x[o][1] = BasisType::dL(o, localPosition.getY()); // TODO verify 3 * ORDER*(ORDER-1) flops + dL_of_x[o][2] = BasisType::dL(o, localPosition.getZ()); // TODO verify 3 * ORDER*(ORDER-1) flops + } + + // interpolate and increment forces value + FReal forces[3] = {FReal(0.), FReal(0.), FReal(0.)}; + { + for (unsigned int l=0; l<ORDER; ++l) { + for (unsigned int m=0; m<ORDER; ++m) { + for (unsigned int n=0; n<ORDER; ++n) { + const unsigned int idx = n*ORDER*ORDER + m*ORDER + l; + forces[0] += + dL_of_x[l][0] * L_of_x[m][1] * L_of_x[n][2] * localExpansion[idx]; + forces[1] += + L_of_x[l][0] * dL_of_x[m][1] * L_of_x[n][2] * localExpansion[idx]; + forces[2] += + L_of_x[l][0] * L_of_x[m][1] * dL_of_x[n][2] * localExpansion[idx]; + + } // ORDER * 4 flops + } // ORDER * ORDER * 4 flops + } // ORDER * ORDER * ORDER * 4 flops + + + // scale forces + forces[0] *= jacobian[0];// / nnodes; + forces[1] *= jacobian[1];// / nnodes; + forces[2] *= jacobian[2];// / nnodes; + } + + // set computed forces + forcesX[idxPart] += forces[0] * physicalValues[idxPart]; + forcesY[idxPart] += forces[1] * physicalValues[idxPart]; + forcesZ[idxPart] += forces[2] * physicalValues[idxPart]; + } +} + + +#endif /* FUNIFINTERPOLATOR_HPP */ diff --git a/Src/Kernels/Uniform/FUnifKernel.hpp b/Src/Kernels/Uniform/FUnifKernel.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4bd37ed90025df313ee5f8bdeb02e42974fc27bf --- /dev/null +++ b/Src/Kernels/Uniform/FUnifKernel.hpp @@ -0,0 +1,229 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FUNIFKERNEL_HPP +#define FUNIFKERNEL_HPP + +#include "../../Utils/FGlobal.hpp" +#include "../../Utils/FTrace.hpp" +#include "../../Utils/FSmartPointer.hpp" + +#include "./FAbstractUnifKernel.hpp" +#include "./FUnifM2LHandler.hpp" + +class FTreeCoordinate; + +/** + * @author Pierre Blanchard (pierre.blanchard@inria.fr) + * @class FUnifKernel + * @brief + * Please read the license + * + * This kernels implement the Lagrange interpolation based FMM operators. It + * implements all interfaces (P2P,P2M,M2M,M2L,L2L,L2P) which are required by + * the FFmmAlgorithm and FFmmAlgorithmThread. + * + * @tparam CellClass Type of cell + * @tparam ContainerClass Type of container to store particles + * @tparam MatrixKernelClass Type of matrix kernel function + * @tparam ORDER Lagrange interpolation order + */ +template < class CellClass, class ContainerClass, class MatrixKernelClass, int ORDER, int NVALS = 1> +class FUnifKernel + : public FAbstractUnifKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS> +{ + // private types + typedef FUnifM2LHandler<ORDER,MatrixKernelClass> M2LHandlerClass; + + // using from + typedef FAbstractUnifKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS> + AbstractBaseClass; + + /// Needed for M2L operator + FSmartPointer< M2LHandlerClass,FSmartPointerMemory> M2LHandler; + +public: + /** + * The constructor initializes all constant attributes and it reads the + * precomputed and compressed M2L operators from a binary file (an + * runtime_error is thrown if the required file is not valid). + */ + FUnifKernel(const int inTreeHeight, + const FPoint& inBoxCenter, + const FReal inBoxWidth) + : FAbstractUnifKernel< CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>(inTreeHeight, + inBoxCenter, + inBoxWidth), + M2LHandler(new M2LHandlerClass()) + { + // read precomputed compressed m2l operators from binary file + //M2LHandler->ReadFromBinaryFileAndSet(); // PB: TODO? + M2LHandler->ComputeAndSet(); + } + + + void P2M(CellClass* const LeafCell, + const ContainerClass* const SourceParticles) + { + const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate())); + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + // 1) apply Sy + AbstractBaseClass::Interpolator->applyP2M(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf, + LeafCell->getMultipole(idxRhs), SourceParticles); + // 2) apply Discrete Fourier Transform + M2LHandler->applyZeroPaddingAndDFT(LeafCell->getMultipole(idxRhs), + LeafCell->getTransformedMultipole(idxRhs)); + + } + } + + + void M2M(CellClass* const FRestrict ParentCell, + const CellClass*const FRestrict *const FRestrict ChildCells, + const int /*TreeLevel*/) + { + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + // 1) apply Sy + FBlas::scal(AbstractBaseClass::nnodes*2, FReal(0.), ParentCell->getMultipole(idxRhs)); + for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){ + if (ChildCells[ChildIndex]){ + AbstractBaseClass::Interpolator->applyM2M(ChildIndex, ChildCells[ChildIndex]->getMultipole(idxRhs), + ParentCell->getMultipole(idxRhs)); + } + } + // 2) Apply Discete Fourier Transform + M2LHandler->applyZeroPaddingAndDFT(ParentCell->getMultipole(idxRhs), + ParentCell->getTransformedMultipole(idxRhs)); + } + } + + + // void M2L(CellClass* const FRestrict TargetCell, + // const CellClass* SourceCells[343], + // const int NumSourceCells, + // const int TreeLevel) const + // { + // const FReal CellWidth(BoxWidth / FReal(FMath::pow(2, TreeLevel))); + // const FTreeCoordinate& cx = TargetCell->getCoordinate(); + // for (int idx=0; idx<NumSourceCells; ++idx) { + // const FTreeCoordinate& cy = SourceCells[idx]->getCoordinate(); + // const int transfer[3] = {cy.getX()-cx.getX(), + // cy.getY()-cx.getY(), + // cy.getZ()-cx.getZ()}; + // M2LHandler->applyC(transfer, CellWidth, + // SourceCells[idx]->getMultipole() + AbstractBaseClass::nnodes, + // TargetCell->getLocal() + AbstractBaseClass::nnodes); + // } + // } + + void M2L(CellClass* const FRestrict TargetCell, + const CellClass* SourceCells[343], + const int /*NumSourceCells*/, + const int TreeLevel) + { + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + FComplexe *const TransformedLocalExpansion = TargetCell->getTransformedLocal(idxRhs); + + const FReal CellWidth(AbstractBaseClass::BoxWidth / FReal(FMath::pow(2, TreeLevel))); + for (int idx=0; idx<343; ++idx){ + if (SourceCells[idx]){ + M2LHandler->applyFC(idx, CellWidth, SourceCells[idx]->getTransformedMultipole(idxRhs), + TransformedLocalExpansion); + + + } + } + } + } + + // void M2L(CellClass* const FRestrict TargetCell, + // const CellClass* SourceCells[343], + // const int NumSourceCells, + // const int TreeLevel) const + // { + // const unsigned int rank = M2LHandler.getRank(); + // FBlas::scal(343*rank, FReal(0.), MultipoleExpansion); + // const FReal CellWidth(BoxWidth / FReal(FMath::pow(2, TreeLevel))); + // for (int idx=0; idx<343; ++idx) + // if (SourceCells[idx]) + // FBlas::copy(rank, const_cast<FReal *const>(SourceCells[idx]->getMultipole())+AbstractBaseClass::nnodes, + // MultipoleExpansion+idx*rank); + // + // M2LHandler->applyC(CellWidth, MultipoleExpansion, TargetCell->getLocal() + AbstractBaseClass::nnodes); + // } + + + void L2L(const CellClass* const FRestrict ParentCell, + CellClass* FRestrict *const FRestrict ChildCells, + const int /*TreeLevel*/) + { + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + + // 1) Apply Inverse Discete Fourier Transform + M2LHandler->unapplyZeroPaddingAndDFT(ParentCell->getTransformedLocal(idxRhs), + const_cast<CellClass*>(ParentCell)->getLocal(idxRhs)); + // 2) apply Sx + for (unsigned int ChildIndex=0; ChildIndex < 8; ++ChildIndex){ + if (ChildCells[ChildIndex]){ + AbstractBaseClass::Interpolator->applyL2L(ChildIndex, ParentCell->getLocal(idxRhs), ChildCells[ChildIndex]->getLocal(idxRhs)); + } + } + } + } + + void L2P(const CellClass* const LeafCell, + ContainerClass* const TargetParticles) + { + const FPoint LeafCellCenter(AbstractBaseClass::getLeafCellCenter(LeafCell->getCoordinate())); + + for(int idxRhs = 0 ; idxRhs < NVALS ; ++idxRhs){ + + // 1) Apply Inverse Discete Fourier Transform + M2LHandler->unapplyZeroPaddingAndDFT(LeafCell->getTransformedLocal(idxRhs), + const_cast<CellClass*>(LeafCell)->getLocal(idxRhs)); + + // 2.a) apply Sx + AbstractBaseClass::Interpolator->applyL2P(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf, + LeafCell->getLocal(idxRhs), TargetParticles); + + // 2.b) apply Px (grad Sx) + AbstractBaseClass::Interpolator->applyL2PGradient(LeafCellCenter, AbstractBaseClass::BoxWidthLeaf, + LeafCell->getLocal(idxRhs), TargetParticles); + + } + } + + void P2P(const FTreeCoordinate& /* LeafCellCoordinate */, // needed for periodic boundary conditions + ContainerClass* const FRestrict TargetParticles, + const ContainerClass* const FRestrict /*SourceParticles*/, + ContainerClass* const NeighborSourceParticles[27], + const int /* size */) + { + DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2P(TargetParticles,NeighborSourceParticles); + } + + + void P2PRemote(const FTreeCoordinate& /*inPosition*/, + ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, + ContainerClass* const inNeighbors[27], const int /*inSize*/){ + DirectInteractionComputer<MatrixKernelClass::Identifier, NVALS>::P2PRemote(inTargets,inNeighbors,27); + } + +}; + + +#endif //FUNIFKERNEL_HPP + +// [--END--] diff --git a/Src/Kernels/Uniform/FUnifM2LHandler.hpp b/Src/Kernels/Uniform/FUnifM2LHandler.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8a6c2fee98bfbca66077d1e28c412dbcf07769cc --- /dev/null +++ b/Src/Kernels/Uniform/FUnifM2LHandler.hpp @@ -0,0 +1,481 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FUNIFM2LHANDLER_HPP +#define FUNIFM2LHANDLER_HPP + +#include <numeric> +#include <stdexcept> +#include <string> +#include <sstream> +#include <fstream> +#include <typeinfo> + +#include "../../Utils/FBlas.hpp" +#include "../../Utils/FTic.hpp" +#include "../../Utils/FDft.hpp" + +#include "../../Utils/FComplexe.hpp" + + +#include "./FUnifTensor.hpp" + + + +/** + * @author Pierre Blanchard (pierre.blanchard@inria.fr) + * @class FUnifM2LHandler + * Please read the license + * + * This class precomputes and efficiently stores the M2L operators + * \f$[K_1,\dots,K_{316}]\f$ for all (\f$7^3-3^3 = 316\f$ possible interacting + * cells in the far-field) interactions for the Lagrange interpolation + * approach. The resulting Lagrange operators have a Circulant Toeplitz + * structure and can be applied efficiently in Fourier Space. Hence, the + * originally \f$K_t\f$ of size \f$\ell^3\times\ell^3\f$ times \f$316\f$ for + * all interactions is reduced to \f$316\f$ \f$C_t\f$, each of size \f$2\ell-1\f$. + * + * @tparam ORDER interpolation order \f$\ell\f$ + */ +template <int ORDER, class MatrixKernelClass> +class FUnifM2LHandler : FNoCopyable +{ + enum {order = ORDER, + nnodes = TensorTraits<ORDER>::nnodes, + ninteractions = 316}; // 7^3 - 3^3 (max num cells in far-field) + + const MatrixKernelClass MatrixKernel; + + FComplexe *FC; + + unsigned int rc; + unsigned int opt_rc; + + typedef FUnifTensor<ORDER> TensorType; + unsigned int node_diff[nnodes*nnodes]; // PB: used in applyC to get id=i-j + + // FDft Dft; // Direct Discrete Fourier Transformator + FFft Dft; // Fast Discrete Fourier Transformator + + static const std::string getFileName() + { + const char precision_type = (typeid(FReal)==typeid(double) ? 'd' : 'f'); + std::stringstream stream; + stream << "m2l_k"<< MatrixKernelClass::Identifier << "_" << precision_type + << "_o" << order << ".bin"; + return stream.str(); + } + + +public: + FUnifM2LHandler() + : MatrixKernel(), FC(NULL), + rc((2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1)), + opt_rc(rc/2+1), + Dft(rc) // initialize Discrete Fourier Transformator + { + // initialize root node ids + TensorType::setNodeIdsDiff(node_diff); + + // for real valued kernel only n/2+1 complex values are stored + // after performing the DFT (the rest is deduced by conjugation) + + } + + ~FUnifM2LHandler() + { + if (FC != NULL) delete [] FC; + } + + /** + * Computes and sets the matrix \f$C_t\f$ + */ + void ComputeAndSet() + { + // measure time + FTic time; time.tic(); + // check if aready set + if (FC) throw std::runtime_error("M2L operator already set"); + // Compute matrix of interactions + Compute(FC); + + // Compute memory usage + unsigned long sizeM2L = 343*opt_rc*sizeof(FComplexe); + + + // write info + std::cout << "Compute and Set M2L operators ("<< long(sizeM2L/**1e-6*/) <<" Bytes) in " + << time.tacAndElapsed() << "sec." << std::endl; + } + + /** + * Computes, writes to binary file, reads it and sets the matrices \f$Y, C_t, B\f$ + */ + void ComputeAndStoreInBinaryFileAndReadFromFileAndSet() + { + FUnifM2LHandler<ORDER,MatrixKernelClass>::ComputeAndStoreInBinaryFile(); + this->ReadFromBinaryFileAndSet(); + } + + /** + * Computes all \f$K_t\f$. + * + * @param[out] C matrix of size \f$r\times 316 r\f$ storing \f$[C_1,\dots,C_{316}]\f$ + */ + static void Compute(FComplexe* &FC); + + /** + * Computes and stores the matrix \f$C_t\f$ in a binary + * file + */ + static void ComputeAndStoreInBinaryFile(); + + /** + * Reads the matrices \f$Y, C_t, B\f$ from the respective binary file + */ + void ReadFromBinaryFileAndSet(); + + + /** + * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be + * seen as part of the L2L operation. + * + * @param[in] X transformed local expansion of size \f$r\f$ + * @param[out] x local expansion of size \f$\ell^3\f$ + */ + void unapplyZeroPaddingAndDFT(const FComplexe *const FX, FReal *const x) const + { + FReal Px[rc]; + FBlas::setzero(rc,Px); + // Apply forward Discrete Fourier Transform + Dft.applyIDFT(FX,Px); + + // Unapply Zero Padding + for (unsigned int j=0; j<nnodes; ++j) + x[j]+=Px[node_diff[nnodes-j-1]]; + } + + /** + * The M2L operation \f$X+=C_tY\f$ is performed in Fourier space by + * application of the convolution theorem (i.e. \f$FX+=FC_t:FY\f$), + * where \f$FY\f$ is the transformed multipole expansion and + * \f$FX\f$ is the transformed local expansion, both padded with zeros and + * of size \f$r_c=(2\times ORDER-1)^3\f$ or \f$r_c^{opt}=\frac{r_c}{2}+1\f$ + * for real valued kernels. The index \f$t\f$ denotes the transfer vector + * of the target cell to the source cell. + * + * @param[in] transfer transfer vector + * @param[in] FY transformed multipole expansion + * @param[out] FX transformed local expansion + * @param[in] CellWidth needed for the scaling of the compressed M2L operators which are based on a homogeneous matrix kernel computed for the reference cell width \f$w=2\f$, ie in \f$[-1,1]^3\f$. + */ + void applyFC(const unsigned int idx, FReal CellWidth, + const FComplexe *const FY, FComplexe *const FX) const + { + const FReal scale(MatrixKernel.getScaleFactor(CellWidth)); + + FComplexe tmpFX; + + // Perform entrywise product manually + for (unsigned int j=0; j<opt_rc; ++j){ + tmpFX=FC[idx*opt_rc + j]; + tmpFX*=FY[j]; + tmpFX*=scale; + FX[j]+=tmpFX; + } + +// // Perform entrywise product using BLAS and MKL routines +// // PB: not necessary faster than the naive version +// FComplexe tmpFX[rc]; +// FBlas::c_setzero(rc,reinterpret_cast<FReal*>(tmpFX)); +// FMkl::c_had(rc,reinterpret_cast<const FReal* const>(FC + idx*rc), +// reinterpret_cast<const FReal* const>(FY), +// reinterpret_cast<FReal* const>(tmpFX)); +// // Scale +// FBlas::c_axpy(rc,&scale,reinterpret_cast<FReal* const>(tmpFX), +// reinterpret_cast<FReal* const>(FX)); + + } + + + /** + * Transform densities \f$Y= DFT(y)\f$ of a source cell. This operation + * can be seen as part of the M2M operation. + * + * @param[in] y multipole expansion of size \f$\ell^3\f$ + * @param[out] Y transformed multipole expansion of size \f$r\f$ + */ + void applyZeroPaddingAndDFT(FReal *const y, FComplexe *const FY) const + { + FReal Py[rc]; + FBlas::setzero(rc,Py); + + FComplexe tmpFY[rc]; // not mandatory? + + // Apply Zero Padding + for (unsigned int i=0; i<nnodes; ++i) + Py[node_diff[i*nnodes]]=y[i]; + + // Apply forward Discrete Fourier Transform + Dft.applyDFT(Py,tmpFY); + + // not mandatory? + for (unsigned int j=0; j<rc; ++j) // could be opt_rc + FY[j]+=tmpFY[j]; + + } + + +}; + + + + + + +////////////////////////////////////////////////////////////////////// +// definition //////////////////////////////////////////////////////// +////////////////////////////////////////////////////////////////////// + + + + + + +template <int ORDER, class MatrixKernelClass> +void +FUnifM2LHandler<ORDER, MatrixKernelClass>::Compute(FComplexe* &FC) +{ + // allocate memory and store compressed M2L operators + if (FC) throw std::runtime_error("M2L operators are already set"); + + // interpolation points of source (Y) and target (X) cell + FPoint X[nnodes], Y[nnodes]; + // set roots of target cell (X) + FUnifTensor<order>::setRoots(FPoint(0.,0.,0.), FReal(2.), X); + // init matrix kernel + const MatrixKernelClass MatrixKernel; + + // allocate memory and compute 316 m2l operators + FReal *_C; + FComplexe *_FC; + + // reduce storage from nnodes^2=order^6 to (2order-1)^3 + const unsigned int rc = (2*order-1)*(2*order-1)*(2*order-1); + _C = new FReal [rc]; + _FC = new FComplexe [rc * ninteractions]; + + // init Discrete Fourier Transformator +// FDft Dft(rc); + FFft Dft(rc); + + // get first column of K via permutation + unsigned int perm[rc]; + for(unsigned int p=0; p<rc; ++p){ + // permutation to order WHILE computing the entire 1st row + if(p<rc-1) perm[p]=p+1; + else perm[p]=p+1-rc; + // std::cout << "perm["<< p << "]="<< perm[p] << std::endl; + } + + unsigned int counter = 0; + unsigned int li,lj, mi,mj, ni,nj; + unsigned int idi, idj, ido; + for (int i=-3; i<=3; ++i) { + for (int j=-3; j<=3; ++j) { + for (int k=-3; k<=3; ++k) { + if (abs(i)>1 || abs(j)>1 || abs(k)>1) { + // set roots of source cell (Y) + const FPoint cy(FReal(2.*i), FReal(2.*j), FReal(2.*k)); + FUnifTensor<order>::setRoots(cy, FReal(2.), Y); + // evaluate m2l operator + ido=0; + for(unsigned int l=0; l<2*order-1; ++l) + for(unsigned int m=0; m<2*order-1; ++m) + for(unsigned int n=0; n<2*order-1; ++n){ + + // l=0:(2*order-1) => li-lj=-(order-1):(order-1) + // Convention: + // lj=order-1 & li=0:order-1 => li-lj=1-order:0 + // lj=1 & li=0:order-1 => li-lj=1:order-1 + if(l<order-1) lj=order-1; else lj=0; + if(m<order-1) mj=order-1; else mj=0; + if(n<order-1) nj=order-1; else nj=0; + li=(l-(order-1))+lj; mi=(m-(order-1))+mj; ni=(n-(order-1))+nj; + // deduce corresponding index of K[nnodes x nnodes] + idi=li*order*order + mi*order + ni; + idj=lj*order*order + mj*order + nj; + + // store value at current position in C + // use permutation if DFT is used because + // the storage of the first column is required + // i.e. C[0] C[rc-1] C[rc-2] .. C[1] < WRONG! + // i.e. C[rc-1] C[0] C[1] .. C[rc-2] < RIGHT! + // _C[counter*rc + ido] + _C[perm[ido]] + = MatrixKernel.evaluate(X[idi], Y[idj]); + ido++; + } + + // Apply Discrete Fourier Transformation + Dft.applyDFT(_C,_FC+counter*rc); + + // increment interaction counter + counter++; + } + } + } + } + if (counter != ninteractions) + throw std::runtime_error("Number of interactions must correspond to 316"); + + // Free _C + delete [] _C; + + // store FC + counter = 0; + // reduce storage if real valued kernel + const unsigned int opt_rc = rc/2+1; + // allocate M2L + FC = new FComplexe[343 * opt_rc]; + + for (int i=-3; i<=3; ++i) + for (int j=-3; j<=3; ++j) + for (int k=-3; k<=3; ++k) { + const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3); + if (abs(i)>1 || abs(j)>1 || abs(k)>1) { + FBlas::c_copy(opt_rc, reinterpret_cast<FReal*>(_FC + counter*rc), + reinterpret_cast<FReal*>(FC + idx*opt_rc)); +// for (unsigned int n=0; n<rc; ++n){ +// FC[idx*rc+n]=_FC[counter*rc+n]; +// } + counter++; + } else{ + FBlas::c_setzero(opt_rc, reinterpret_cast<FReal*>(FC + idx*opt_rc)); +// for (unsigned int n=0; n<rc; ++n){ +// FC[idx*rc+n]=FComplexe(0.0,0.0); +// } + } + } + + if (counter != ninteractions) + throw std::runtime_error("Number of interactions must correspond to 316"); + delete [] _FC; +} + + + + + + +//template <int ORDER, class MatrixKernelClass> +//void +//FUnifM2LHandler<ORDER, MatrixKernelClass>::ComputeAndStoreInBinaryFile() +//{ +// // measure time +// FTic time; time.tic(); +// // start computing process +// FReal *C; +// C = NULL; +// const unsigned int rc = Compute(C); +// // store into binary file +// const std::string filename(getFileName()); +// std::ofstream stream(filename.c_str(), +// std::ios::out | std::ios::binary | std::ios::trunc); +// if (stream.good()) { +// stream.seekp(0); +// // 1) write number of interpolation points (int) +// int _nnodes = nnodes; +// stream.write(reinterpret_cast<char*>(&_nnodes), sizeof(int)); +// // 2) write 343 C (343 * rc * FReal) +// stream.write(reinterpret_cast<char*>(C), sizeof(FReal)*rc*343); +// } else throw std::runtime_error("File could not be opened to write"); +// stream.close(); +// // free memory +// if (C != NULL) delete [] C; +// // write info +// std::cout << "M2L operators (r2="<< rc +// << ",nnodes2="<< nnodes*nnodes +// <<") stored in binary file " << filename +// << " in " << time.tacAndElapsed() << "sec." << std::endl; +//} +// +// +//template <int ORDER, class MatrixKernelClass> +//void +//FUnifM2LHandler<ORDER, MatrixKernelClass>::ReadFromBinaryFileAndSet() +//{ +// // measure time +// FTic time; time.tic(); +// // start reading process +// if (C) throw std::runtime_error("M2L operator already set"); +// const std::string filename(getFileName()); +// std::ifstream stream(filename.c_str(), +// std::ios::in | std::ios::binary | std::ios::ate); +// const std::ifstream::pos_type size = stream.tellg(); +// if (size<=0) { +// std::cout << "Info: The requested binary file " << filename +// << " does not yet exist. Compute it now ... " << std::endl; +// this->ComputeAndStoreInBinaryFileAndReadFromFileAndSet(); +// return; +// } +// if (stream.good()) { +// stream.seekg(0); +// // 1) read number of interpolation points (int) +// int npts; +// stream.read(reinterpret_cast<char*>(&npts), sizeof(int)); +// if (npts!=nnodes) throw std::runtime_error("nnodes and npts do not correspond"); +// // 2) write 343 C (343 * rank*rank * FReal) +// C = new FReal [343 * rc]; +// stream.read(reinterpret_cast<char*>(C), sizeof(FReal)*rc*343); +// } else throw std::runtime_error("File could not be opened to read"); +// stream.close(); +// // write info +// std::cout << "M2L operators (rc=" << rc +// <<",nnodes2="<< nnodes*nnodes +// << ") read from binary file " +// << filename << " in " << time.tacAndElapsed() << "sec." << std::endl; +//} + +/* +unsigned int ReadRankFromBinaryFile(const std::string& filename) +{ + // start reading process + std::ifstream stream(filename.c_str(), std::ios::in | std::ios::binary | std::ios::ate); + const std::ifstream::pos_type size = stream.tellg(); + if (size<=0) throw std::runtime_error("The requested binary file does not exist."); + unsigned int rank = -1; + if (stream.good()) { + stream.seekg(0); + // 1) read number of interpolation points (int) + int npts; + stream.read(reinterpret_cast<char*>(&npts), sizeof(int)); + // 2) read low rank (int) + stream.read(reinterpret_cast<char*>(&rank), sizeof(int)); + return rank; + } else throw std::runtime_error("File could not be opened to read"); + stream.close(); + return rank; +} +*/ + + + + + + +#endif // FUNIFM2LHANDLER_HPP + +// [--END--] diff --git a/Src/Kernels/Uniform/FUnifRoots.cpp b/Src/Kernels/Uniform/FUnifRoots.cpp new file mode 100644 index 0000000000000000000000000000000000000000..644c2bc329d54d5c7f3ee2c6b1c5e771e9ad3c09 --- /dev/null +++ b/Src/Kernels/Uniform/FUnifRoots.cpp @@ -0,0 +1,134 @@ +#include "FUnifInterpolator.hpp" //PB: if include FUnifRoots.hpp then Error: "FReal is not a type"... + +// In case of multiple include of FUnifInterpolator.hpp, this has to be defined only once!! + +// order 2 +template<> const double FUnifRoots<2>::roots[] = {-1., + 1.}; + +// order 3 +template<> const double FUnifRoots<3>::roots[] = {-1., + 0.0, + 1.}; + +// order 4 +template<> const double FUnifRoots<4>::roots[] = {-1., + -0.333333333333333, + 0.333333333333333, + 1.}; + +// order 5 +template<> const double FUnifRoots<5>::roots[] = {-1., + -0.5, + 0., + 0.5, + 1.}; + +// order 6 +template<> const double FUnifRoots<6>::roots[] = {-1., + -0.6, + -0.2, + 0.2, + 0.6, + 1.}; + +// order 7 +template<> const double FUnifRoots<7>::roots[] = {-1., + -0.666666666666666, + -0.333333333333333, + 0., + 0.333333333333333, + 0.666666666666666, + 1.}; + +// order 8 +template<> const double FUnifRoots<8>::roots[] = {-1., + -0.714285714285714, + -0.428571428571429, + -0.142857142857143, + 0.142857142857143, + 0.428571428571429, + 0.714285714285714, + 1.}; + +// order 9 +template<> const double FUnifRoots<9>::roots[] = {-1., + -0.75, + -0.5, + -0.25, + 0.0, + 0.25, + 0.5, + 0.75, + 1.}; + +// order 10 +template<> const double FUnifRoots<10>::roots[] = {-1., + -0.777777777777777, + -0.555555555555555, + -0.333333333333333, + -0.111111111111111, + 0.111111111111111, + 0.333333333333333, + 0.555555555555555, + 0.777777777777777, + 1.}; + +// order 11 +template<> const double FUnifRoots<11>::roots[] = {-1., + -0.888888888888888, + -0.666666666666666, + -0.444444444444444, + -0.222222222222222, + 0.0, + 0.2222222222222222, + 0.444444444444444, + 0.666666666666666, + 0.888888888888888, + 1.}; + +// order 12 +template<> const double FUnifRoots<12>::roots[] = {-1., + -0.818181818181818, + -0.636363636363636, + -0.454545454545455, + -0.272727272727273, + -0.090909090909091, + 0.090909090909091, + 0.272727272727273, + 0.454545454545455, + 0.636363636363636, + 0.818181818181818, + 1.}; + + +// order 13 +template<> const double FUnifRoots<13>::roots[] = {-1., + -0.833333333333333, + -0.666666666666666, + -0.5, + -0.333333333333333, + -0.166666666666666, + 0.0, + 0.166666666666666, + 0.333333333333333, + 0.5, + 0.666666666666666, + 0.833333333333333, + 1.}; + +// order 14 +template<> const double FUnifRoots<14>::roots[] = {-1., + -0.846153846153846, + -0.692307692307692, + -0.538461538461538, + -0.384615384615385, + -0.230769230769231, + -0.076923076923077, + 0.076923076923077, + 0.230769230769231, + 0.384615384615385, + 0.538461538461538, + 0.692307692307692, + 0.846153846153846, + 1.}; diff --git a/Src/Kernels/Uniform/FUnifRoots.hpp b/Src/Kernels/Uniform/FUnifRoots.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b3897e8efe5e9976b8577c8139993059ecd4a2ea --- /dev/null +++ b/Src/Kernels/Uniform/FUnifRoots.hpp @@ -0,0 +1,159 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FUNIFROOTS_HPP +#define FUNIFROOTS_HPP + +#include <cmath> +#include <limits> +#include <cassert> + +#include "../../Utils/FNoCopyable.hpp" + + +/** + * @author Pierre Blanchard (pierre.blanchard@inria.fr) + * Please read the license + */ + +/** + * @class FUnifRoots + * + * The class @p FUnifRoots provides the equispaced roots of order \f$\ell\f$ + * and the Lagrange polynomials \f$L_n(x)\f$. + * + * @tparam ORDER interpolation order \f$\ell\f$ + */ +template <int ORDER> +struct FUnifRoots : FNoCopyable +{ + enum {order = ORDER}; //!< interpolation order + + /** + * Lagrange roots in [-1,1] computed as \f$\bar x_n = + * -1 + 2\frac{n-1}{\ell}\f$ for \f$n=1,\dots,\ell\f$ + */ + const static double roots[]; + + /** + * Lagrange polynomials \f$ L_n(x) = \Pi_{m=1 \atop m\neq n}^\ell \frac{x-\bar x_m}{\bar x_n-\bar x_m} \f$ + * + * @param[in] n index + * @param[in] x coordinate in [-1,1] + * @return function value + */ + static FReal L(const unsigned int n, FReal x) + { + //std::cout << x << std::endl; + assert(std::fabs(x)-1.<10.*std::numeric_limits<FReal>::epsilon()); + if (std::fabs(x)>1.) { + x = (x > FReal( 1.) ? FReal( 1.) : x); + x = (x < FReal(-1.) ? FReal(-1.) : x); + } + + FReal tmpL=FReal(1.); + for(unsigned int m=0;m<order;++m){ + if(m!=n) + tmpL *= (x-FUnifRoots<order>::roots[m])/(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[m]); + + } + + return FReal(tmpL); + } + + + /** + * For the derivation of the Lagrange polynomials + * \f$ \frac{\mathrm{d} L_n(x)}{\mathrm{d}x} = ... \f$ + * + * @param[in] n index + * @param[in] x coordinate in [-1,1] + * @return function value + */ + static FReal dL(const unsigned int n, FReal x) + { + assert(std::fabs(x)-1.<10.*std::numeric_limits<FReal>::epsilon()); + if (std::fabs(x)>1.) { + x = (x > FReal( 1.) ? FReal( 1.) : x); + x = (x < FReal(-1.) ? FReal(-1.) : x); + } + + FReal tmpdL; + FReal dL=FReal(0.); + + for(unsigned int p=0;p<order;++p){ + if(p!=n){ + tmpdL=1./(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[p]); + + for(unsigned int m=0;m<order;++m){ + if(m!=n && m!=p) + tmpdL *= (x-FUnifRoots<order>::roots[m])/(FUnifRoots<order>::roots[n]-FUnifRoots<order>::roots[m]); + + }// m + + dL+=tmpdL; + + }//endif + + }// p + + return FReal(dL); + + } +}; + +// We declare the roots here only once Please look to .cpp for definitions + +// order 2 +template<> const double FUnifRoots<2>::roots[]; + +// order 3 +template<> const double FUnifRoots<3>::roots[]; + +// order 4 +template<> const double FUnifRoots<4>::roots[]; + +// order 5 +template<> const double FUnifRoots<5>::roots[]; + +// order 6 +template<> const double FUnifRoots<6>::roots[]; + +// order 7 +template<> const double FUnifRoots<7>::roots[]; + +// order 8 +template<> const double FUnifRoots<8>::roots[]; + +// order 9 +template<> const double FUnifRoots<9>::roots[]; + +// order 10 +template<> const double FUnifRoots<10>::roots[]; + +// order 11 +template<> const double FUnifRoots<11>::roots[]; + +// order 12 +template<> const double FUnifRoots<12>::roots[]; + +// order 13 +template<> const double FUnifRoots<13>::roots[]; + +// order 14 +template<> const double FUnifRoots<14>::roots[]; + + +#endif diff --git a/Src/Kernels/Uniform/FUnifTensor.hpp b/Src/Kernels/Uniform/FUnifTensor.hpp new file mode 100755 index 0000000000000000000000000000000000000000..004ca44461a77e3b455c0cc627f09f26f2c3331c --- /dev/null +++ b/Src/Kernels/Uniform/FUnifTensor.hpp @@ -0,0 +1,76 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FUNIFTENSOR_HPP +#define FUNIFTENSOR_HPP + +#include "../../Utils/FMath.hpp" + +#include "./FUnifRoots.hpp" +#include "./../Interpolation/FInterpTensor.hpp" + + +/** + * @author Pierre Blanchard (pierre.blanchard@inria.fr) + * Please read the license + */ + + + +/** + * @class FUnifTensor + * + * The class FUnifTensor provides function considering the tensor product + * interpolation. + * + * @tparam ORDER interpolation order \f$\ell\f$ + */ +template <int ORDER> +class FUnifTensor : public FInterpTensor<ORDER,FUnifRoots<ORDER>> +{ + enum {nnodes = TensorTraits<ORDER>::nnodes}; + typedef FUnifRoots<ORDER> BasisType; + typedef FInterpTensor<ORDER,BasisType> ParentTensor; + + public: + + /** + * Sets the diff of ids of the coordinates of all \f$\ell^6\f$ interpolation + * nodes duet + * + * @param[out] NodeIdsDiff diff of ids of coordinates of interpolation nodes + */ + static + void setNodeIdsDiff(unsigned int NodeIdsDiff[nnodes*nnodes]) + { + unsigned int node_ids[nnodes][3]; + ParentTensor::setNodeIds(node_ids); + + for (unsigned int i=0; i<nnodes; ++i) { + for (unsigned int j=0; j<nnodes; ++j) { + // 0 <= id < 2*ORDER-1 + unsigned int idl = node_ids[i][0]-node_ids[j][0]+ORDER-1; + unsigned int idm = node_ids[i][1]-node_ids[j][1]+ORDER-1; + unsigned int idn = node_ids[i][2]-node_ids[j][2]+ORDER-1; + NodeIdsDiff[i*nnodes+j] + = idn*(2*ORDER-1)*(2*ORDER-1) + idm*(2*ORDER-1) + idl; + } + } + } + +}; + + +#endif /*FUNIFTENSOR_HPP*/ diff --git a/Src/Utils/FBlas.hpp b/Src/Utils/FBlas.hpp index 1a2457e84b57df4218751a69f35c8f44f39b2cfd..01fea5dd01a4e61e9ce93e145a29955ab127eae2 100755 --- a/Src/Utils/FBlas.hpp +++ b/Src/Utils/FBlas.hpp @@ -71,6 +71,16 @@ extern "C" void dorgqr_(const unsigned*, const unsigned*, const unsigned*, double*, const unsigned*, double*, double*, const unsigned*, int*); +#ifdef ScalFMM_USE_MKL_AS_BLAS + // mkl: hadamard product is not implemented in mkl_blas + void vdmul_(const unsigned* n, const double*, const double*, double*); + void vsmul_(const unsigned* n, const float*, const float*, float*); + void vzmul_(const unsigned* n, const double*, const double*, double*); + void vcmul_(const unsigned* n, const float*, const float*, float*); +#else + // TODO create interface for hadamard product in case an external Blas is used +#endif + // single ////////////////////////////////////////////////////////// // blas 1 float sdot_(const unsigned*, const float*, const unsigned*, const float*, const unsigned*); @@ -127,18 +137,59 @@ extern "C" void cgeqrf_(const unsigned*, const unsigned*, float*, const unsigned*, float*, float*, const unsigned*, int*); + + +} + + +// Hadamard (i.e. entrywise) product: +// NB: The following optimized routines are currently not used +// since they have not proved their efficiency in comparison +// with a naive application of the entrywise product. +#ifdef ScalFMM_USE_MKL_AS_BLAS + +//#include "mkl_vml.h" + +namespace FMkl{ + + // Hadamard product: dest[i]=a[i]*b[i] + inline void had(const unsigned n, const double* const a, const double* const b, double* const dest) + { vdmul_(&n, a, b, dest); } + inline void had(const unsigned n, const float* const a, const float* const b, float* const dest) + { vsmul_(&n, a, b, dest); } + inline void c_had(const unsigned n, const double* const a, const double* const b, double* const dest) + { vzmul_(&n, a, b, dest); } + inline void c_had(const unsigned n, const float* const a, const float* const b, float* const dest) + { vcmul_(&n, a, b, dest); } + } +#else + +namespace FBlas{ + + // TODO create interface for hadamard product in case an external Blas is used + +} + +#endif +// end Hadamard product + + namespace FBlas { // copy inline void copy(const unsigned n, double* orig, double* dest) { dcopy_(&n, orig, &N_ONE, dest, &N_ONE); } + inline void copy(const unsigned n, const double* orig, double* dest) + { dcopy_(&n, orig, &N_ONE, dest, &N_ONE); } inline void copy(const unsigned n, float* orig, float* dest) { scopy_(&n, orig, &N_ONE, dest, &N_ONE); } inline void c_copy(const unsigned n, double* orig, double* dest) { zcopy_(&n, orig, &N_ONE, dest, &N_ONE); } + inline void c_copy(const unsigned n, const double* orig, double* dest) + { zcopy_(&n, orig, &N_ONE, dest, &N_ONE); } inline void c_copy(const unsigned n, float* orig, float* dest) { ccopy_(&n, orig, &N_ONE, dest, &N_ONE); } diff --git a/Src/Utils/FDft.hpp b/Src/Utils/FDft.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fa61ade230f1aa6cfad0b00f92312ee9f0ef2615 --- /dev/null +++ b/Src/Utils/FDft.hpp @@ -0,0 +1,263 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Bérenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== +#ifndef FDFT_HPP +#define FDFT_HPP + +#include <iostream> +#include <stdlib.h> + +// include fftw3 (specify path in cmake) +// if MKL: path/to/mkl/include/fftw/fftw3.h +// elseif libfftw_dev: usr/include/fftw3.h +#include <fftw3.h> + +#include "FGlobal.hpp" +#include "FComplexe.hpp" + +#include "FMath.hpp" + +#include "FTic.hpp" + + +/** + * @author Pierre Blanchard (pierre.blanchard@inria.fr) + * @class FDft and @class FFft + * Please read the license + * + * These classes handle the forward and backward Discete Fourier Transform + * (DFT). + * @class FDft implements a direct method while @class FFft uses the Fast + * Fourier Transform (FFT). The FFT algorithm can either be provided by the + * FFTW(3) library itself or a version that is wrapped in Intel MKL. + * + * The aim of writing these specific classes is to allow further customization + * of the DFT such as implementing a tensorial variant, a weighted variant + * or any other feature. + * + * TODO: some copies might slow down the routines, this needs to be optimized. + * + */ + + +/** + * @class FDft + * + * @tparam nsteps number of sampled values \f$N\f$ + */ + +class FDft +{ + + FReal* fftR_; + FComplexe* fftC_; + + FReal *cosRS_, *sinRS_; + +private: + unsigned int nsteps_; + +public: + + FDft(const unsigned int nsteps) + : nsteps_(nsteps) + { + fftR_ = new FReal[nsteps_]; + fftC_ = new FComplexe[nsteps_]; + + // Beware this is extremely HEAVY to store!!! => Use FDft only for debug! + cosRS_ = new FReal[nsteps_*nsteps_]; + sinRS_ = new FReal[nsteps_*nsteps_]; + + for(unsigned int r=0; r<nsteps_; ++r) + for(unsigned int s=0; s<nsteps_; ++s){ + FReal thetaRS = 2*M_PI*r*s/nsteps_; + cosRS_[r*nsteps_+s]=FMath::Cos(thetaRS); + sinRS_[r*nsteps_+s]=FMath::Sin(thetaRS); + } + } + + virtual ~FDft() + { + delete [] fftR_; + delete [] fftC_; + delete [] cosRS_; + delete [] sinRS_; + } + + void applyDFT(const FReal* sampledData, + FComplexe* transformedData) const + { +// FTic time; + + // read sampled data +// std::cout<< "copy("; +// time.tic(); + FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(fftC_)); + FBlas::copy(nsteps_, sampledData,fftR_); +// std::cout << time.tacAndElapsed() << ")"; + +// std::cout<< " - exe("; +// time.tic(); + for(unsigned int r=0; r<nsteps_; ++r) + for(unsigned int s=0; s<nsteps_; ++s){ + fftC_[r] += FComplexe(fftR_[s]*cosRS_[r*nsteps_+s], + -fftR_[s]*sinRS_[r*nsteps_+s]); + } +// std::cout << time.tacAndElapsed() << ")"; + + // write transformed data +// std::cout<< " - copy("; +// time.tic(); + FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(fftC_), + reinterpret_cast<FReal*>(transformedData)); +// std::cout << time.tacAndElapsed() << ") "; + + } + + + void applyIDFT(const FComplexe* transformedData, + FReal* sampledData) const + { + // read transformed data + FBlas::setzero(nsteps_,fftR_); + FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(transformedData), + reinterpret_cast<FReal*>(fftC_)); + + for(unsigned int r=0; r<nsteps_; ++r){ + for(unsigned int s=0; s<nsteps_; ++s){ + fftR_[r] += fftC_[s].getReal()*cosRS_[r*nsteps_+s] + -fftC_[s].getImag()*sinRS_[r*nsteps_+s]; + } + fftR_[r]*=1./nsteps_; + } + + // write sampled data + FBlas::copy(nsteps_,fftR_,sampledData); + + } + + +}; + + + +/** + * @class FFft + * + * @tparam nsteps number of sampled values \f$N\f$ + */ + +class FFft +{ + // arrays + FReal* fftR_; + FComplexe* fftC_; + + // plans + fftw_plan plan_c2r_; // backward FFT plan + fftw_plan plan_r2c_; // forward FFT plan + +private: + unsigned int nsteps_; + +public: + + FFft(const unsigned int nsteps) + : nsteps_(nsteps) + { + // allocate arrays + fftR_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_); + fftC_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_); + + // fftw plans + plan_c2r_ = + fftw_plan_dft_c2r_1d(nsteps_, + reinterpret_cast<fftw_complex*>(fftC_), + fftR_, + FFTW_MEASURE);// TODO: test FFTW_ESTIMATE + plan_r2c_ = + fftw_plan_dft_r2c_1d(nsteps_, + fftR_, + reinterpret_cast<fftw_complex*>(fftC_), + FFTW_MEASURE); + + + } + + virtual ~FFft() + { + fftw_destroy_plan(plan_c2r_); + fftw_destroy_plan(plan_r2c_); + fftw_free(fftR_); + fftw_free(fftC_); + } + + void applyDFT(const FReal* sampledData, + FComplexe* transformedData) const + { +// FTic time; + + // read sampled data +// std::cout<< "copy("; +// time.tic(); + FBlas::c_setzero(nsteps_,reinterpret_cast<FReal*>(fftC_)); + FBlas::copy(nsteps_, sampledData,fftR_); +// std::cout << time.tacAndElapsed() << ")"; + + // perform fft +// std::cout<< " - exe("; +// time.tic(); + fftw_execute( plan_r2c_ ); +// std::cout << time.tacAndElapsed() << ")"; + + // write transformed data +// std::cout<< " - copy("; +// time.tic(); +// FBlas::c_copy(nsteps_,reinterpret_cast<FReal*>(fftC_), +// reinterpret_cast<FReal*>(transformedData)); + + for(unsigned int s=0; s<nsteps_; ++s) + transformedData[s]=fftC_[s]; + +// std::cout << time.tacAndElapsed() << ") "; + + } + + + void applyIDFT(const FComplexe* transformedData, + FReal* sampledData) const + { + // read transformed data + FBlas::setzero(nsteps_,fftR_); + FBlas::c_copy(nsteps_,reinterpret_cast<const FReal*>(transformedData), + reinterpret_cast<FReal*>(fftC_)); + + // perform ifft + fftw_execute( plan_c2r_ ); + + for(unsigned int s=0; s<nsteps_; ++s) + fftR_[s]/=nsteps_; // the fft from fftw is not scaled !!!!!!!! + + // write sampled data + FBlas::copy(nsteps_,fftR_,sampledData); + + } + + +}; + +#endif /* FDFT_HPP */ + diff --git a/Tests/Kernels/testChebAlgorithm.cpp b/Tests/Kernels/testChebAlgorithm.cpp index 1766b9f74e9185c6d2aa274a390fd1a8e61a91bb..e9e5a26dce0e4e4f568c1d98a484557c5dfcbd72 100755 --- a/Tests/Kernels/testChebAlgorithm.cpp +++ b/Tests/Kernels/testChebAlgorithm.cpp @@ -28,12 +28,12 @@ #include "../../Src/Kernels/Chebyshev/FChebCell.hpp" -#include "../../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp" #include "../../Src/Components/FSimpleLeaf.hpp" -#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp" +#include "../../Src/Kernels/P2P/FP2PParticleContainerIndexed.hpp" #include "../../Src/Utils/FParameters.hpp" #include "../../Src/Utils/FMemUtils.hpp" @@ -44,273 +44,313 @@ #include "../../Src/Core/FFmmAlgorithm.hpp" #include "../../Src/Core/FFmmAlgorithmThread.hpp" - +/** + * This program runs the FMM Algorithm with the Chebyshev kernel and compares the results with a direct computation. + */ // Simply create particles and try the kernels int main(int argc, char* argv[]) { - const char* const filename = FParameters::getStr(argc,argv,"-f", "../Data/test20k.fma"); - const unsigned int TreeHeight = FParameters::getValue(argc, argv, "-h", 5); - const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, "-sh", 2); - const unsigned int NbThreads = FParameters::getValue(argc, argv, "-t", 1); + const char* const filename = FParameters::getStr(argc,argv,"-f", "../Data/test20k.fma"); + const unsigned int TreeHeight = FParameters::getValue(argc, argv, "-h", 5); + const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, "-sh", 2); + const unsigned int NbThreads = FParameters::getValue(argc, argv, "-t", 1); + +#ifdef _OPENMP + omp_set_num_threads(NbThreads); + std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl; +#else + std::cout << "\n>> Sequential version.\n" << std:: +#endif + + // init timer + FTic time; + + // init particles position and physical value + struct TestParticle{ + FPoint position; + FReal forces[3]; + FReal physicalValue; + FReal potential; + }; + + // open particle file + FFmaScanfLoader loader(filename); + if(!loader.isOpen()) throw std::runtime_error("Particle file couldn't be opened!"); + + TestParticle* const particles = new TestParticle[loader.getNumberOfParticles()]; + for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + FPoint position; + FReal physicalValue = 0.0; + loader.fillParticle(&position,&physicalValue); + // get copy + particles[idxPart].position = position; + particles[idxPart].physicalValue = physicalValue; + particles[idxPart].potential = 0.0; + particles[idxPart].forces[0] = 0.0; + particles[idxPart].forces[1] = 0.0; + particles[idxPart].forces[2] = 0.0; + } + + //////////////////////////////////////////////////////////////////// + + { // begin direct computation + + time.tic(); + { + for(int idxTarget = 0 ; idxTarget < loader.getNumberOfParticles() ; ++idxTarget){ + for(int idxOther = idxTarget + 1 ; idxOther < loader.getNumberOfParticles() ; ++idxOther){ + FP2P::MutualParticles(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(), + particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue, + &particles[idxTarget].forces[0], &particles[idxTarget].forces[1], + &particles[idxTarget].forces[2], &particles[idxTarget].potential, + particles[idxOther].position.getX(), particles[idxOther].position.getY(), + particles[idxOther].position.getZ(), particles[idxOther].physicalValue, + &particles[idxOther].forces[0], &particles[idxOther].forces[1], + &particles[idxOther].forces[2], &particles[idxOther].potential); + } + } + } + time.tac(); + printf("Elapsed Time for direct computation: %f\n",time.elapsed()); - const unsigned int ORDER = 9; - const FReal epsilon = FReal(1e-9); + } // end direct computation - // init timer - FTic time; + //////////////////////////////////////////////////////////////////// + { // begin Chebyshev kernel + // accuracy + const unsigned int ORDER = 7; + const FReal epsilon = FReal(1e-7); // typedefs - typedef FP2PParticleContainer ContainerClass; + typedef FP2PParticleContainerIndexed ContainerClass; typedef FSimpleLeaf< ContainerClass > LeafClass; - //typedef FChebMatrixKernelLJ MatrixKernelClass; - typedef FChebMatrixKernelR MatrixKernelClass; - typedef FChebCell<ORDER> CellClass; + //typedef FInterpMatrixKernelLJ MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; + typedef FChebCell<ORDER> CellClass; typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; - //typedef FChebKernel<ParticleClass,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; + //typedef FChebKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; typedef FChebSymKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; - //typedef FFmmAlgorithm<OctreeClass,ParticleClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; - typedef FFmmAlgorithmThread<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; - - - // What we do ////////////////////////////////////////////////////// - std::cout << ">> Testing the Chebyshev interpolation base FMM algorithm.\n"; - - // open particle file - FFmaScanfLoader loader(filename); - //FFmaBinLoader<ParticleClass> loader(filename); - if(!loader.isOpen()) throw std::runtime_error("Particle file couldn't be opened!"); - - // init oct-tree - OctreeClass tree(TreeHeight, SubTreeHeight, loader.getBoxWidth(), loader.getCenterOfBox()); - - // ----------------------------------------------------- - std::cout << "Creating and inserting " << loader.getNumberOfParticles() - << " particles in a octree of height " << TreeHeight - << " ..." << std::endl; - time.tic(); - - { - FPoint particlePosition; - FReal physicalValue = 0.0; - for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ - loader.fillParticle(&particlePosition,&physicalValue); - tree.insert(particlePosition, physicalValue); - } + typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; + // typedef FFmmAlgorithmThread<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; + + // init oct-tree + OctreeClass tree(TreeHeight, SubTreeHeight, loader.getBoxWidth(), loader.getCenterOfBox()); + + + { // ----------------------------------------------------- + std::cout << "Creating & Inserting " << loader.getNumberOfParticles() + << " particles ..." << std::endl; + std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl; + time.tic(); + + for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + // put in tree + tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue); + } + + time.tac(); + std::cout << "Done " << "(@Creating and Inserting Particles = " + << time.elapsed() << "s)." << std::endl; + } // ----------------------------------------------------- + + { // ----------------------------------------------------- + std::cout << "\nChebyshev FMM (ORDER="<< ORDER << ",EPS="<< epsilon <<") ... " << std::endl; + time.tic(); + KernelClass kernels(TreeHeight, loader.getCenterOfBox(), loader.getBoxWidth(), epsilon); + FmmClass algorithm(&tree, &kernels); + algorithm.execute(); + time.tac(); + std::cout << "Done " << "(@Algorithm = " << time.elapsed() << "s)." << std::endl; + } // ----------------------------------------------------- + + + { // ----------------------------------------------------- + std::cout << "\nError computation ... " << std::endl; + FMath::FAccurater potentialDiff; + FMath::FAccurater fx, fy, fz; + { // Check that each particle has been summed with all other + + tree.forEachLeaf([&](LeafClass* leaf){ + const FReal*const potentials = leaf->getTargets()->getPotentials(); + const FReal*const forcesX = leaf->getTargets()->getForcesX(); + const FReal*const forcesY = leaf->getTargets()->getForcesY(); + const FReal*const forcesZ = leaf->getTargets()->getForcesZ(); + const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); + const FVector<int>& indexes = leaf->getTargets()->getIndexes(); + + for(int idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){ + const int indexPartOrig = indexes[idxPart]; + potentialDiff.add(particles[indexPartOrig].potential,potentials[idxPart]); + fx.add(particles[indexPartOrig].forces[0],forcesX[idxPart]); + fy.add(particles[indexPartOrig].forces[1],forcesY[idxPart]); + fz.add(particles[indexPartOrig].forces[2],forcesZ[idxPart]); + } + }); + } + + // Print for information + std::cout << "Potential " << potentialDiff << std::endl; + std::cout << "Fx " << fx << std::endl; + std::cout << "Fy " << fy << std::endl; + std::cout << "Fz " << fz << std::endl; + } // ----------------------------------------------------- + + } // end Chebyshev kernel + + + + + //// ----------------------------------------------------- + //{ // cost of symmetric m2l opertors, weighted rank, etc. + // const unsigned int nnodes = ORDER*ORDER*ORDER; + // const SymmetryHandler<ORDER> *const SymHandler = kernels.getPtrToSymHandler(); + // unsigned int expansionCounter[343]; + // for (unsigned i=0; i<343; ++i) expansionCounter[i] = 0; + // for (unsigned i=0; i<343; ++i) if (SymHandler->pindices[i]) expansionCounter[SymHandler->pindices[i]]++; + // + // unsigned int overallCost = 0; + // unsigned int overallWeightedRank = 0; + // unsigned int nbExpansions = 0; + // for (unsigned i=0; i<343; ++i) + // if (expansionCounter[i]) { + // const unsigned int cost = (2*nnodes*SymHandler->LowRank[i]) * expansionCounter[i]; + // const unsigned int weightedRank = SymHandler->LowRank[i] * expansionCounter[i]; + // overallCost += cost; + // overallWeightedRank += weightedRank; + // nbExpansions += expansionCounter[i]; + // std::cout << "expansionCounter[" << i << "] = " << expansionCounter[i] + // << "\tlow rank = " << SymHandler->LowRank[i] + // << "\t(2*nnodes*rank) * nb_exp = " << cost + // << std::endl; + // } + // std::cout << "=== Overall cost = " << overallCost << "\t Weighted rank = " << (double)overallWeightedRank / (double)nbExpansions << std::endl; + // if (nbExpansions!=316) std::cout << "Something went wrong, number of counted expansions = " << nbExpansions << std::endl; + //} + //// ----------------------------------------------------- + + + // ----------------------------------------------------- + // find first non empty leaf cell + /*if (FParameters::findParameter(argc,argv,"-dont_check_accuracy") == FParameters::NotFound) { + OctreeClass::Iterator iLeafs(&tree); + iLeafs.gotoBottomLeft(); + + const ContainerClass *const Targets = iLeafs.getCurrentListTargets(); + const unsigned int NumTargets = Targets->getSize(); + + FReal* Potential = new FReal [NumTargets]; + FBlas::setzero(NumTargets, Potential); + + FReal* Force = new FReal [NumTargets * 3]; + FBlas::setzero(NumTargets * 3, Force); + + std::cout << "\nDirect computation of " << NumTargets << " target particles ..." << std::endl; + const MatrixKernelClass MatrixKernel; + do { + const ContainerClass *const Sources = iLeafs.getCurrentListSrc(); + unsigned int counter = 0; + ContainerClass::ConstBasicIterator iTarget(*Targets); + while(iTarget.hasNotFinished()) { + const FReal wt = iTarget.data().getPhysicalValue(); + ContainerClass::ConstBasicIterator iSource(*Sources); + while(iSource.hasNotFinished()) { + if (&iTarget.data() != &iSource.data()) { + const FReal potential_value = MatrixKernel.evaluate(iTarget.data().getPosition(), + iSource.data().getPosition()); + const FReal ws = iSource.data().getPhysicalValue(); + // potential + Potential[counter] += potential_value * ws; + // force + if (MatrixKernelClass::Identifier == ONE_OVER_R) { // laplace force + FPoint force(iSource.data().getPosition() - iTarget.data().getPosition()); + force *= ((ws*wt) * (potential_value*potential_value*potential_value)); + Force[counter*3 + 0] += force.getX(); + Force[counter*3 + 1] += force.getY(); + Force[counter*3 + 2] += force.getZ(); + } else if (MatrixKernelClass::Identifier == LEONARD_JONES_POTENTIAL) { // lenard-jones force + FPoint force(iSource.data().getPosition() - iTarget.data().getPosition()); + const FReal one_over_r = FReal(1.) / FMath::Sqrt(force.getX()*force.getX() + + force.getY()*force.getY() + + force.getZ()*force.getZ()); // 1 + 15 + 5 = 21 flops + const FReal one_over_r3 = one_over_r * one_over_r * one_over_r; + const FReal one_over_r6 = one_over_r3 * one_over_r3; + const FReal one_over_r4 = one_over_r3 * one_over_r; + force *= ((ws*wt) * (FReal(12.)*one_over_r6*one_over_r4*one_over_r4 - FReal(6.)*one_over_r4*one_over_r4)); + Force[counter*3 + 0] += force.getX(); + Force[counter*3 + 1] += force.getY(); + Force[counter*3 + 2] += force.getZ(); + } } + iSource.gotoNext(); + } + counter++; + iTarget.gotoNext(); + } + } while(iLeafs.moveRight()); - std::cout << "Done " << "(" << time.tacAndElapsed() << ")." << std::endl; - // ----------------------------------------------------- - - - //// ----------------------------------------------------- - //KernelClass kernels(TreeHeight, loader.getCenterOfBox(), loader.getBoxWidth(), epsilon); - //for (unsigned int NbThreads=1; NbThreads<=160; NbThreads+=3) { - // omp_set_num_threads(NbThreads); - // std::cout << "\n================================================================" - // << "\nChebyshev FMM using" << omp_get_max_threads() << " threads ..." << std::endl; - // - // { // reinitialize ////////////////////////////////////// - // OctreeClass::Iterator octreeIterator(&tree); - // octreeIterator.gotoBottomLeft(); - // OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator); // for each levels - // - // // set potential and forces to zero - // do { - // ContainerClass *const Sources = octreeIterator.getCurrentListSrc(); - // ContainerClass::BasicIterator iSource(*Sources); - // while(iSource.hasNotFinished()) { - // iSource.data().setPotential(FReal(0.)); - // iSource.data().setForces(FReal(0.), FReal(0.), FReal(0.)); - // iSource.gotoNext(); - // } - // } while(octreeIterator.moveRight()); - // - // // set multipole and local expansions - // octreeIterator = avoidGotoLeftIterator; - // for(int idxLevel = TreeHeight-1; idxLevel>1; --idxLevel) { // for each cells - // - // do{ - // FMemUtils::setall( octreeIterator.getCurrentCell()->getLocal(), FReal(0), ORDER*ORDER*ORDER * 2); - // FMemUtils::setall( octreeIterator.getCurrentCell()->getMultipole(), FReal(0), ORDER*ORDER*ORDER * 2); - // } while(octreeIterator.moveRight()); - // - // avoidGotoLeftIterator.moveUp(); - // octreeIterator = avoidGotoLeftIterator; - // } - // } ////////////////////////////////////////////////////// - // - // FmmClass algorithm(&tree,&kernels); - // time.tic(); - // algorithm.execute(); - // std::cout << "completed in " << time.tacAndElapsed() << "sec." << std::endl; - //} - //// ----------------------------------------------------- - - { - omp_set_num_threads(NbThreads); - std::cout << "\nChebyshev FMM using " << omp_get_max_threads() << " threads ..." << std::endl; - KernelClass kernels(TreeHeight, loader.getCenterOfBox(), loader.getBoxWidth(), epsilon); - FmmClass algorithm(&tree, &kernels); - time.tic(); - algorithm.execute(); - std::cout << "completed in " << time.tacAndElapsed() << "sec." << std::endl; - } - - - //// ----------------------------------------------------- - //{ // cost of symmetric m2l opertors, weighted rank, etc. - // const unsigned int nnodes = ORDER*ORDER*ORDER; - // const SymmetryHandler<ORDER> *const SymHandler = kernels.getPtrToSymHandler(); - // unsigned int expansionCounter[343]; - // for (unsigned i=0; i<343; ++i) expansionCounter[i] = 0; - // for (unsigned i=0; i<343; ++i) if (SymHandler->pindices[i]) expansionCounter[SymHandler->pindices[i]]++; - // - // unsigned int overallCost = 0; - // unsigned int overallWeightedRank = 0; - // unsigned int nbExpansions = 0; - // for (unsigned i=0; i<343; ++i) - // if (expansionCounter[i]) { - // const unsigned int cost = (2*nnodes*SymHandler->LowRank[i]) * expansionCounter[i]; - // const unsigned int weightedRank = SymHandler->LowRank[i] * expansionCounter[i]; - // overallCost += cost; - // overallWeightedRank += weightedRank; - // nbExpansions += expansionCounter[i]; - // std::cout << "expansionCounter[" << i << "] = " << expansionCounter[i] - // << "\tlow rank = " << SymHandler->LowRank[i] - // << "\t(2*nnodes*rank) * nb_exp = " << cost - // << std::endl; - // } - // std::cout << "=== Overall cost = " << overallCost << "\t Weighted rank = " << (double)overallWeightedRank / (double)nbExpansions << std::endl; - // if (nbExpansions!=316) std::cout << "Something went wrong, number of counted expansions = " << nbExpansions << std::endl; - //} - //// ----------------------------------------------------- - - - // ----------------------------------------------------- - // find first non empty leaf cell - /*if (FParameters::findParameter(argc,argv,"-dont_check_accuracy") == FParameters::NotFound) { - OctreeClass::Iterator iLeafs(&tree); - iLeafs.gotoBottomLeft(); - - const ContainerClass *const Targets = iLeafs.getCurrentListTargets(); - const unsigned int NumTargets = Targets->getSize(); - - FReal* Potential = new FReal [NumTargets]; - FBlas::setzero(NumTargets, Potential); - - FReal* Force = new FReal [NumTargets * 3]; - FBlas::setzero(NumTargets * 3, Force); - - std::cout << "\nDirect computation of " << NumTargets << " target particles ..." << std::endl; - const MatrixKernelClass MatrixKernel; - do { - const ContainerClass *const Sources = iLeafs.getCurrentListSrc(); - unsigned int counter = 0; - ContainerClass::ConstBasicIterator iTarget(*Targets); - while(iTarget.hasNotFinished()) { - const FReal wt = iTarget.data().getPhysicalValue(); - ContainerClass::ConstBasicIterator iSource(*Sources); - while(iSource.hasNotFinished()) { - if (&iTarget.data() != &iSource.data()) { - const FReal potential_value = MatrixKernel.evaluate(iTarget.data().getPosition(), - iSource.data().getPosition()); - const FReal ws = iSource.data().getPhysicalValue(); - // potential - Potential[counter] += potential_value * ws; - // force - if (MatrixKernelClass::Identifier == ONE_OVER_R) { // laplace force - FPoint force(iSource.data().getPosition() - iTarget.data().getPosition()); - force *= ((ws*wt) * (potential_value*potential_value*potential_value)); - Force[counter*3 + 0] += force.getX(); - Force[counter*3 + 1] += force.getY(); - Force[counter*3 + 2] += force.getZ(); - } else if (MatrixKernelClass::Identifier == LEONARD_JONES_POTENTIAL) { // lenard-jones force - FPoint force(iSource.data().getPosition() - iTarget.data().getPosition()); - const FReal one_over_r = FReal(1.) / FMath::Sqrt(force.getX()*force.getX() + - force.getY()*force.getY() + - force.getZ()*force.getZ()); // 1 + 15 + 5 = 21 flops - const FReal one_over_r3 = one_over_r * one_over_r * one_over_r; - const FReal one_over_r6 = one_over_r3 * one_over_r3; - const FReal one_over_r4 = one_over_r3 * one_over_r; - force *= ((ws*wt) * (FReal(12.)*one_over_r6*one_over_r4*one_over_r4 - FReal(6.)*one_over_r4*one_over_r4)); - Force[counter*3 + 0] += force.getX(); - Force[counter*3 + 1] += force.getY(); - Force[counter*3 + 2] += force.getZ(); - } - } - iSource.gotoNext(); - } - counter++; - iTarget.gotoNext(); - } - } while(iLeafs.moveRight()); - - - FReal* ApproxPotential = new FReal [NumTargets]; - FReal* ApproxForce = new FReal [NumTargets * 3]; - - unsigned int counter = 0; - ContainerClass::ConstBasicIterator iTarget(*Targets); - while(iTarget.hasNotFinished()) { - ApproxPotential[counter] = iTarget.data().getPotential(); - ApproxForce[counter*3 + 0] = iTarget.data().getForces().getX(); - ApproxForce[counter*3 + 1] = iTarget.data().getForces().getY(); - ApproxForce[counter*3 + 2] = iTarget.data().getForces().getZ(); - counter++; - iTarget.gotoNext(); - } - - std::cout << "\nPotential error:" << std::endl; - std::cout << "Relative L2 error = " << computeL2norm( NumTargets, Potential, ApproxPotential) - << std::endl; - std::cout << "Relative Lmax error = " << computeINFnorm(NumTargets, Potential, ApproxPotential) - << std::endl; - - std::cout << "\nForce error:" << std::endl; - std::cout << "Relative L2 error = " << computeL2norm( NumTargets*3, Force, ApproxForce) - << std::endl; - std::cout << "Relative Lmax error = " << computeINFnorm(NumTargets*3, Force, ApproxForce) - << std::endl; - std::cout << std::endl; - - // free memory - delete [] Potential; - delete [] ApproxPotential; - delete [] Force; - delete [] ApproxForce; - }*/ - /* - // Check if particles are strictly within its containing cells - const FReal BoxWidthLeaf = loader.getBoxWidth() / FReal(FMath::pow(2, TreeHeight-1)); - OctreeClass::Iterator octreeIterator(&tree); - octreeIterator.gotoBottomLeft(); - do{ - const CellClass *const LeafCell = octreeIterator.getCurrentCell(); - const FPoint LeafCellCenter(LeafCell->getCoordinate().getX() * BoxWidthLeaf + BoxWidthLeaf/2 + loader.getCenterOfBox().getX(), - LeafCell->getCoordinate().getY() * BoxWidthLeaf + BoxWidthLeaf/2 + loader.getCenterOfBox().getY(), - LeafCell->getCoordinate().getZ() * BoxWidthLeaf + BoxWidthLeaf/2 + loader.getCenterOfBox().getZ()); - const ContainerClass *const Particles = octreeIterator.getCurrentListSrc(); - ContainerClass::ConstBasicIterator particleIterator(*Particles); - while(particleIterator.hasNotFinished()) { - const FPoint distance(LeafCellCenter-particleIterator.data().getPosition()); - std::cout << "center - particle = " << distance << " < " << BoxWidthLeaf/FReal(2.) << std::endl; - if (std::abs(distance.getX())>BoxWidthLeaf/FReal(2.) || - std::abs(distance.getY())>BoxWidthLeaf/FReal(2.) || - std::abs(distance.getZ())>BoxWidthLeaf/FReal(2.)) { - std::cout << "stop" << std::endl; - exit(-1); - } - particleIterator.gotoNext(); - } - } while(octreeIterator.moveRight()); - */ - - - return 0; -} + FReal* ApproxPotential = new FReal [NumTargets]; + FReal* ApproxForce = new FReal [NumTargets * 3]; + unsigned int counter = 0; + ContainerClass::ConstBasicIterator iTarget(*Targets); + while(iTarget.hasNotFinished()) { + ApproxPotential[counter] = iTarget.data().getPotential(); + ApproxForce[counter*3 + 0] = iTarget.data().getForces().getX(); + ApproxForce[counter*3 + 1] = iTarget.data().getForces().getY(); + ApproxForce[counter*3 + 2] = iTarget.data().getForces().getZ(); + counter++; + iTarget.gotoNext(); + } + std::cout << "\nPotential error:" << std::endl; + std::cout << "Relative L2 error = " << computeL2norm( NumTargets, Potential, ApproxPotential) + << std::endl; + std::cout << "Relative Lmax error = " << computeINFnorm(NumTargets, Potential, ApproxPotential) + << std::endl; + + std::cout << "\nForce error:" << std::endl; + std::cout << "Relative L2 error = " << computeL2norm( NumTargets*3, Force, ApproxForce) + << std::endl; + std::cout << "Relative Lmax error = " << computeINFnorm(NumTargets*3, Force, ApproxForce) + << std::endl; + std::cout << std::endl; + + // free memory + delete [] Potential; + delete [] ApproxPotential; + delete [] Force; + delete [] ApproxForce; + }*/ + /* + // Check if particles are strictly within its containing cells + const FReal BoxWidthLeaf = loader.getBoxWidth() / FReal(FMath::pow(2, TreeHeight-1)); + OctreeClass::Iterator octreeIterator(&tree); + octreeIterator.gotoBottomLeft(); + do{ + const CellClass *const LeafCell = octreeIterator.getCurrentCell(); + const FPoint LeafCellCenter(LeafCell->getCoordinate().getX() * BoxWidthLeaf + BoxWidthLeaf/2 + loader.getCenterOfBox().getX(), + LeafCell->getCoordinate().getY() * BoxWidthLeaf + BoxWidthLeaf/2 + loader.getCenterOfBox().getY(), + LeafCell->getCoordinate().getZ() * BoxWidthLeaf + BoxWidthLeaf/2 + loader.getCenterOfBox().getZ()); + const ContainerClass *const Particles = octreeIterator.getCurrentListSrc(); + ContainerClass::ConstBasicIterator particleIterator(*Particles); + while(particleIterator.hasNotFinished()) { + const FPoint distance(LeafCellCenter-particleIterator.data().getPosition()); + std::cout << "center - particle = " << distance << " < " << BoxWidthLeaf/FReal(2.) << std::endl; + if (std::abs(distance.getX())>BoxWidthLeaf/FReal(2.) || + std::abs(distance.getY())>BoxWidthLeaf/FReal(2.) || + std::abs(distance.getZ())>BoxWidthLeaf/FReal(2.)) { + std::cout << "stop" << std::endl; + exit(-1); + } + particleIterator.gotoNext(); + } + } while(octreeIterator.moveRight()); + */ + + + return 0; +} diff --git a/Tests/Kernels/testCompareKernels.cpp b/Tests/Kernels/testCompareKernels.cpp index baff4fd20f5dcbbaaf8187d3d1a342bdcdb683e1..7e2dd3e19ce725ba1502b0163b8a5e014a75e512 100755 --- a/Tests/Kernels/testCompareKernels.cpp +++ b/Tests/Kernels/testCompareKernels.cpp @@ -37,7 +37,7 @@ // chebyshev kernel #include "../../Src/Kernels/Chebyshev/FChebCell.hpp" -#include "../../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp" @@ -121,7 +121,7 @@ int main(int argc, char* argv[]) // typedefs typedef FP2PParticleContainerIndexed ContainerClass; typedef FSimpleLeaf<ContainerClass> LeafClass; - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; typedef FChebCell<ORDER> CellClass; typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; diff --git a/Tests/Kernels/testFlopsChebAlgorithm.cpp b/Tests/Kernels/testFlopsChebAlgorithm.cpp index 1448eb446dfa8681cbb72851078ba0a6632ae268..3ff510e477b1c9e0ecea7d546f7fd149f06cd103 100755 --- a/Tests/Kernels/testFlopsChebAlgorithm.cpp +++ b/Tests/Kernels/testFlopsChebAlgorithm.cpp @@ -27,7 +27,7 @@ #include "../../Src/Files/FFmaBinLoader.hpp" #include "../../Src/Kernels/Chebyshev/FChebCell.hpp" -#include "../../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebFlopsSymKernel.hpp" @@ -59,7 +59,7 @@ int main(int argc, char* argv[]) // typedefs typedef FP2PParticleContainer ContainerClass; typedef FSimpleLeaf<ContainerClass> LeafClass; - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; typedef FChebCell<ORDER> CellClass; typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; typedef FChebFlopsSymKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; diff --git a/Tests/Kernels/testNewCompareKernels.cpp b/Tests/Kernels/testNewCompareKernels.cpp index 6e3cb86035ff39a477ad96963600e6be094cb770..64e03b5a2f105edc769ca16d6d869f05dcf13108 100644 --- a/Tests/Kernels/testNewCompareKernels.cpp +++ b/Tests/Kernels/testNewCompareKernels.cpp @@ -39,7 +39,7 @@ // chebyshev kernel #include "../../Src/Kernels/Chebyshev/FChebCell.hpp" -#include "../../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp" #endif @@ -62,6 +62,13 @@ #include "../../Src/Kernels/Rotation/FRotationKernel.hpp" #include "../../Src/Kernels/Rotation/FRotationCell.hpp" +#ifdef ScalFMM_USE_BLAS +// Uniform grid kernel +#include "../../Src/Kernels/Uniform/FUnifCell.hpp" +//#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" +#include "../../Src/Kernels/Uniform/FUnifKernel.hpp" +#endif + /** * This program compares two different kernels, eg., the Chebyshev kernel with @@ -140,11 +147,12 @@ int main(int argc, char* argv[]) // typedefs typedef FP2PParticleContainerIndexed ContainerClass; typedef FSimpleLeaf<ContainerClass> LeafClass; - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; typedef FChebCell<ORDER> CellClass; typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; typedef FChebSymKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; + // typedef FChebKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; @@ -279,6 +287,85 @@ int main(int argc, char* argv[]) std::cout << "Fy " << fy << std::endl; std::cout << "Fz " << fz << std::endl; } // end FFmaBlas kernel + // + //////////////////////////////////////////////////////////////////// + // + { // begin Lagrange/Uniform Grid kernel + + // TODO + + // accuracy + const unsigned int ORDER = 7; + + // typedefs + typedef FP2PParticleContainerIndexed ContainerClass; + typedef FSimpleLeaf<ContainerClass> LeafClass; + typedef FInterpMatrixKernelR MatrixKernelClass; + typedef FUnifCell<ORDER> CellClass; + typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; + + typedef FUnifKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; + typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; + + + // init oct-tree + OctreeClass tree(TreeHeight, SubTreeHeight, loader.getBoxWidth(), loader.getCenterOfBox()); + + { // ----------------------------------------------------- + std::cout << "Creating & Inserting " << loader.getNumberOfParticles() + << " particles ..." << std::endl; + std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl; + time.tic(); + + for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + // put in tree + tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue); + } + + time.tac(); + std::cout << "Done " << "(@Creating and Inserting Particles = " + << time.elapsed() << "s)." << std::endl; + } // ----------------------------------------------------- + + { // ----------------------------------------------------- + std::cout << "\nLagrange FMM ... " << std::endl; + time.tic(); + KernelClass kernels(TreeHeight, loader.getCenterOfBox(), loader.getBoxWidth()); + FmmClass algorithm(&tree, &kernels); + algorithm.execute(); + time.tac(); + std::cout << "Done " << "(@Algorithm = " << time.elapsed() << "s)." << std::endl; + } // ----------------------------------------------------- + + FMath::FAccurater potentialDiff; + FMath::FAccurater fx, fy, fz; + { // Check that each particle has been summed with all other + + tree.forEachLeaf([&](LeafClass* leaf){ + const FReal*const potentials = leaf->getTargets()->getPotentials(); + const FReal*const forcesX = leaf->getTargets()->getForcesX(); + const FReal*const forcesY = leaf->getTargets()->getForcesY(); + const FReal*const forcesZ = leaf->getTargets()->getForcesZ(); + const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); + const FVector<int>& indexes = leaf->getTargets()->getIndexes(); + + for(int idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){ + const int indexPartOrig = indexes[idxPart]; + potentialDiff.add(particles[indexPartOrig].potential,potentials[idxPart]); + fx.add(particles[indexPartOrig].forces[0],forcesX[idxPart]); + fy.add(particles[indexPartOrig].forces[1],forcesY[idxPart]); + fz.add(particles[indexPartOrig].forces[2],forcesZ[idxPart]); + } + }); + } + + // Print for information + std::cout << "Potential " << potentialDiff << std::endl; + std::cout << "Fx " << fx << std::endl; + std::cout << "Fy " << fy << std::endl; + std::cout << "Fz " << fz << std::endl; + + } // end Lagrange/Uniform Grid kernel #endif { diff --git a/Tests/Kernels/testUnifAlgorithm.cpp b/Tests/Kernels/testUnifAlgorithm.cpp new file mode 100644 index 0000000000000000000000000000000000000000..22379a0c9f31cfd88231fdb94746d04445ba79f6 --- /dev/null +++ b/Tests/Kernels/testUnifAlgorithm.cpp @@ -0,0 +1,201 @@ +// =================================================================================== +// Ce LOGICIEL "ScalFmm" est couvert par le copyright Inria 20xx-2012. +// Inria détient tous les droits de propriété sur le LOGICIEL, et souhaite que +// la communauté scientifique l'utilise afin de le tester et de l'évaluer. +// Inria donne gracieusement le droit d'utiliser ce LOGICIEL. Toute utilisation +// dans un but lucratif ou à des fins commerciales est interdite sauf autorisation +// expresse et préalable d'Inria. +// Toute utilisation hors des limites précisées ci-dessus et réalisée sans l'accord +// expresse préalable d'Inria constituerait donc le délit de contrefaçon. +// Le LOGICIEL étant un produit en cours de développement, Inria ne saurait assurer +// aucune responsabilité et notamment en aucune manière et en aucun cas, être tenu +// de répondre d'éventuels dommages directs ou indirects subits par l'utilisateur. +// Tout utilisateur du LOGICIEL s'engage à communiquer à Inria ses remarques +// relatives à l'usage du LOGICIEL +// =================================================================================== + +// ==== CMAKE ===== +// @FUSE_BLAS +// ================ + +#include <iostream> + +#include <cstdio> +#include <cstdlib> + +#include "../../Src/Files/FFmaScanfLoader.hpp" +#include "../../Src/Files/FFmaBinLoader.hpp" + + +#include "../../Src/Kernels/Uniform/FUnifCell.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" +#include "../../Src/Kernels/Uniform/FUnifKernel.hpp" + +#include "../../Src/Components/FSimpleLeaf.hpp" +#include "../../Src/Kernels/P2P/FP2PParticleContainerIndexed.hpp" + +#include "../../Src/Utils/FParameters.hpp" +#include "../../Src/Utils/FMemUtils.hpp" + +#include "../../Src/Containers/FOctree.hpp" +#include "../../Src/Containers/FVector.hpp" + +#include "../../Src/Core/FFmmAlgorithm.hpp" +#include "../../Src/Core/FFmmAlgorithmThread.hpp" + +/** + * This program runs the FMM Algorithm with the Uniform kernel and compares the results with a direct computation. + */ + +// Simply create particles and try the kernels +int main(int argc, char* argv[]) +{ + const char* const filename = FParameters::getStr(argc,argv,"-f", "../Data/test20k.fma"); + const unsigned int TreeHeight = FParameters::getValue(argc, argv, "-h", 5); + const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, "-sh", 2); + const unsigned int NbThreads = FParameters::getValue(argc, argv, "-t", 1); + +#ifdef _OPENMP + omp_set_num_threads(NbThreads); + std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl; +#else + std::cout << "\n>> Sequential version.\n" << std:: +#endif + + // init timer + FTic time; + + // init particles position and physical value + struct TestParticle{ + FPoint position; + FReal forces[3]; + FReal physicalValue; + FReal potential; + }; + + // open particle file + FFmaScanfLoader loader(filename); + if(!loader.isOpen()) throw std::runtime_error("Particle file couldn't be opened!"); + + TestParticle* const particles = new TestParticle[loader.getNumberOfParticles()]; + for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + FPoint position; + FReal physicalValue = 0.0; + loader.fillParticle(&position,&physicalValue); + // get copy + particles[idxPart].position = position; + particles[idxPart].physicalValue = physicalValue; + particles[idxPart].potential = 0.0; + particles[idxPart].forces[0] = 0.0; + particles[idxPart].forces[1] = 0.0; + particles[idxPart].forces[2] = 0.0; + } + + //////////////////////////////////////////////////////////////////// + + { // begin direct computation + std::cout << "\nDirect Computation ... " << std::endl; + time.tic(); + { + for(int idxTarget = 0 ; idxTarget < loader.getNumberOfParticles() ; ++idxTarget){ + for(int idxOther = idxTarget + 1 ; idxOther < loader.getNumberOfParticles() ; ++idxOther){ + FP2P::MutualParticles(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(), + particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue, + &particles[idxTarget].forces[0], &particles[idxTarget].forces[1], + &particles[idxTarget].forces[2], &particles[idxTarget].potential, + particles[idxOther].position.getX(), particles[idxOther].position.getY(), + particles[idxOther].position.getZ(), particles[idxOther].physicalValue, + &particles[idxOther].forces[0], &particles[idxOther].forces[1], + &particles[idxOther].forces[2], &particles[idxOther].potential); + } + } + } + time.tac(); + std::cout << "Done " << "(@Direct Computation = " + << time.elapsed() << "s)." << std::endl; + + } // end direct computation + + //////////////////////////////////////////////////////////////////// + + { // begin Lagrange kernel + + // accuracy + const unsigned int ORDER = 7; + // typedefs + typedef FP2PParticleContainerIndexed ContainerClass; + typedef FSimpleLeaf< ContainerClass > LeafClass; + //typedef FInterpMatrixKernelLJ MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; + typedef FUnifCell<ORDER> CellClass; + typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; + typedef FUnifKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; + typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; + // typedef FFmmAlgorithmThread<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; + + // init oct-tree + OctreeClass tree(TreeHeight, SubTreeHeight, loader.getBoxWidth(), loader.getCenterOfBox()); + + + { // ----------------------------------------------------- + std::cout << "Creating & Inserting " << loader.getNumberOfParticles() + << " particles ..." << std::endl; + std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl; + time.tic(); + + for(int idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + // put in tree + tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue); + } + + time.tac(); + std::cout << "Done " << "(@Creating and Inserting Particles = " + << time.elapsed() << "s)." << std::endl; + } // ----------------------------------------------------- + + { // ----------------------------------------------------- + std::cout << "\nLagrange/Uniform grid FMM (ORDER="<< ORDER << ") ... " << std::endl; + time.tic(); + KernelClass kernels(TreeHeight, loader.getCenterOfBox(), loader.getBoxWidth()); + FmmClass algorithm(&tree, &kernels); + algorithm.execute(); + time.tac(); + std::cout << "Done " << "(@Algorithm = " << time.elapsed() << "s)." << std::endl; + } // ----------------------------------------------------- + + + { // ----------------------------------------------------- + std::cout << "\nError computation ... " << std::endl; + FMath::FAccurater potentialDiff; + FMath::FAccurater fx, fy, fz; + { // Check that each particle has been summed with all other + + tree.forEachLeaf([&](LeafClass* leaf){ + const FReal*const potentials = leaf->getTargets()->getPotentials(); + const FReal*const forcesX = leaf->getTargets()->getForcesX(); + const FReal*const forcesY = leaf->getTargets()->getForcesY(); + const FReal*const forcesZ = leaf->getTargets()->getForcesZ(); + const int nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); + const FVector<int>& indexes = leaf->getTargets()->getIndexes(); + + for(int idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){ + const int indexPartOrig = indexes[idxPart]; + potentialDiff.add(particles[indexPartOrig].potential,potentials[idxPart]); + fx.add(particles[indexPartOrig].forces[0],forcesX[idxPart]); + fy.add(particles[indexPartOrig].forces[1],forcesY[idxPart]); + fz.add(particles[indexPartOrig].forces[2],forcesZ[idxPart]); + } + }); + } + + // Print for information + std::cout << "Potential " << potentialDiff << std::endl; + std::cout << "Fx " << fx << std::endl; + std::cout << "Fy " << fy << std::endl; + std::cout << "Fz " << fz << std::endl; + } // ----------------------------------------------------- + + } // end Lagrange kernel + + return 0; +} diff --git a/Tests/Utils/testACA.cpp b/Tests/Utils/testACA.cpp index 2e40ff173794a1e9ae7d0bf9aaffa9b19a398bdc..445f338a8a271edf5d303e72ece60b97e6475a14 100755 --- a/Tests/Utils/testACA.cpp +++ b/Tests/Utils/testACA.cpp @@ -29,7 +29,7 @@ #include "../../Src/Utils/FPoint.hpp" -#include "../../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebRoots.hpp" #include "../../Src/Kernels/Chebyshev/FChebTensor.hpp" #include "../../Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp" @@ -42,7 +42,7 @@ int main(int argc, char* argv[]) { - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; MatrixKernelClass MatrixKernel; const unsigned int ORDER = 9; diff --git a/Tests/Utils/testChebBinaryM2L.cpp b/Tests/Utils/testChebBinaryM2L.cpp index c5cb1d0f3032e3f67cf80fa617c457bf05aac110..f2412491abd9175bafcdd9eae5e81c5ff49934f2 100755 --- a/Tests/Utils/testChebBinaryM2L.cpp +++ b/Tests/Utils/testChebBinaryM2L.cpp @@ -29,7 +29,7 @@ #include "../../Src/Utils/FTic.hpp" #include "../../Src/Utils/FMath.hpp" -#include "../../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebM2LHandler.hpp" @@ -41,7 +41,7 @@ int main(int argc, char* argv[]) { // typedefs - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; // instantiations diff --git a/Tests/Utils/testChebBinarySymM2L.cpp b/Tests/Utils/testChebBinarySymM2L.cpp index 9f894b34bc2fc7178e47cb90d2ad77231cd64e6a..958d5ea9d69f9e322a7bfcaafa19eb13148e7bda 100755 --- a/Tests/Utils/testChebBinarySymM2L.cpp +++ b/Tests/Utils/testChebBinarySymM2L.cpp @@ -30,7 +30,7 @@ #include "../../Src/Utils/FTic.hpp" #include "../../Src/Utils/FMath.hpp" -#include "../../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebSymM2LHandler.hpp" @@ -42,7 +42,7 @@ int main(int argc, char* argv[]) { // typedefs - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; // instantiations FTic time; diff --git a/Tests/Utils/testChebInterpolator.cpp b/Tests/Utils/testChebInterpolator.cpp index 2a7b18582deb751b05c01a44f85f901df96ea32d..31af57253db722fa8a10e7485edce796e866e05b 100755 --- a/Tests/Utils/testChebInterpolator.cpp +++ b/Tests/Utils/testChebInterpolator.cpp @@ -35,7 +35,7 @@ #include "../../Src/Kernels/Chebyshev/FChebInterpolator.hpp" -#include "../../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp" #include "../../Src/Components/FSimpleLeaf.hpp" @@ -50,7 +50,7 @@ int main(int, char **){ typedef FP2PParticleContainer ContainerClass; typedef FSimpleLeaf<ContainerClass> LeafClass; - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; ///////////////////////What we do///////////////////////////// diff --git a/Tests/Utils/testChebM2Lprecomputation.cpp b/Tests/Utils/testChebM2Lprecomputation.cpp index a59d5c403681ebeb0ee47cd2a0dd7beb0780a423..503caf43f822718e8293c10cf2b931bb238d497d 100755 --- a/Tests/Utils/testChebM2Lprecomputation.cpp +++ b/Tests/Utils/testChebM2Lprecomputation.cpp @@ -32,7 +32,7 @@ #include "../../Src/Kernels/Chebyshev/FChebTensor.hpp" #include "../../Src/Kernels/Chebyshev/FChebM2LHandler.hpp" -#include "../../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" @@ -45,7 +45,7 @@ int main(int argc, char* argv[]) FTic time; // define set matrix kernel - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; MatrixKernelClass MatrixKernel; // constants diff --git a/Tests/Utils/testChebSxUCBSy.cpp b/Tests/Utils/testChebSxUCBSy.cpp index 8ad70da9a19a2e5b2c2fccd4d93583f66f0abca0..568793760108a0705b5bdf25711dbd30f767906d 100755 --- a/Tests/Utils/testChebSxUCBSy.cpp +++ b/Tests/Utils/testChebSxUCBSy.cpp @@ -35,7 +35,7 @@ #include "../../Src/Kernels/Chebyshev/FChebInterpolator.hpp" -#include "../../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Components/FSimpleLeaf.hpp" #include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp" @@ -52,7 +52,7 @@ int main(int argc, char* argv[]) { typedef FP2PParticleContainer ContainerClass; typedef FSimpleLeaf<ContainerClass> LeafClass; - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; ///////////////////////What we do///////////////////////////// std::cout << "\nTask: Compute interactions between source particles in leaf Y and target\n"; diff --git a/Tests/Utils/testChebSymmetries.cpp b/Tests/Utils/testChebSymmetries.cpp index 28d1c535b499c4364f8f72604af076403788726b..f506ca106be7e76490a98673881f5ee6216f275c 100755 --- a/Tests/Utils/testChebSymmetries.cpp +++ b/Tests/Utils/testChebSymmetries.cpp @@ -30,7 +30,7 @@ #include "../../Src/Utils/FBlas.hpp" #include "../../Src/Kernels/Chebyshev/FChebTensor.hpp" -#include "../../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../../Src/Kernels/Chebyshev/FChebSymmetries.hpp" @@ -59,7 +59,7 @@ int main(int argc, char* argv[]) FTic time; // define set matrix kernel - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; MatrixKernelClass MatrixKernel; // constants diff --git a/Tests/Utils/testChebTensorProduct.cpp b/Tests/Utils/testChebTensorProduct.cpp index 38d57bb953f509b61effeafd88b0850b2e594889..ee12ed4b5f4bd156aac1d225a2eb220690e8c68d 100755 --- a/Tests/Utils/testChebTensorProduct.cpp +++ b/Tests/Utils/testChebTensorProduct.cpp @@ -85,7 +85,7 @@ int main(int argc, char* argv[]) FReal coords[3][ORDER]; - FChebTensor<ORDER>::setChebyshevRoots(cx, wx, coords); + FChebTensor<ORDER>::setPolynomialsRoots(cx, wx, coords); // for (unsigned int n=0; n<ORDER; ++n) { // std::cout << coords[0][n] << "\t" diff --git a/Tests/Utils/testFFTW.cpp b/Tests/Utils/testFFTW.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7a9b260d0f13547fb732ee0cb7f71327bf52d3de --- /dev/null +++ b/Tests/Utils/testFFTW.cpp @@ -0,0 +1,139 @@ +// =================================================================================== +// Ce LOGICIEL "ScalFmm" est couvert par le copyright Inria 20xx-2012. +// Inria détient tous les droits de propriété sur le LOGICIEL, et souhaite que +// la communauté scientifique l'utilise afin de le tester et de l'évaluer. +// Inria donne gracieusement le droit d'utiliser ce LOGICIEL. Toute utilisation +// dans un but lucratif ou à des fins commerciales est interdite sauf autorisation +// expresse et préalable d'Inria. +// Toute utilisation hors des limites précisées ci-dessus et réalisée sans l'accord +// expresse préalable d'Inria constituerait donc le délit de contrefaçon. +// Le LOGICIEL étant un produit en cours de développement, Inria ne saurait assurer +// aucune responsabilité et notamment en aucune manière et en aucun cas, être tenu +// de répondre d'éventuels dommages directs ou indirects subits par l'utilisateur. +// Tout utilisateur du LOGICIEL s'engage à communiquer à Inria ses remarques +// relatives à l'usage du LOGICIEL +// =================================================================================== + +#include <iostream> +#include <stdlib.h> + +// include for libfftw3 +//#include <fftw3.h> + +// include for mkl_fftw3 +#include <fftw/fftw3.h> + +#include "../../Src/Utils/FGlobal.hpp" +#include "../../Src/Utils/FComplexe.hpp" + +#include "../../Src/Utils/FTic.hpp" + + +//#include "../../Src/Utils/FDft.hpp" + + +int main() +{ + const FReal FRandMax = FReal(RAND_MAX); + + FTic time; + + ////////////////////////////////////////////////////////////////////////////// + // INITIALIZATION + + // size (pick a power of 2 for better performance of the FFT algorithm) + unsigned int nsteps_ = 5; + + // fftw arrays + FReal* fftR_; + FComplexe* fftC_; + + fftR_ = (FReal*) fftw_malloc(sizeof(FReal) * nsteps_); + fftC_ = (FComplexe*) fftw_malloc(sizeof(FComplexe) * nsteps_); + + // fftw plans + // use routine defined in file: + // /PATH/TO/mkl/interfaces/fftw3xf/wrappers/fftw_plan_dft_c2r_1d.c + fftw_plan plan_c2r_; // backward FFT plan + fftw_plan plan_r2c_; // forward FFT plan + + std::cout<< "Init FFTW plans: "; + time.tic(); + + plan_c2r_ = + fftw_plan_dft_c2r_1d(nsteps_, + reinterpret_cast<fftw_complex*>(fftC_), + fftR_, + FFTW_MEASURE); + plan_r2c_ = + fftw_plan_dft_r2c_1d(nsteps_, + fftR_, + reinterpret_cast<fftw_complex*>(fftC_), + FFTW_MEASURE); + + std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; + + // alternative choice of plan type +// plan_c2r_ = +// fftw_plan_dft_1d(nsteps_, fftC_, fftR_, FFTW_BACKWARD, FFTW_MEASURE); +// plan_r2c_ = +// fftw_plan_dft_1d(nsteps_, fftR_, fftC_, FFTW_FORWARD, FFTW_MEASURE); + + ////////////////////////////////////////////////////////////////////////////// + // EXECUTION + // generate random physical data + for(unsigned int s=0; s<nsteps_; ++s) + fftR_[s] = FReal(rand())/FRandMax; + + // display data in physical space + std::cout<< "Physical data: "<<std::endl; + for(unsigned int s=0; s<nsteps_; ++s) + std::cout<< fftR_[s] << ", "; + std::cout<<std::endl; + + // perform fft + std::cout<< "Perform Forward FFT: "; + time.tic(); + fftw_execute( plan_r2c_ ); + std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; + + // display transform in Fourier space + // beware the real data FFT stores only N/2+1 complex output values + std::cout<< "Transformed data : "<<std::endl; + for(unsigned int s=0; s<nsteps_; ++s) + std::cout<< fftC_[s] << ", "; + std::cout<<std::endl; + +// for(unsigned int s=0; s<nsteps_/2+1; ++s){ +// fftC_[nsteps_-s]=FComplexe(fftC_[s].getReal(),-fftC_[s].getImag()); +// } +// +// std::cout<< "Full Transformed data : "<<std::endl; +// for(unsigned int s=0; s<nsteps_; ++s) +// std::cout<< fftC_[s] << ", "; +// std::cout<<std::endl; + + // perform ifft of tranformed data (in order to get physical values back) + std::cout<< "Perform Backward FFT: "; + time.tic(); + fftw_execute( plan_c2r_ ); + std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; + + // display data in physical space + std::cout<< "Physical data (from 1/N*IFFT(FFT(Physical data))): "<<std::endl; + for(unsigned int s=0; s<nsteps_; ++s) + std::cout<< fftR_[s]/nsteps_ << ", "; + std::cout<<std::endl; + + ////////////////////////////////////////////////////////////////////////////// + // VALIDATION + + + + //free memory + fftw_destroy_plan(plan_c2r_); + fftw_free(fftR_); + fftw_free(fftC_); + + +}// end test diff --git a/Tests/Utils/testFastDiscreteConvolution.cpp b/Tests/Utils/testFastDiscreteConvolution.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f6ae16c53173b3626d72a1438bc1b50c1a7dcec7 --- /dev/null +++ b/Tests/Utils/testFastDiscreteConvolution.cpp @@ -0,0 +1,178 @@ +// =================================================================================== +// Ce LOGICIEL "ScalFmm" est couvert par le copyright Inria 20xx-2012. +// Inria détient tous les droits de propriété sur le LOGICIEL, et souhaite que +// la communauté scientifique l'utilise afin de le tester et de l'évaluer. +// Inria donne gracieusement le droit d'utiliser ce LOGICIEL. Toute utilisation +// dans un but lucratif ou à des fins commerciales est interdite sauf autorisation +// expresse et préalable d'Inria. +// Toute utilisation hors des limites précisées ci-dessus et réalisée sans l'accord +// expresse préalable d'Inria constituerait donc le délit de contrefaçon. +// Le LOGICIEL étant un produit en cours de développement, Inria ne saurait assurer +// aucune responsabilité et notamment en aucune manière et en aucun cas, être tenu +// de répondre d'éventuels dommages directs ou indirects subits par l'utilisateur. +// Tout utilisateur du LOGICIEL s'engage à communiquer à Inria ses remarques +// relatives à l'usage du LOGICIEL +// =================================================================================== + +// ==== CMAKE ===== +// @FUSE_BLAS +// ================ + + +#include <iostream> + +#include <stdlib.h> +#include <time.h> + +#include "../../Src/Utils/FTic.hpp" +#include "../../Src/Utils/FMath.hpp" +#include "../../Src/Utils/FBlas.hpp" + +// +#include "../../Src/Utils/FDft.hpp" +#include "../../Src/Utils/FComplexe.hpp" + + + +/** + * In this file we show how to use octree + */ + +int main(int, char **){ + + const FReal FRandMax = FReal(RAND_MAX); + + FTic time; + + const unsigned int ORDER = 3; + const unsigned int N=(2*ORDER-1); + + + ////////////////////////////////////////////////////////////////////////////// + // Application of Convolution Theorem + // Let there be a Circulant Toeplitz matrix K of size N + + FReal K[N*N]; + FReal Y[N]; + FReal X[N]; + + for(unsigned int i=0; i<N; ++i) X[i]=0.0; + for(unsigned int i=0; i<N; ++i) Y[i]=FReal(rand())/FRandMax; + + std::cout<< "Y: "<<std::endl; + for(unsigned int i=0; i<N; ++i) + std::cout<< Y[i] << ", "; + std::cout<<std::endl; + + // Assemble Matrix K + std::cout<< "Assemble Full Matrix K: "; + time.tic(); + for(unsigned int i=0; i<N; ++i){ + for(unsigned int j=0; j<N; ++j){ + if(i>j) K[i*N+j]=i-j-1; + else K[i*N+j]=N+i-j-1; + } + } + std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; + + std::cout<< "Circulant Toeplitz matrix K: "<<std::endl; + for(unsigned int i=0; i<N; ++i){ + for(unsigned int j=0; j<N; ++j){ + std::cout<< K[i*N+j] << ", "; + } std::cout<<std::endl; + } std::cout<<std::endl; + + // Direct application of Circulant Matrix + std::cout<< "Direct application of K: "; + time.tic(); + for(unsigned int i=0; i<N; ++i){ + for(unsigned int j=0; j<N; ++j){ + X[i]+=K[i*N+j]*Y[j]; + } + } + std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; + + std::cout<< "X=KY: "<<std::endl; + for(unsigned int i=0; i<N; ++i) + std::cout<< X[i] << ", "; + std::cout<<std::endl; + + // now compute via DFT and use convolution theorem + FComplexe FK[N]; + FComplexe FY[N]; + FComplexe FX[N]; + FReal iFX[N]; + + // Init DFTor + std::cout<< "Set DFT: "; + time.tic(); + //FDft Dft(N);// direct version + FFft Dft(N);// fast version + std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; + + // Initialize manually +// for(unsigned int s=0; s<N; ++s){ +// FX[s] = FY[s] = FK[s] =FComplexe(0.0,0.0); // init +// iFX[s]=0.0; +// } + // ... or using Blas routines + FBlas::c_setzero(N,reinterpret_cast<FReal*>(FX)); + FBlas::c_setzero(N,reinterpret_cast<FReal*>(FY)); + FBlas::c_setzero(N,reinterpret_cast<FReal*>(FK)); + FBlas::setzero(N,iFX); + + + // Transform Y in Fourier space + std::cout<< "Transform Y->FY: "; + time.tic(); + Dft.applyDFT(Y,FY); + std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; + + std::cout<< "FY: "<<std::endl; + for(unsigned int i=0; i<N; ++i){ + std::cout<< FY[i] << ", "; + }std::cout<<std::endl; + + // Transform first column of K + FReal tK[N]; + for(unsigned int i=0; i<N; ++i) tK[i]=K[i*N]; + std::cout<< "Transform tK->FK: "; + time.tic(); + Dft.applyDFT(tK,FK); + std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; + +// std::cout<< "Transformed Matrix FK: "<<std::endl; +// for(unsigned int i=0; i<N; ++i){ +// std::cout<< FK[i] << ", "; +// }std::cout<<std::endl; + + // Compute X=KY in Fourier space + std::cout<< "Apply K in Fourier space (entrywise prod.): "; + time.tic(); + for(unsigned int s=0; s<N; ++s){// TODO loop only over non zero entries + FX[s]=FK[s]; + FX[s]*=FY[s]; + } + std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; + + std::cout<< "Transformed Local Exp FX=FK:FY: "<<std::endl; + for(unsigned int i=0; i<N; ++i){ + std::cout<< FX[i] << ", "; + }std::cout<<std::endl; + + // Transform FX back in Physical space + std::cout<< "Transform FX->iF(FX): "; + time.tic(); + Dft.applyIDFT(FX,iFX); + std::cout << "took " << time.tacAndElapsed() << "sec." << std::endl; + + std::cout<< "iF(FK:FY): "<<std::endl; + for(unsigned int i=0; i<N; ++i){ + std::cout<< iFX[i] << ", "; + }std::cout<<std::endl; + +std::cout << "Relative error = " << FMath::FAccurater( X, iFX, N) << std::endl; + + + return 0; +} diff --git a/Tests/Utils/testUnifInterpolator.cpp b/Tests/Utils/testUnifInterpolator.cpp new file mode 100755 index 0000000000000000000000000000000000000000..3ef383da4cfdddb8d76b7a4768cbd52a84ad1046 --- /dev/null +++ b/Tests/Utils/testUnifInterpolator.cpp @@ -0,0 +1,513 @@ +// =================================================================================== +// Ce LOGICIEL "ScalFmm" est couvert par le copyright Inria 20xx-2012. +// Inria détient tous les droits de propriété sur le LOGICIEL, et souhaite que +// la communauté scientifique l'utilise afin de le tester et de l'évaluer. +// Inria donne gracieusement le droit d'utiliser ce LOGICIEL. Toute utilisation +// dans un but lucratif ou à des fins commerciales est interdite sauf autorisation +// expresse et préalable d'Inria. +// Toute utilisation hors des limites précisées ci-dessus et réalisée sans l'accord +// expresse préalable d'Inria constituerait donc le délit de contrefaçon. +// Le LOGICIEL étant un produit en cours de développement, Inria ne saurait assurer +// aucune responsabilité et notamment en aucune manière et en aucun cas, être tenu +// de répondre d'éventuels dommages directs ou indirects subits par l'utilisateur. +// Tout utilisateur du LOGICIEL s'engage à communiquer à Inria ses remarques +// relatives à l'usage du LOGICIEL +// =================================================================================== + +// ==== CMAKE ===== +// @FUSE_BLAS +// ================ + + +#include <iostream> + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> + +#include "../../Src/Utils/FTic.hpp" +#include "../../Src/Utils/FMath.hpp" +#include "../../Src/Utils/FBlas.hpp" + +#include "../../Src/Containers/FVector.hpp" + +#include "../../Src/Utils/FAssert.hpp" +#include "../../Src/Utils/FPoint.hpp" + + +#include "../../Src/Kernels/Uniform/FUnifInterpolator.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" +#include "../../Src/Kernels/Uniform/FUnifTensor.hpp" + +// Check DFT +#include "../../Src/Utils/FDft.hpp" +#include "../../Src/Utils/FComplexe.hpp" + + +#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp" +#include "../../Src/Components/FSimpleLeaf.hpp" + + + + +/** + * In this file we show how to use octree + */ + +int main(int, char **){ + typedef FP2PParticleContainer ContainerClass; + typedef FSimpleLeaf<ContainerClass> LeafClass; + typedef FInterpMatrixKernelR MatrixKernelClass; + + + ///////////////////////What we do///////////////////////////// + std::cout << "\nTask: Compute interactions between source particles in leaf Y and target\n"; + std::cout << " particles in leaf X. Compare the fast summation K ~ Lx K Ly' with the\n"; + std::cout << " direct computation.\n" << std::endl; + ////////////////////////////////////////////////////////////// + + MatrixKernelClass MatrixKernel; + const FReal FRandMax = FReal(RAND_MAX); + FTic time; + + + // Leaf size + FReal width = FReal(3.723); + + //////////////////////////////////////////////////////////////////// + LeafClass X; + FPoint cx(0., 0., 0.); + const unsigned long M = 20000; + std::cout << "Fill the leaf X of width " << width + << " centered at cx=" << cx << " with M=" << M << " target particles" << std::endl; + { + for(unsigned long i=0; i<M; ++i){ + FReal x = (FReal(rand())/FRandMax - FReal(.5)) * width + cx.getX(); + FReal y = (FReal(rand())/FRandMax - FReal(.5)) * width + cx.getY(); + FReal z = (FReal(rand())/FRandMax - FReal(.5)) * width + cx.getZ(); + X.push(FPoint(x, y, z), FReal(rand())/FRandMax); + } + } + + + //////////////////////////////////////////////////////////////////// + LeafClass Y; + // FPoint cy(FReal(2.)*width, 0., 0.); + FPoint cy(FReal(2.)*width, FReal(2.)*width, 0.); + + const unsigned long N = 20000; + std::cout << "Fill the leaf Y of width " << width + << " centered at cy=" << cy << " with N=" << N << " target particles" << std::endl; + { + for(unsigned long i=0; i<N; ++i){ + FReal x = (FReal(rand())/FRandMax - FReal(.5)) * width + cy.getX(); + FReal y = (FReal(rand())/FRandMax - FReal(.5)) * width + cy.getY(); + FReal z = (FReal(rand())/FRandMax - FReal(.5)) * width + cy.getZ(); + Y.push(FPoint(x, y, z), FReal(rand())/FRandMax); + } + } + + + + //////////////////////////////////////////////////////////////////// + // approximative computation + const unsigned int ORDER = 3; + const unsigned int nnodes = TensorTraits<ORDER>::nnodes; + typedef FUnifInterpolator<ORDER> InterpolatorClass; + InterpolatorClass S; + + std::cout << "\nCompute interactions approximatively, interpolation order = " << ORDER << " ..." << std::endl; + + std::cout << "\nP2M ... " << std::flush; + time.tic(); + // Anterpolate: W_n = \sum_j^N S(y_j,\bar y_n) * w_j + FReal W[nnodes]; // multipole expansion + S.applyP2M(cy, width, W, Y.getSrc()); // the multipole expansions are set to 0 in S.applyP2M + std::cout << "took " << time.tacAndElapsed() << "s" << std::endl; + + std::cout << "M2L ... " << std::flush; + time.tic(); + // Multipole to local: F_m = \sum_n^L K(\bar x_m, \bar y_n) * W_n + FPoint rootsX[nnodes], rootsY[nnodes]; + FUnifTensor<ORDER>::setRoots(cx, width, rootsX); + FUnifTensor<ORDER>::setRoots(cy, width, rootsY); + + FReal F[nnodes]; // local expansion + for (unsigned int i=0; i<nnodes; ++i) { + F[i] = FReal(0.); + for (unsigned int j=0; j<nnodes; ++j){ + + F[i] += MatrixKernel.evaluate(rootsX[i], rootsY[j]) * W[j]; + + } + } + std::cout << "took " << time.tacAndElapsed() << "s" << std::endl; + + + //////////////////////////////////////////////////////////////////////////// + // Store M2L in K and apply K + FReal K[nnodes*nnodes]; // local expansion + for (unsigned int i=0; i<nnodes; ++i) { + for (unsigned int j=0; j<nnodes; ++j){ + K[i*nnodes+j] = MatrixKernel.evaluate(rootsX[i], rootsY[j]); + } + } + std::cout<< "Apply M2L in usual sense: "; + time.tic(); + + for (unsigned int i=0; i<nnodes; ++i) { + F[i] = FReal(0.); + for (unsigned int j=0; j<nnodes; ++j){ + + F[i] += K[i*nnodes+j] * W[j]; + + } + } + + time.tac(); + std::cout << "took " << time.elapsed() << "sec." << std::endl; + + // PB: Verify storage improvement works (indexing etc...) + // 1) store circulant matrix + const unsigned int rc = (2*ORDER-1)*(2*ORDER-1)*(2*ORDER-1); + FReal C[rc]; + + // std::cout << "2*order-1=" << 2*ORDER-1 << std::endl; + unsigned int li,lj; + unsigned int mi,mj; + unsigned int ni,nj; + unsigned int idi, idj, ido=0; + + for(unsigned int l=0; l<2*ORDER-1; ++l) + for(unsigned int m=0; m<2*ORDER-1; ++m) + for(unsigned int n=0; n<2*ORDER-1; ++n){ + + // compute position in nnodes vector of roots + if(l<ORDER-1) lj=ORDER-1; else lj=0; + if(m<ORDER-1) mj=ORDER-1; else mj=0; + if(n<ORDER-1) nj=ORDER-1; else nj=0; + li=(l-(ORDER-1))+lj; mi=(m-(ORDER-1))+mj; ni=(n-(ORDER-1))+nj; + + idi=li*ORDER*ORDER + mi*ORDER + ni; + idj=lj*ORDER*ORDER + mj*ORDER + nj; + + C[ido] + = MatrixKernel.evaluate(rootsX[idi], rootsY[idj]); + ido++; + } +// // Display C (gathers every values of K that need to be stored, +// // corresponds to the first line of the padded matrix (reverse order?)) +// std::cout<<"C="<<std::endl; +// for (unsigned int n=0; n<rankpad; ++n) +// std::cout<< C[ido] << ", "; +// std::cout<<std::endl; + + typedef FUnifTensor<ORDER> TensorType; + unsigned int node_diff[nnodes*nnodes]; + TensorType::setNodeIdsDiff(node_diff); + + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // K is a block Toeplitz matrix + // i.e. a blockwise Toeplitz matrix where the block also have the Toeplitz structure. + // e.g. for ORDER=3: K=[K_{1,1} K_{1,2} K_{1,3}, where K_{i,j}=[k11 k12 k13, + // K_{2,1} K_{1,1} K_{1,2}, k21 k11 k12, + // K_{3,1} K_{2,1} K_{1,1}]; k31 k21 k11]; + // K is of size order^3 x order^3 + // (i.e. order^2 x order^2 Toeplitz blocks of size order x order), + // K is very close to be Toeplitz itself and even circulant. + // In order to actually embed K into a circulant matrix C one just + // needs to insert (ORDER-1) extra lines/columns (to each block). + +// std::cout<< "K=" <<std::endl; +// for (unsigned int i=0; i<nnodes; ++i){ +// for (unsigned int j=0; j<nnodes; ++j){ +// std::cout<< K[i*nnodes+j]<<", "; +// } +// std::cout<<std::endl; +// } +// std::cout<<std::endl; + +// // Check matrix node_diff +// std::cout<< "node_diff=" <<std::endl; +// for (unsigned int i=0; i<nnodes; ++i){ +// for (unsigned int j=0; j<nnodes; ++j){ +// std::cout<< node_diff[i*nnodes+j] <<", "; +// } +// std::cout<<std::endl; +// } +// std::cout<<std::endl; + +// // Expected ido for the (2*ORDER-1)^3x(2*ORDER-1)^3 circulant matrix +// for (unsigned int i=0; i<rc; ++i){ +// for (unsigned int j=0; j<rc; ++j){ +// if(i>j) std::cout<< i-j-1 << ", "; +// else std::cout<< rc+i-j-1 << ", "; +// } std::cout<<std::endl; +// } std::cout<<std::endl; + +// // kernel evaluated at previous ido returns a circulant matrix +// for (unsigned int i=0; i<rc/2; ++i){ +// for (unsigned int j=0; j<rc/2; ++j){ +// if(i>j) std::cout<< C[i-j-1] << ", "; +// else std::cout<< C[rc+i-j-1] << ", "; +// } std::cout<<std::endl; +// } std::cout<<std::endl; + + // In 1D the Zero Padding consists in + // inserting ORDER-1 zeros in the multipole exp + // in order to apply the (ORDER+ORDER-1)x(ORDER+ORDER-1) + // circulant matrix to it. + // Let us extend it to the 3D case: + FReal MultExp[nnodes]; FReal PaddedMultExp[rc]; + for (unsigned int i=0; i<nnodes; ++i) MultExp[i]=W[i]; + FReal LocalExp[nnodes]; FReal PaddedLocalExp[rc]; + FBlas::setzero(nnodes,LocalExp); + FBlas::setzero(rc,PaddedLocalExp); + + std::cout<< "Expected LocalExp: "<<std::endl; + for (unsigned int i=0; i<nnodes; ++i) + std::cout<< F[i] << ", "; + std::cout<<std::endl; + + ///////////////////////////////////////////////////////////////////////////////////// + // Application of circulant Toeplitz system in PHYSICAL SPACE + std::cout<< "Apply circulant M2L in Physical space: "; + time.tic(); + for (unsigned int i=0; i<nnodes; ++i){ + + // Pad Multipole Expansion with respect to current row + FBlas::setzero(rc,PaddedMultExp); + for (unsigned int j=0; j<nnodes; ++j) + PaddedMultExp[node_diff[i*nnodes+j]]=MultExp[j]; + +// std::cout<< "Padded MultExp for row i=" << i << ": "<<std::endl; +// for (unsigned int p=0; p<rc; ++p) +// std::cout<< PaddedMultExp[p] << ", "; +// std::cout<<std::endl; + + // Application of M2L in PHYSICAL SPACE + for (unsigned int pj=0; pj<rc; ++pj) + LocalExp[i]+=C[pj]*PaddedMultExp[pj]; + + }// end i + time.tac(); + std::cout << "took " << time.elapsed() << "sec." << std::endl; + + std::cout<< "LocalExp via product in PHYSICAL SPACE: "<<std::endl; + for (unsigned int p=0; p<nnodes; ++p) + std::cout<< LocalExp[p] << ", "; + std::cout<<std::endl; + std::cout<<std::endl; + + ///////////////////////////////////////////////////////////////////////////////////// + // Efficient application of the Toeplitz system in FOURIER SPACE + FComplexe FPMultExp[rc]; + FComplexe FPLocalExp[rc]; + FReal PLocalExp[rc]; + + //for (unsigned int n=0; n<rc; ++n) FPLocalExp[n]=FComplexe(0.0,0.0); + FBlas::c_setzero(rc,reinterpret_cast<FReal*>(FPLocalExp)); + + FBlas::setzero(rc,PLocalExp); + + // Init DFT + //FDft Dft(rc); // direct version + FFft Dft(rc); // fast version + + // Get first COLUMN of K and Store in T + FReal T[rc]; + // use permutations + unsigned int perm[rc]; + for(unsigned int p=0; p<rc; ++p){ + if(p>0) perm[p]=p-1; + else perm[p]=rc+p-1; +// std::cout << "perm["<< p << "]="<< perm[p] << std::endl; + } + + for (unsigned int i=0; i<rc; ++i){ + // keep this lines commented to see what permutation accounts for: + // for (unsigned int j=0; j<rc; ++j){ +// if(i>0) T[i]=C[i-0-1]; +// else T[i]=C[rc+i-0-1]; + T[i]=C[perm[i]]; + // } + } + +// std::cout<< "First column of C[rc x rc]: "<<std::endl; +// for (unsigned int p=0; p<rc; ++p) +// std::cout<< T[p] << ", "; +// std::cout<<std::endl; + + // Apply DFT to T + FComplexe FT[rc]; + // for (unsigned int n=0; n<rc; ++n) FT[n]=FComplexe(0.0,0.0); + FBlas::c_setzero(rc,reinterpret_cast<FReal*>(FT)); + + // if first COLUMN (T) of C is used + Dft.applyDFT(T,FT); +// // if first ROW of C is used +// Dft.applyDFT(C,FT); + + // Pad physical MultExp + FBlas::setzero(rc,PaddedMultExp); //part of padding + for (unsigned int j=0; j<nnodes; ++j){ + // if first COLUMN (T) of C is used + PaddedMultExp[node_diff[j*nnodes]]=MultExp[j]; +// // if first ROW of C is used +// PaddedMultExp[node_diff[j]]=MultExp[j]; + } + +// std::cout<< "Padded MultExp for row i=" << i << ": "<<std::endl; +// for (unsigned int p=0; p<rc; ++p) +// std::cout<< PaddedMultExp[p] << ", "; +// std::cout<<std::endl; + + + // Set transformed MultExp to 0 + // for (unsigned int n=0; n<rc; ++n) FPMultExp[n]=FComplexe(0.0,0.0); + FBlas::c_setzero(rc,reinterpret_cast<FReal*>(FPMultExp)); + + // Transform PaddedMultExp + Dft.applyDFT(PaddedMultExp,FPMultExp); + + std::cout<< "Apply M2L in Fourier space: "; + time.tic(); + + // Application of M2L in FOURIER SPACE + // > Use FMkl::c_had for hadamard product + // if mkl is used as blas (TODO otherwise use FBlas::c_had()) +// FMkl::c_had(rc,reinterpret_cast<FReal*>(FT), +// reinterpret_cast<FReal*>(FPMultExp), +// reinterpret_cast<FReal*>(FPLocalExp)); + // > or perform entrywise product manually + for (unsigned int pj=0; pj<rc; ++pj){ + FPLocalExp[pj]=FT[pj]; + FPLocalExp[pj]*=FPMultExp[pj]; + } + + time.tac(); + std::cout << "took " << time.elapsed() << "sec." << std::endl; + +// std::cout<< "Transfo Padded LocalExp: "<<std::endl; +// for (unsigned int p=0; p<rc; ++p) +// std::cout<< FPLocalExp[p] << ", "; +// std::cout<<std::endl; + + Dft.applyIDFT(FPLocalExp,PLocalExp); + +// std::cout<< "Padded LocalExp: "<<std::endl; +// for (unsigned int p=0; p<rc; ++p) +// std::cout<< PLocalExp[p] << ", "; +// std::cout<<std::endl; + + // Unpad + for (unsigned int j=0; j<nnodes; ++j){ + // if first COLUMN (T) of C is used + LocalExp[j]=PLocalExp[node_diff[nnodes-j-1]]; +// // if first ROW of C is used +// LocalExp[j]=PLocalExp[node_diff[j*nnodes]]; + } + +// std::cout<< "Mask to be applied to Padded LocalExp: "<<std::endl; +// for (unsigned int j=0; j<nnodes; ++j) +// std::cout<< node_diff[nnodes-j-1] << ", "; +// std::cout<<std::endl; + + std::cout<< "LocalExp via product in FOURIER SPACE: "<<std::endl; + for (unsigned int p=0; p<nnodes; ++p) + std::cout<< LocalExp[p] << ", "; + std::cout<<std::endl; + + + ///////////////////////////////////////////////////////////////////////////////////// + + std::cout << "L2P (potential) ... " << std::flush; + time.tic(); + // Interpolate p_i = \sum_m^L S(x_i,\bar x_m) * F_m + S.applyL2P(cx, width, F, X.getTargets()); + std::cout << "took " << time.tacAndElapsed() << "s" << std::endl; + + std::cout << "L2P (forces) ... " << std::flush; + time.tic(); + // Interpolate f_i = \sum_m^L P(x_i,\bar x_m) * F_m + S.applyL2PGradient(cx, width, F, X.getTargets()); + std::cout << "took " << time.tacAndElapsed() << "s" << std::endl; + + //////////////////////////////////////////////////////////////////// + // direct computation + std::cout << "Compute interactions directly ..." << std::endl; + time.tic(); + + FReal* approx_f = new FReal [M * 3]; + FReal* f = new FReal [M * 3]; + FBlas::setzero(M*3, f); + + FReal* approx_p = new FReal[M]; + FReal* p = new FReal[M]; + FBlas::setzero(M, p); + + { // start direct computation + unsigned int counter = 0; + + for(int idxPartX = 0 ; idxPartX < X.getSrc()->getNbParticles() ; ++idxPartX){ + const FPoint x = FPoint(X.getSrc()->getPositions()[0][idxPartX], + X.getSrc()->getPositions()[1][idxPartX], + X.getSrc()->getPositions()[2][idxPartX]); + const FReal wx = X.getSrc()->getPhysicalValues()[idxPartX]; + + for(int idxPartY = 0 ; idxPartY < Y.getSrc()->getNbParticles() ; ++idxPartY){ + const FPoint y = FPoint(Y.getSrc()->getPositions()[0][idxPartY], + Y.getSrc()->getPositions()[1][idxPartY], + Y.getSrc()->getPositions()[2][idxPartY]); + const FReal wy = Y.getSrc()->getPhysicalValues()[idxPartY]; + + const FReal one_over_r = MatrixKernel.evaluate(x, y); + // potential + p[counter] += one_over_r * wy; + // force + FPoint force(y - x); + force *= one_over_r*one_over_r*one_over_r; + f[counter*3 + 0] += force.getX() * wx * wy; + f[counter*3 + 1] += force.getY() * wx * wy; + f[counter*3 + 2] += force.getZ() * wx * wy; + } + + counter++; + } + } // end direct computation + + + time.tac(); + std::cout << "Done in " << time.elapsed() << "sec." << std::endl; + + + //////////////////////////////////////////////////////////////////// + unsigned int counter = 0; + for(int idxPartX = 0 ; idxPartX < X.getSrc()->getNbParticles() ; ++idxPartX){ + approx_p[counter] = X.getSrc()->getPotentials()[idxPartX]; + const FPoint force = FPoint(X.getSrc()->getForcesX()[idxPartX], + X.getSrc()->getForcesY()[idxPartX], + X.getSrc()->getForcesZ()[idxPartX]); + approx_f[counter*3 + 0] = force.getX(); + approx_f[counter*3 + 1] = force.getY(); + approx_f[counter*3 + 2] = force.getZ(); + + counter++; + } + + std::cout << "\nPotential error:" << std::endl; + std::cout << "Relative error = " << FMath::FAccurater( p, approx_p, M) << std::endl; + + std::cout << "\nForce error:" << std::endl; + std::cout << "Relative L2 error = " << FMath::FAccurater( f, approx_f, M*3) << std::endl; + std::cout << std::endl; + + // free memory + delete [] approx_p; + delete [] p; + delete [] approx_f; + delete [] f; + + + return 0; +} diff --git a/UTests/utestChebyshevDirect.cpp b/UTests/utestChebyshevDirect.cpp index c71fc1c1b9ed4b708739d66d93a323a7433cf4d3..0d87c0d5729b06f8fd0d793c0e5d1c822de35ad0 100755 --- a/UTests/utestChebyshevDirect.cpp +++ b/UTests/utestChebyshevDirect.cpp @@ -35,7 +35,7 @@ #include "../Src/Kernels/Chebyshev/FChebCell.hpp" -#include "../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../Src/Kernels/Chebyshev/FChebKernel.hpp" #include "../Src/Kernels/Chebyshev/FChebSymKernel.hpp" @@ -203,7 +203,7 @@ class TestChebyshevDirect : public FUTester<TestChebyshevDirect> { const FReal epsilon = FReal(1e-5); typedef FP2PParticleContainerIndexed ContainerClass; typedef FSimpleLeaf<ContainerClass> LeafClass; - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; typedef FChebCell<ORDER> CellClass; typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; typedef FChebKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; @@ -218,7 +218,7 @@ class TestChebyshevDirect : public FUTester<TestChebyshevDirect> { const FReal epsilon = FReal(1e-5); typedef FP2PParticleContainerIndexed ContainerClass; typedef FSimpleLeaf<ContainerClass> LeafClass; - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; typedef FChebCell<ORDER> CellClass; typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; typedef FChebSymKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; diff --git a/UTests/utestChebyshevDirectPeriodic.cpp b/UTests/utestChebyshevDirectPeriodic.cpp index a7e821489a20284cae4d34406cd482059e25a182..944db45165a5826f4cbcd291f271b4f59db432b1 100755 --- a/UTests/utestChebyshevDirectPeriodic.cpp +++ b/UTests/utestChebyshevDirectPeriodic.cpp @@ -32,7 +32,7 @@ #include "../Src/Kernels/Chebyshev/FChebCell.hpp" -#include "../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../Src/Kernels/Chebyshev/FChebKernel.hpp" #include "../Src/Kernels/Chebyshev/FChebSymKernel.hpp" @@ -225,7 +225,7 @@ class TestChebyshevDirect : public FUTester<TestChebyshevDirect> { const FReal epsilon = FReal(1e-5); typedef FP2PParticleContainerIndexed ContainerClass; typedef FSimpleLeaf<ContainerClass> LeafClass; - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; typedef FChebCell<ORDER> CellClass; typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; typedef FChebKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; @@ -240,7 +240,7 @@ class TestChebyshevDirect : public FUTester<TestChebyshevDirect> { const FReal epsilon = FReal(1e-5); typedef FP2PParticleContainerIndexed ContainerClass; typedef FSimpleLeaf<ContainerClass> LeafClass; - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; typedef FChebCell<ORDER> CellClass; typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; typedef FChebSymKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; diff --git a/UTests/utestChebyshevMultiRhs.cpp b/UTests/utestChebyshevMultiRhs.cpp index d86fd0c0d6fe2c6de2ca23b14529f53a73f937d1..b2634d28db2c507ac52b03d71740b01ba4422fab 100644 --- a/UTests/utestChebyshevMultiRhs.cpp +++ b/UTests/utestChebyshevMultiRhs.cpp @@ -35,7 +35,7 @@ #include "../Src/Kernels/Chebyshev/FChebCell.hpp" -#include "../Src/Kernels/Chebyshev/FChebMatrixKernel.hpp" +#include "../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" #include "../Src/Kernels/Chebyshev/FChebKernel.hpp" #include "../Src/Kernels/Chebyshev/FChebSymKernel.hpp" @@ -226,7 +226,7 @@ class TestChebyshevDirect : public FUTester<TestChebyshevDirect> { const FReal epsilon = FReal(1e-5); typedef FP2PParticleContainerIndexed ContainerClass; typedef FSimpleLeaf<ContainerClass> LeafClass; - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; typedef FChebCell<ORDER, NbRhs> CellClass; typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; typedef FChebKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER, NbRhs> KernelClass; @@ -242,7 +242,7 @@ class TestChebyshevDirect : public FUTester<TestChebyshevDirect> { const FReal epsilon = FReal(1e-5); typedef FP2PParticleContainerIndexed ContainerClass; typedef FSimpleLeaf<ContainerClass> LeafClass; - typedef FChebMatrixKernelR MatrixKernelClass; + typedef FInterpMatrixKernelR MatrixKernelClass; typedef FChebCell<ORDER, NbRhs> CellClass; typedef FOctree<CellClass,ContainerClass,LeafClass> OctreeClass; typedef FChebSymKernel<CellClass,ContainerClass,MatrixKernelClass,ORDER, NbRhs> KernelClass;