diff --git a/Sources/Core/FFmmAlgorithmArrayTsm.hpp b/Sources/Core/FFmmAlgorithmArrayTsm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..3d3f9e37b2ce18f24b7025d69fd48f7485d8403a --- /dev/null +++ b/Sources/Core/FFmmAlgorithmArrayTsm.hpp @@ -0,0 +1,367 @@ +#ifndef FFMMALGORITHMARRAYTSM_HPP +#define FFMMALGORITHMARRAYTSM_HPP +// /!\ Please, you must read the license at the bottom of this page + +#include "../Utils/FAssertable.hpp" +#include "../Utils/FDebug.hpp" +#include "../Utils/FTrace.hpp" +#include "../Utils/FTic.hpp" +#include "../Utils/FGlobal.hpp" + +#include "../Containers/FOctree.hpp" + + +#include <omp.h> + +/** +* @author Berenger Bramas (berenger.bramas@inria.fr) +* @class FFmmAlgorithmArrayTsm +* @brief +* Please read the license +* +* This class is a threaded FMM algorithm +* It just iterates on a tree and call the kernels with good arguments. +* It used the inspector-executor model : +* iterates on the tree and builds an array to work in parallel on this array +* +* Of course this class does not deallocate pointer given in arguements. +* +* Threaded & based on the inspector-executor model +* schedule(runtime) +*/ +template<template< class ParticuleClass, class CellClass, int OctreeHeight> class KernelClass, + class ParticuleClass, class CellClass, + template<class ParticuleClass> class LeafClass, + int OctreeHeight, int SubtreeHeight> +class FFmmAlgorithmArrayTsm : protected FAssertable{ + // To reduce the size of variable type based on foctree in this file + typedef FOctree<ParticuleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree; + typedef typename FOctree<ParticuleClass, CellClass,LeafClass, OctreeHeight, SubtreeHeight>::Iterator OctreeIterator; + typedef KernelClass<ParticuleClass, CellClass, OctreeHeight> Kernel; + + Octree* const tree; //< The octree to work on + Kernel* kernels[FThreadNumbers]; //< The kernels + + FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time + FDEBUG(FTic computationCounter); //< In case of debug: to count computation time + + OctreeIterator* iterArray; + +public: + /** The constructor need the octree and the kernels used for computation + * @param inTree the octree to work on + * @param inKernels the kernels to call + * An assert is launched if one of the arguments is null + */ + FFmmAlgorithmArrayTsm(Octree* const inTree, Kernel* const inKernels) + : tree(inTree) , iterArray(0) { + + assert(tree, "tree cannot be null", __LINE__, __FILE__); + assert(kernels, "kernels cannot be null", __LINE__, __FILE__); + + for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + this->kernels[idxThread] = new KernelClass<ParticuleClass, CellClass, OctreeHeight>(*inKernels); + } + + FDEBUG(FDebug::Controller << "FFmmAlgorithmArrayTsm\n"); + } + + /** Default destructor */ + virtual ~FFmmAlgorithmArrayTsm(){ + for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + delete this->kernels[idxThread]; + } + } + + /** + * To execute the fmm algorithm + * Call this function to run the complete algorithm + */ + void execute(){ + FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); + + // Count leaf + int leafs = 0; + OctreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + do{ + ++leafs; + } while(octreeIterator.moveRight()); + iterArray = new OctreeIterator[leafs]; + assert(iterArray, "iterArray bad alloc", __LINE__, __FILE__); + + for(int idxThread = 0 ; idxThread < FThreadNumbers ; ++idxThread){ + this->kernels[idxThread]->init(); + } + + bottomPass(); + upwardPass(); + + downardPass(); + + directPass(); + + delete [] iterArray; + iterArray = 0; + + FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); + } + + /** P2M */ + void bottomPass(){ + FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); + FDEBUG( FDebug::Controller.write("\tStart Bottom Pass\n").write(FDebug::Flush) ); + FDEBUG( counterTime.tic() ); + + OctreeIterator octreeIterator(tree); + int leafs = 0; + // Iterate on leafs + octreeIterator.gotoBottomLeft(); + do{ + iterArray[leafs] = octreeIterator; + ++leafs; + } while(octreeIterator.moveRight()); + + FDEBUG(computationCounter.tic()); + #pragma omp parallel num_threads(FThreadNumbers) + { + Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; + #pragma omp for + for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){ + // We need the current cell that represent the leaf + // and the list of particules + FList<ParticuleClass*>* const sources = iterArray[idxLeafs].getCurrentListSources(); + if(sources->getSize()){ + iterArray[idxLeafs].getCurrentCell()->setSourcesChildTrue(); + myThreadkernels->P2M( iterArray[idxLeafs].getCurrentCell() , sources); + } + if(iterArray[idxLeafs].getCurrentListTargets()->getSize()){ + iterArray[idxLeafs].getCurrentCell()->setTargetsChildTrue(); + } + } + } + FDEBUG(computationCounter.tac()); + + FDEBUG( counterTime.tac() ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); + FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); + } + + /** M2M */ + void upwardPass(){ + FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); + FDEBUG( FDebug::Controller.write("\tStart Upward Pass\n").write(FDebug::Flush); ); + FDEBUG( counterTime.tic() ); + FDEBUG( double totalComputation = 0 ); + + // Start from leal level - 1 + OctreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + octreeIterator.moveUp(); + OctreeIterator avoidGotoLeftIterator(octreeIterator); + + // for each levels + for(int idxLevel = OctreeHeight - 2 ; idxLevel > 1 ; --idxLevel ){ + int leafs = 0; + // for each cells + do{ + iterArray[leafs] = octreeIterator; + ++leafs; + } while(octreeIterator.moveRight()); + avoidGotoLeftIterator.moveUp(); + octreeIterator = avoidGotoLeftIterator;// equal octreeIterator.moveUp(); octreeIterator.gotoLeft(); + + FDEBUG(computationCounter.tic()); + #pragma omp parallel num_threads(FThreadNumbers) + { + Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; + #pragma omp for + for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){ + // We need the current cell and the child + // child is an array (of 8 child) that may be null + CellClass* potentialChild[8]; + CellClass** const realChild = iterArray[idxLeafs].getCurrentChild(); + CellClass* const currentCell = iterArray[idxLeafs].getCurrentCell(); + for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ + potentialChild[idxChild] = 0; + if(realChild[idxChild]){ + if(realChild[idxChild]->hasSourcesChild()){ + currentCell->setSourcesChildTrue(); + potentialChild[idxChild] = realChild[idxChild]; + } + if(realChild[idxChild]->hasTargetsChild()){ + currentCell->setTargetsChildTrue(); + } + } + } + myThreadkernels->M2M( currentCell , potentialChild, idxLevel); + } + } + FDEBUG(computationCounter.tac()); + FDEBUG(totalComputation += computationCounter.elapsed()); + } + + FDEBUG( counterTime.tac() ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); + } + + /** M2L L2L */ + void downardPass(){ + FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); + FDEBUG( FDebug::Controller.write("\tStart Downward Pass (M2L)\n").write(FDebug::Flush); ); + FDEBUG( counterTime.tic() ); + FDEBUG( double totalComputation = 0 ); + + { // first M2L + OctreeIterator octreeIterator(tree); + octreeIterator.moveDown(); + OctreeIterator avoidGotoLeftIterator(octreeIterator); + + // for each levels + for(int idxLevel = 2 ; idxLevel < OctreeHeight ; ++idxLevel ){ + int leafs = 0; + // for each cells + do{ + iterArray[leafs] = octreeIterator; + ++leafs; + } while(octreeIterator.moveRight()); + avoidGotoLeftIterator.moveDown(); + octreeIterator = avoidGotoLeftIterator; + + FDEBUG(computationCounter.tic()); + #pragma omp parallel num_threads(FThreadNumbers) + { + Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; + CellClass* neighbors[208]; + #pragma omp for + for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){ + CellClass* const currentCell = iterArray[idxLeafs].getCurrentCell(); + if(currentCell->hasTargetsChild()){ + const int counter = tree->getDistantNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalIndex(),idxLevel); + int offsetTargetNeighbors = 0; + for(int idxRealNeighbors = 0 ; idxRealNeighbors < counter ; ++idxRealNeighbors, ++offsetTargetNeighbors){ + if(neighbors[idxRealNeighbors]->hasSourcesChild()){ + if(idxRealNeighbors != offsetTargetNeighbors){ + neighbors[offsetTargetNeighbors] = neighbors[idxRealNeighbors]; + } + } + else{ + --offsetTargetNeighbors; + } + } + if(offsetTargetNeighbors){ + myThreadkernels->M2L( currentCell , neighbors, offsetTargetNeighbors, idxLevel); + } + } + } + } + FDEBUG(computationCounter.tac()); + FDEBUG(totalComputation += computationCounter.elapsed()); + } + } + FDEBUG( counterTime.tac() ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + + FDEBUG( FDebug::Controller.write("\tStart Downward Pass (L2L)\n").write(FDebug::Flush); ); + FDEBUG( counterTime.tic() ); + FDEBUG( totalComputation = 0 ); + { // second L2L + OctreeIterator octreeIterator(tree); + octreeIterator.moveDown(); + + OctreeIterator avoidGotoLeftIterator(octreeIterator); + + const int heightMinusOne = OctreeHeight - 1; + // for each levels exepted leaf level + for(int idxLevel = 2 ; idxLevel < heightMinusOne ; ++idxLevel ){ + int leafs = 0; + // for each cells + do{ + iterArray[leafs] = octreeIterator; + ++leafs; + } while(octreeIterator.moveRight()); + avoidGotoLeftIterator.moveDown(); + octreeIterator = avoidGotoLeftIterator; + + FDEBUG(computationCounter.tic()); + #pragma omp parallel num_threads(FThreadNumbers) + { + Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; + #pragma omp for + for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){ + CellClass* potentialChild[8]; + CellClass** const realChild = iterArray[idxLeafs].getCurrentChild(); + CellClass* const currentCell = iterArray[idxLeafs].getCurrentCell(); + for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ + if(realChild[idxChild] && realChild[idxChild]->hasTargetsChild()){ + potentialChild[idxChild] = realChild[idxChild]; + } + else{ + potentialChild[idxChild] = 0; + } + } + myThreadkernels->L2L( currentCell , potentialChild, idxLevel); + } + } + FDEBUG(computationCounter.tac()); + FDEBUG(totalComputation += computationCounter.elapsed()); + } + } + + FDEBUG( counterTime.tac() ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << totalComputation << " s\n" ); + FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); + } + + /** P2P */ + void directPass(){ + FTRACE( FTrace::Controller.enterFunction(FTrace::FMM, __FUNCTION__ , __FILE__ , __LINE__) ); + FDEBUG( FDebug::Controller.write("\tStart Direct Pass\n").write(FDebug::Flush); ); + FDEBUG( counterTime.tic() ); + + int leafs = 0; + { + OctreeIterator octreeIterator(tree); + octreeIterator.gotoBottomLeft(); + // for each leafs + do{ + iterArray[leafs] = octreeIterator; + ++leafs; + } while(octreeIterator.moveRight()); + } + + const int heightMinusOne = OctreeHeight - 1; + FDEBUG(computationCounter.tic()); + #pragma omp parallel num_threads(FThreadNumbers) + { + Kernel * const myThreadkernels = kernels[omp_get_thread_num()]; + // There is a maximum of 26 neighbors + FList<ParticuleClass*>* neighbors[26]; + + #pragma omp for + for(int idxLeafs = 0 ; idxLeafs < leafs ; ++idxLeafs){ + myThreadkernels->L2P(iterArray[idxLeafs].getCurrentCell(), iterArray[idxLeafs].getCurrentListTargets()); + // need the current particules and neighbors particules + const int counter = tree->getLeafsNeighbors(neighbors, iterArray[idxLeafs].getCurrentGlobalIndex(),heightMinusOne); + myThreadkernels->P2P( iterArray[idxLeafs].getCurrentListTargets(), iterArray[idxLeafs].getCurrentListSources() , neighbors, counter); + } + } + FDEBUG(computationCounter.tac()); + + FDEBUG( counterTime.tac() ); + FDEBUG( FDebug::Controller << "\tFinished (" << counterTime.elapsed() << "s)\n" ); + FDEBUG( FDebug::Controller << "\t\t Computation : " << computationCounter.elapsed() << " s\n" ); + FTRACE( FTrace::Controller.leaveFunction(FTrace::FMM) ); + } + +}; + + +#endif //FFMMALGORITHMARRAYTSM_HPP + +// [--LICENSE--] diff --git a/Sources/Core/FFmmAlgorithmTsm.hpp b/Sources/Core/FFmmAlgorithmTsm.hpp index 5666f95b368d398098cfc85b22f8aa94195d7d30..18db919b1dd92dc1d563ec75b82a2e64b07033b4 100644 --- a/Sources/Core/FFmmAlgorithmTsm.hpp +++ b/Sources/Core/FFmmAlgorithmTsm.hpp @@ -22,10 +22,10 @@ * Of course this class does not deallocate pointer given in arguements. */ template<template< class ParticuleClass, class CellClass, int OctreeHeight> class KernelClass, - class ParticuleClass, class CellClass, - template<class ParticuleClass> class LeafClass, - int OctreeHeight, int SubtreeHeight> -class FFmmAlgorithmTsm : protected FAssertable{ +class ParticuleClass, class CellClass, +template<class ParticuleClass> class LeafClass, +int OctreeHeight, int SubtreeHeight> + class FFmmAlgorithmTsm : protected FAssertable{ // To reduce the size of variable type based on foctree in this file typedef FOctree<ParticuleClass, CellClass, LeafClass, OctreeHeight, SubtreeHeight> Octree; typedef typename FOctree<ParticuleClass, CellClass,LeafClass, OctreeHeight, SubtreeHeight>::Iterator FOctreeIterator; @@ -33,17 +33,17 @@ class FFmmAlgorithmTsm : protected FAssertable{ Octree* const tree; //< The octree to work on KernelClass<ParticuleClass, CellClass, OctreeHeight>* const kernels; //< The kernels - FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time - FDEBUG(FTic computationCounter); //< In case of debug: to count computation time + FDEBUG(FTic counterTime); //< In case of debug: to count the elapsed time + FDEBUG(FTic computationCounter); //< In case of debug: to count computation time -public: +public: /** The constructor need the octree and the kernels used for computation * @param inTree the octree to work on * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ FFmmAlgorithmTsm(Octree* const inTree, KernelClass<ParticuleClass,CellClass,OctreeHeight>* const inKernels) - : tree(inTree) , kernels(inKernels) { + : tree(inTree) , kernels(inKernels) { assert(tree, "tree cannot be null", __LINE__, __FILE__); assert(kernels, "kernels cannot be null", __LINE__, __FILE__); diff --git a/Sources/Fmb/FFmbKernelsBlas.hpp b/Sources/Fmb/FFmbKernelsBlas.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c4b24f4ee1f1a08bc4baed778f95e2b08ef3bfaa --- /dev/null +++ b/Sources/Fmb/FFmbKernelsBlas.hpp @@ -0,0 +1,1442 @@ +#ifndef FFMBKERNELSBLAS_HPP +#define FFMBKERNELSBLAS_HPP +// /!\ Please, you must read the license at the bottom of this page + +#include "../Utils/FGlobal.hpp" +#include "../Components/FAbstractKernels.hpp" + +#include "../Containers/FTreeCoordinate.hpp" +#include "../Containers/FList.hpp" + +#include "../Utils/F3DPosition.hpp" +#include "../Utils/FComplexe.hpp" +#include "../Utils/FMath.hpp" +#include "../Utils/FTrace.hpp" + +#include <iostream> +#include <cblas.h> + +/** +* @author Berenger Bramas (berenger.bramas@inria.fr) +* @class FFmbKernelsBlas +* @brief +* Please read the license +* +* This code is coming from the fmb library (see documentation to know more about it). +* It is a copy'n paste file with a few modifications. +* To be able to make link between this file and the originals ones you can +* look to the commentary before function or attributes declarations. +* +* This class is abstract because the fmb's algorithm is able to compute +* forces / potential / both +* So this class is the trunk and defines code used for each 3 computations. +* +* Needs cell to extend {FExtendFmbCell} +*/ +template< class ParticuleClass, class CellClass, int TreeHeight> +class FFmbKernelsBlas : public FAbstractKernels<ParticuleClass,CellClass, TreeHeight> { +protected: + + // _GRAVITATIONAL_ + static const int FMB_Info_eps_soft_square = 1; + + // LMax is used in several algorithm + // it is just a copy of FMB_Info.P + static const int LMax = FMB_Info_P; + + // Can be 2 * FMB_Info_P if user ask to + static const int FMB_Info_M2L_P = 2 * FMB_Info_P; + static const int FMB_Info_M2L_exp_size = ((FMB_Info_M2L_P)+1) * ((FMB_Info_M2L_P)+2) * 0.5; + + // Default value set in main + static const int FMB_Info_ws = 1; + + // INTERACTION_LIST_SIZE_ALONG_1_DIM + static const int size1Dim = (2*(2*(FMB_Info_ws)+1) +1); + // HALF_INTERACTION_LIST_SIZE_ALONG_1_DIM + static const int halphSize1Dim = (2*(FMB_Info_ws)+1); + + // EXPANSION_SIZE(FMB_Info.P) + static const int FMB_Info_exp_size = ((FMB_Info_P)+1) * ((FMB_Info_P)+2) * 0.5; + // NEXP_SIZE(FMB_Info.P) + static const int FMB_Info_nexp_size = (FMB_Info_P + 1) * (FMB_Info_P + 1); + + // Width of the box at the root level + FReal treeWidthAtRoot; + + // transfer_M2M_container + FComplexe transitionM2M[TreeHeight][8][FMB_Info_exp_size]; + // transfer_L2L_container + FComplexe transitionL2L[TreeHeight][8][FMB_Info_exp_size]; + + // transfer_container + FComplexe* transferM2L[TreeHeight][size1Dim][size1Dim][size1Dim]; + + //[OK] spherical_harmonic_Outer_coefficients_array + FReal sphereHarmoOuterCoef[FMB_Info_M2L_P+1]; + //[OK] spherical_harmonic_Inner_coefficients_array + FReal sphereHarmoInnerCoef[FMB_Info_exp_size]; + + FComplexe current_thread_Y[FMB_Info_exp_size]; + + // p_Y_theta_derivated + FComplexe current_thread_Y_theta_derivated[FMB_Info_exp_size]; + + // [OK?] p_precomputed_cos_and_sin_array + FComplexe cosSin[FMB_Info_M2L_P + 1]; + // [OK?] p_associated_Legendre_function_Array + FReal legendre[FMB_Info_M2L_exp_size]; + + // pow_of_I_array + static const FReal PiArrayInner[4]; + // pow_of_O_array + static const FReal PiArrayOuter[4]; + + // To store spherical position + struct Spherical { + FReal r, cosTheta, sinTheta, phi; + }; + + int expansion_Redirection_array_for_j[FMB_Info_M2L_P + 1 ]; + + bool FMB_Info_up_to_P_in_M2L; + + static const int FF_MATRIX_ROW_DIM = FMB_Info_exp_size; + static const int FF_MATRIX_COLUMN_DIM = FMB_Info_nexp_size; + static const int FF_MATRIX_SIZE = long(FF_MATRIX_ROW_DIM) * long(FF_MATRIX_COLUMN_DIM); + + ////////////////////////////////////////////////////////////////// + // Allocation + ////////////////////////////////////////////////////////////////// + + void expansion_Redirection_array_for_j_Initialize() { + for( int h = 0; h <= FMB_Info_M2L_P ; ++h ){ + expansion_Redirection_array_for_j[h] = static_cast<int>( h * ( h + 1 ) * 0.5 ); + } + } + + //spherical_harmonic_Outer_and_Inner_coefficients_array_Initialize + void sphericalHarmonicInitialize(){ + // Outer coefficients: + //std::cout << "sphereHarmoOuterCoef\n"; + FReal factOuter = 1.0; + // in FMB code stoped at <= FMB_Info_M2L_P but this is not sufficient + for(int idxP = 0 ; idxP <= FMB_Info_M2L_P; factOuter *= (++idxP) ){ + this->sphereHarmoOuterCoef[idxP] = factOuter; + //printf("spherical_harmonic_Outer_coefficients_array %e\n",this->sphereHarmoOuterCoef[idxP]); + //printf("fact_l %e\n",factOuter); + //printf("l %d\n",idxP); + } + + // Inner coefficients: + FReal* currentInner = this->sphereHarmoInnerCoef; + FReal factInner = 1.0; + FReal powN1idxP = 1.0; + //std::cout << "sphereHarmoInnerCoef\n"; + for(int idxP = 0 ; idxP <= this->FMB_Info_M2L_P ; factInner *= (++idxP), powN1idxP = -powN1idxP){ + for(int idxMP = 0, fact_l_m = factInner; idxMP <= idxP ; fact_l_m *= idxP+(++idxMP), ++currentInner){ + *currentInner = powN1idxP / fact_l_m; + //std::cout << (*currentInner) << "\n"; + } + } + + //for(int temp = 0 ; temp < 6 ; ++temp){ + // std::cout << this->sphereHarmoInnerCoef[temp] << "\n"; + //} + } + + + // transfer_L2L_Allocate + // transfer_M2M_Allocate + // transfer_M2L_Allocate + void transferAllocate(){ + // M2M L2L + /*this->transitionM2M = new FComplexe**[TreeHeight]; + this->transitionL2L = new FComplexe**[TreeHeight]; + + for(int idxLevel = 0 ; idxLevel < TreeHeight ; ++idxLevel){ + + this->transitionM2M[idxLevel] = new FComplexe*[8]; + this->transitionL2L[idxLevel] = new FComplexe*[8]; + + for(long idxChild = 0; idxChild < 8; ++idxChild){ + this->transitionM2M[idxLevel][idxChild] = new FComplexe[FMB_Info_exp_size]; + this->transitionL2L[idxLevel][idxChild] = new FComplexe[FMB_Info_exp_size]; + } + }*/ + // M2L + //this->transferM2L = new FComplexe****[TreeHeight+1]; + + for(int idxLevel = 0; idxLevel < TreeHeight; ++idxLevel){ + //this->transferM2L[idxLevel] = new FComplexe***[this->size1Dim]; + + for(long idxD1 = 0 ; idxD1 < this->size1Dim; ++idxD1){ + //this->transferM2L[idxLevel][idxD1] = new FComplexe**[this->size1Dim]; + + for(long idxD2 = 0; idxD2 < this->size1Dim; ++idxD2){ + //this->transferM2L[idxLevel][idxD1][idxD2] = new FComplexe*[this->size1Dim]; + + for(long idxD3 = 0; idxD3 < this->size1Dim; ++idxD3){ + const int x = idxD1 - this->halphSize1Dim; + const int y = idxD2 - this->halphSize1Dim; + const int z = idxD3 - this->halphSize1Dim; + + if( ( x*x + y*y + z*z ) >= ( 3*this->FMB_Info_ws*this->FMB_Info_ws + 0.1 ) ){ + //[Blas] this->transferM2L[idxLevel][idxD1][idxD2][idxD3] = new FComplexe[this->FMB_Info_exp_size * this->FMB_Info_nexp_size]; + this->transferM2L[idxLevel][idxD1][idxD2][idxD3] = new FComplexe[this->FF_MATRIX_SIZE]; + } + else { + this->transferM2L[idxLevel][idxD1][idxD2][idxD3] = NULL; + } + } + } + } + } + } + // transfer_L2L_free + // transfer_M2M_free + // transfer_M2L_free + void transferDeallocate(){ + // M2M L2L + /*for(long idxLevel = 0 ; idxLevel < TreeHeight ; ++idxLevel){ + for(long idxChild = 0; idxChild < 8; ++idxChild){ + delete [] this->transitionM2M[idxLevel][idxChild]; + delete [] this->transitionL2L[idxLevel][idxChild]; + } + delete [] this->transitionM2M[idxLevel]; + delete [] transitionL2L[idxLevel]; + } + delete [] this->transitionM2M; + delete [] this->transitionL2L;*/ + // M2L + for(int idxLevel = 0 ; idxLevel < TreeHeight; ++idxLevel){ + for(long idxD1 = 0 ; idxD1 < this->size1Dim ; ++idxD1){ + for(long idxD2 = 0 ; idxD2 < this->size1Dim ; ++idxD2){ + for(long idxD3 = 0 ; idxD3 < this->size1Dim; ++idxD3){ + delete [] this->transferM2L[idxLevel][idxD1][idxD2][idxD3]; + } + //delete [] this->transferM2L[idxLevel][idxD1][idxD2]; + } + //delete [] this->transferM2L[idxLevel][idxD1]; + } + //delete [] this->transferM2L[idxLevel]; + } + //delete [] this->transferM2L; + } + + ////////////////////////////////////////////////////////////////// + // Utils + ////////////////////////////////////////////////////////////////// + + // position_2_r_cos_th_sin_th_ph + Spherical positionTsmphere(const F3DPosition& inVector){ + const FReal x2y2 = (inVector.getX() * inVector.getX()) + (inVector.getY() * inVector.getY()); + + Spherical outSphere; + + outSphere.r = FMath::Sqrt( x2y2 + (inVector.getZ() * inVector.getZ())); + outSphere.phi = FMath::Atan2(inVector.getY(),inVector.getX()); + outSphere.cosTheta = inVector.getZ() / outSphere.r; // cos_th = z/r + outSphere.sinTheta = FMath::Sqrt(x2y2) / outSphere.r; // sin_th = sqrt(x^2 + y^2)/r + + return outSphere; + } + + // associated_Legendre_function_Fill_complete_array_of_values_for_cos + void legendreFunction( const FReal inCosTheta, const FReal inSinTheta, FReal* const outResults ){ + // l=0: results[current++] = 1.0; // P_0^0(cosTheta) = 1 + int idxCurrent = 0; + outResults[idxCurrent++] = 1.0; + + // l=1: + // Compute P_1^{0} using (3) and store it into results_array: + outResults[idxCurrent++] = inCosTheta; + + // Compute P_1^1 using (2 bis) and store it into results_array + const FReal invSinTheta = -inSinTheta; // somx2 => -sinTheta + outResults[idxCurrent++] = invSinTheta; + + // l>1: + int idxCurrent1m = 1; //pointer on P_{l-1}^m P_1^0 + int idxCurrent2m = 0; //pointer on P_{l-2}^m P_0^0 + FReal fact = 3.0; + + // Remark: p_results_array_l_minus_1_m and p_results_array_l_minus_2_m + // just need to be incremented at each iteration. + for(int idxl = 2; idxl <= FMB_Info_M2L_P ; ++idxl ){ + for( int idxm = 0; idxm <= idxl - 2 ; ++idxm , ++idxCurrent , ++idxCurrent1m , ++idxCurrent2m ){ + // Compute P_l^m, l >= m+2, using (1) and store it into results_array: + outResults[idxCurrent] = (inCosTheta * ( 2 * idxl - 1 ) * outResults[idxCurrent1m] - ( idxl + idxm - 1 ) + * outResults[idxCurrent2m] ) / ( idxl - idxm ); + } + // p_results_array_l_minus_1_m now points on P_{l-1}^{l-1} + + // Compute P_l^{l-1} using (3) and store it into ptrResults: + outResults[idxCurrent++] = inCosTheta * ( 2 * idxl - 1 ) * outResults[idxCurrent1m]; + + // Compute P_l^l using (2 bis) and store it into results_array: + outResults[idxCurrent++] = fact * invSinTheta * outResults[idxCurrent1m]; + + fact += 2.0; + ++idxCurrent1m; + } + + //for(int idxprint = 0 ; idxprint <= FMB_Info_M2L_P ; ++idxprint){ + // printf("\t legendre[%d] = %e\n",idxprint ,outResults[idxprint]); + //} + } + + // spherical_harmonic_Inner + //2.7 these + void harmonicInner(const Spherical& inSphere, FComplexe* const outResults){ + + FComplexe* ptrCosSin = this->cosSin; + for(int idxl = 0 , idxlMod4 = 0; idxl <= LMax ; ++idxl, ++idxlMod4, ++ptrCosSin){ + if(idxlMod4 == 4) idxlMod4 = 0; + const FReal angleinter = FReal(idxl) * inSphere.phi; + const FReal angle = angleinter + this->PiArrayInner[idxlMod4]; + + ptrCosSin->setReal( FMath::Sin(angle + FMath::FPiDiv2) ); + ptrCosSin->setImag( FMath::Sin(angle) ); + + //printf("%d=%.15e/%.15e (angle %.15e=%d * %.15e + %.15e // (%.15e))\n",idxl,ptrCosSin->getReal(),ptrCosSin->getImag(), + // angle,idxl,inSphere.phi,this->PiArrayInner[idxlMod4],angleinter); + + //printf("sin(%.15e) = %.15e\n", + // angle + FMath::FPiDiv2,FMath::Sin(angle + FMath::FPiDiv2)); + } + + legendreFunction(inSphere.cosTheta, inSphere.sinTheta, this->legendre); + /*printf("FMB_Info_M2L_exp_size=%d\n",FMB_Info_M2L_exp_size); + for(int temp = 0 ; temp < FMB_Info_M2L_exp_size ; ++temp){ + printf("%e\n",this->legendre[temp]); + }*/ + + FComplexe* currentResult = outResults; + int idxLegendre = 0;//ptr_associated_Legendre_function_Array + int idxSphereHarmoCoef = 0; + FReal idxRl = 1.0 ; + + //printf("lmax = %d\n",LMax); + for(int idxl = 0; idxl <= this->LMax ; ++idxl, idxRl *= inSphere.r){ + for(int idxm = 0 ; idxm <= idxl ; ++idxm, ++currentResult, ++idxSphereHarmoCoef, ++idxLegendre){ + const FReal magnitude = this->sphereHarmoInnerCoef[idxSphereHarmoCoef] * idxRl * legendre[idxLegendre]; + currentResult->setReal( magnitude * this->cosSin[idxm].getReal() ); + currentResult->setImag( magnitude * this->cosSin[idxm].getImag() ); + + //printf("\t\tl = %d m = %d\n",idxl,idxm); + //printf("\t\tmagnitude=%e idxRl=%e sphereHarmoInnerCoef=%e real=%e imag=%e\n",magnitude,idxRl,this->sphereHarmoInnerCoef[idxSphereHarmoCoef],currentResult->getReal(),currentResult->getImag()); + } + } + + } + // spherical_harmonic_Outer + void harmonicOuter(const Spherical& inSphere, FComplexe* const outResults){ + + FComplexe* ptrCosSin = this->cosSin; + for(int idxl = 0, idxlMod4 = 0; idxl <= FMB_Info_M2L_P ; ++idxl, ++idxlMod4, ++ptrCosSin){ + if(idxlMod4 == 4) idxlMod4 = 0; + const FReal angle = idxl * inSphere.phi + this->PiArrayOuter[idxlMod4]; + + ptrCosSin->setReal( FMath::Sin(angle + FMath::FPiDiv2) ); + ptrCosSin->setImag( FMath::Sin(angle) ); + + //printf("l=%d \t inSphere.phi=%e \t this->PiArray[idxlMod4]=%e \t angle=%e \t FMath::Sin(angle + FMath::FPiDiv2)=%e \t FMath::Sin(angle)=%e\n", + // idxl, inSphere.phi, this->PiArrayOuter[idxlMod4], angle, FMath::Sin(angle + FMath::FPiDiv2) , FMath::Sin(angle)); + } + + legendreFunction(inSphere.cosTheta, inSphere.sinTheta, this->legendre); + + int idxLegendre = 0; + FComplexe* currentResult = outResults; + + const FReal invR = 1/inSphere.r; + FReal idxRl1 = invR; + for(int idxl = 0 ; idxl <= FMB_Info_M2L_P ; ++idxl, idxRl1 *= invR){ + for(int idxm = 0 ; idxm <= idxl ; ++idxm, ++currentResult, ++idxLegendre){ + const FReal magnitude = this->sphereHarmoOuterCoef[idxl-idxm] * idxRl1 * this->legendre[idxLegendre]; + currentResult->setReal( magnitude * this->cosSin[idxm].getReal() ); + currentResult->setImag( magnitude * this->cosSin[idxm].getImag() ); + //printf("l=%d\t m=%d\t idxRl1=%e\t magnitude=%e\n",idxl,idxm,idxRl1,magnitude); + //printf("l=%d\t m=%d\t this->cosSin[idxm].getReal()=%e\t this->cosSin[idxm].getImag()=%e\n", + // idxl,idxm,this->cosSin[idxm].getReal(),this->cosSin[idxm].getImag()); + //printf("this->sphereHarmoOuterCoef[idxl-idxm] = %e \t this->legendre[idxLegendre] = %e \n", + // this->sphereHarmoOuterCoef[idxl-idxm], + // this->legendre[idxLegendre]); + + //for(int idxTemp = 0 ; idxTemp <= idxl-idxm ; idxTemp++ ){ + // printf("\t this->sphereHarmoOuterCoef[%d] = %e\n", idxTemp,this->sphereHarmoOuterCoef[idxTemp] ); + //} + } + } + } + + /** spherical_harmonic_Inner_and_theta_derivated + * Returns the value of the partial derivative of the spherical harmonic + *relative to theta. We have for all m such that -(l-1) <= m <= l-1 : + * + *(d H_l^m(theta, phi))/(d theta) + *= (-1)^m sqrt((l-|m|)!/(l+|m|)!) (d P_l^{|m|}(cos theta))/(d theta) e^{i.m.phi} + *= (-1)^m sqrt((l-|m|)!/(l+|m|)!) 1/sqrt{1-cos^2 theta} [l cos theta P_l^{|m|}(cos theta) - (l+|m|) P_{l-1}^{|m|}(cos theta) ] e^{i.m.phi} + *Since theta is in the range [0, Pi], we have: sin theta > 0 and therefore + *sqrt{1-cos^2 theta} = sin theta. Thus: + * + *(d H_l^m(theta, phi))/(d theta) + *= (-1)^m sqrt((l-|m|)!/(l+|m|)!) 1/(sin theta) [l cos theta P_l^{|m|}(cos theta) - (l+|m|) P_{l-1}^{|m|}(cos theta) ] e^{i.m.phi} + *For |m|=l, we have~: + *(d H_l^l(theta, phi))/(d theta) + *= (-1)^m sqrt(1/(2l)!) 1/(sin theta) [l cos theta P_l^l(cos theta) ] e^{i.m.phi} + * + *Remark: for 0<m<=l: + *(d H_l^{-m}(theta, phi))/(d theta) = [(d H_l^{-m}(theta, phi))/(d theta)]* + * + * + * + *Therefore, we have for (d Inner_l^m(r, theta, phi))/(d theta): + * + *|m|<l: (d Inner_l^m(r, theta, phi))/(d theta) = + *(i^m (-1)^l / (l+|m|)!) 1/(sin theta) [l cos theta P_l^{|m|}(cos theta) - (l+|m|) P_{l-1}^{|m|}(cos theta) ] e^{i.m.phi} r^l + *|m|=l: (d Inner_l^m(r, theta, phi))/(d theta) = + *(i^m (-1)^l / (l+|m|)!) 1/(sin theta) [l cos theta P_l^l(cos theta) ] e^{i.m.phi} r^l + * + * + */ + void harmonicInnerThetaDerivated( + const Spherical& inSphere, + FComplexe * results_array, + FComplexe * theta_derivated_results_array + ){ + FComplexe *p_term = results_array; + FComplexe *p_theta_derivated_term = theta_derivated_results_array; + FReal *p_spherical_harmonic_Inner_coefficients_array = sphereHarmoInnerCoef; + FReal *ptr_associated_Legendre_function_Array = legendre; + FReal *start_ptr_associated_Legendre_function_Array = ptr_associated_Legendre_function_Array; + + + //printf("HarmoInnerTheta \t lmax = %d \t r = %e \t cos_theta = %e \t sin_theta = %e \t phi = %e\n", + // FMB_Info_P,inSphere.r,inSphere.cosTheta,inSphere.sinTheta,inSphere.phi); + + // Initialization of precomputed_cos_and_sin_array: + for(int idxm = 0 , idxmMod4 = 0; idxm <= FMB_Info_P ; ++idxm, ++idxmMod4){ + if(idxmMod4 == 4) idxmMod4 = 0; + const FReal angle = idxm*inSphere.phi + PiArrayInner[idxmMod4]; + (cosSin + idxm)->setReal(FMath::Sin(angle + FMath::FPiDiv2)); + (cosSin + idxm)->setImag(FMath::Sin(angle)); + + //printf("l=%d \t inSphere.phi=%e \t this->PiArrayOuter[idxlMod4]=%e \t angle=%e \t FMath::Sin(angle + FMath::FPiDiv2)=%e \t FMath::Sin(angle)=%e\n", + // idxm, inSphere.phi, this->PiArrayInner[idxmMod4], angle, FMath::Sin(angle + FMath::FPiDiv2) , FMath::Sin(angle)); + } + + // Initialization of associated_Legendre_function_Array: + legendreFunction( inSphere.cosTheta, inSphere.sinTheta, this->legendre); + + // r^l + FReal r_l = 1.0; + for (int l = 0 ; l <= FMB_Info_P ; ++l, r_l *= inSphere.r){ + FReal magnitude; + // m<l: + int m = 0; + for(; m < l ; ++m, ++p_term, ++p_theta_derivated_term, ++p_spherical_harmonic_Inner_coefficients_array, ++ptr_associated_Legendre_function_Array){ + magnitude = (*p_spherical_harmonic_Inner_coefficients_array) * r_l * (*ptr_associated_Legendre_function_Array); + + // Computation of Inner_l^m(r, theta, phi): + p_term->setReal( magnitude * (cosSin + m)->getReal()); + p_term->setImag( magnitude * (cosSin + m)->getImag()); + + /*printf("%d/%d - magnitude=%e ptr_precomputed_cos_and_sin_array real=%e imag=%e p_term real=%e imag=%e\n", + l,m, + magnitude, + (cosSin + m)->getReal(), + (cosSin + m)->getImag(), + p_term->getReal(), + p_term->getImag());*/ + /*printf("\t p_spherical_harmonic_Inner_coefficients_array = %e \t ptr_associated_Legendre_function_Array = %e \t r_l = %e\n", + *p_spherical_harmonic_Inner_coefficients_array, + *ptr_associated_Legendre_function_Array, + r_l + );*/ + + // Computation of {\partial Inner_l^m(r, theta, phi)}/{\partial theta}: + magnitude = (*p_spherical_harmonic_Inner_coefficients_array) * r_l * ((l*inSphere.cosTheta*(*ptr_associated_Legendre_function_Array) + - (l+m)*(*(start_ptr_associated_Legendre_function_Array + expansion_Redirection_array_for_j[l-1] + m))) / inSphere.sinTheta); + p_theta_derivated_term->setReal(magnitude * (cosSin + m)->getReal()); + p_theta_derivated_term->setImag(magnitude * (cosSin + m)->getImag()); + + //printf("magnitude=%e r_l=%e p_spherical_harmonic_Inner_coefficients_array=%e real=%e imag=%e\n", + // magnitude,r_l,*p_spherical_harmonic_Inner_coefficients_array,p_theta_derivated_term->getReal(),p_theta_derivated_term->getImag()); + } + + // m=l: + // Computation of Inner_m^m(r, theta, phi): + magnitude = (*p_spherical_harmonic_Inner_coefficients_array) * r_l * (*ptr_associated_Legendre_function_Array); + p_term->setReal(magnitude * (cosSin + m)->getReal()); + p_term->setImag(magnitude * (cosSin + m)->getImag()); + + /*printf("%d - magnitude=%e ptr_precomputed_cos_and_sin_array real=%e imag=%e p_term real=%e imag=%e\n", + l, + magnitude, + (cosSin + m)->getReal(), + (cosSin + m)->getImag(), + p_term->getReal(), + p_term->getImag());*/ + /*printf("\t p_spherical_harmonic_Inner_coefficients_array = %e \t ptr_associated_Legendre_function_Array = %e \t r_l = %e\n", + *p_spherical_harmonic_Inner_coefficients_array, + *ptr_associated_Legendre_function_Array, + r_l + );*/ + + // Computation of {\partial Inner_m^m(r, theta, phi)}/{\partial theta}: + magnitude = (*p_spherical_harmonic_Inner_coefficients_array) * r_l * (m * inSphere.cosTheta * (*ptr_associated_Legendre_function_Array) / inSphere.sinTheta); + p_theta_derivated_term->setReal(magnitude * (cosSin + m)->getReal()); + p_theta_derivated_term->setImag(magnitude * (cosSin + m)->getImag()); + + //printf("magnitude=%e r_l=%e p_spherical_harmonic_Inner_coefficients_array=%e real=%e imag=%e\n", + // magnitude,r_l,*p_spherical_harmonic_Inner_coefficients_array,p_theta_derivated_term->getReal(),p_theta_derivated_term->getImag()); + + ++p_term; + ++p_theta_derivated_term; + ++p_spherical_harmonic_Inner_coefficients_array; + ++ptr_associated_Legendre_function_Array; + } + } + + ////////////////////////////////////////////////////////////////// + // Precompute + ////////////////////////////////////////////////////////////////// + + // transfer_M2M_Precompute_all_levels + // transfer_L2L_Precompute_all_levels + void precomputeM2M(){ + FReal treeWidthAtLevel = this->treeWidthAtRoot/2; + + for(int idxLevel = 0 ; idxLevel < TreeHeight - 1 ; ++idxLevel ){ + const F3DPosition father(treeWidthAtLevel,treeWidthAtLevel,treeWidthAtLevel); + treeWidthAtLevel /= 2; + + //std::cout << "[precomputeM2M]treeWidthAtLevel=" << treeWidthAtLevel << "\n"; + //printf("\tidxLevel=%d\tFather.x=%e\tFather.y=%e\tFather.z=%e\n",idxLevel,father.getX(),father.getY(),father.getZ()); + + for(int idxChild = 0 ; idxChild < 8 ; ++idxChild ){ + FTreeCoordinate childBox; + childBox.setPositionFromMorton(idxChild,1); + + const F3DPosition M2MVector ( + father.getX() - (treeWidthAtLevel * (1 + (childBox.getX() * 2))), + father.getY() - (treeWidthAtLevel * (1 + (childBox.getY() * 2))), + father.getZ() - (treeWidthAtLevel * (1 + (childBox.getZ() * 2))) + ); + + harmonicInner(positionTsmphere(M2MVector),this->transitionM2M[idxLevel][idxChild]); + + const F3DPosition L2LVector ( + (treeWidthAtLevel * (1 + (childBox.getX() * 2))) - father.getX(), + (treeWidthAtLevel * (1 + (childBox.getY() * 2))) - father.getY(), + (treeWidthAtLevel * (1 + (childBox.getZ() * 2))) - father.getZ() + ); + + harmonicInner(positionTsmphere(L2LVector),this->transitionL2L[idxLevel][idxChild]); + + //printf("[M2M_vector]%d/%d = %e/%e/%e\n", idxLevel , idxChild , M2MVector.getX() , M2MVector.getY() , M2MVector.getZ() ); + //Spherical = positionTsmphere(M2MVector); + //printf("[M2M_vectorSpherical]%d/%d = %e/%e/%e/%e\n", + // idxLevel , idxChild , sphericalM2M.r , sphericalM2M.cosTheta , sphericalM2M.sinTheta , sphericalM2M.phi ); + //for(int idxExpSize = 0 ; idxExpSize < FMB_Info_exp_size ; ++idxExpSize){ + // printf("transitionM2M[%d][%d][%d]=%e/%e\n", idxLevel , idxChild , idxExpSize , this->transitionM2M[idxLevel][idxChild][idxExpSize].getReal(),this->transitionM2M[idxLevel][idxChild][idxExpSize].getImag()); + //} + } + } + } + + + // transfer_M2L_Precompute_all_levels + void precomputeM2L(){ + //[Blas] FComplexe tempComplexe[FMB_Info_M2L_exp_size]; + //printf("FMB_Info.M2L_exp_size = %d\n",FMB_Info_M2L_exp_size); + + FReal treeWidthAtLevel = this->treeWidthAtRoot; + for(int idxLevel = 0 ; idxLevel < TreeHeight ; ++idxLevel ){ + //printf("level = %d \t width = %lf\n",idxLevel,treeWidthAtLevel); + for( int idxd1 = 0; idxd1 < this->size1Dim ; ++idxd1 ){ + + for( int idxd2 = 0; idxd2 < this->size1Dim ; ++idxd2 ){ + + for( int idxd3 = 0; idxd3 < this->size1Dim ; ++idxd3 ){ + const long x = idxd1 - this->halphSize1Dim; + const long y = idxd2 - this->halphSize1Dim; + const long z = idxd3 - this->halphSize1Dim; + + //printf("x=%ld \t y=%ld \t z=%ld\n",x,y,z); + + if( ( x*x + y*y + z*z ) >= ( 3*FMB_Info_ws*FMB_Info_ws + 0.1 ) ){ + const F3DPosition relativePos( x*treeWidthAtLevel , y*treeWidthAtLevel , z*treeWidthAtLevel ); + + + //printf("blas\n"); + //printf("transferM2L[%d][%d][%d][%d]\n", idxLevel, idxd1, idxd2, idxd3); + FComplexe tempComplexe[FMB_Info_M2L_exp_size]; + harmonicOuter(positionTsmphere(relativePos),tempComplexe); + + + FComplexe* p_ff_transfer_matrix = this->transferM2L[idxLevel][idxd1][idxd2][idxd3]; + + for(int M = 0 ; M <= FMB_Info_P ; ++M){ + for (int m = 0 ; m <= M ; ++m){ + for (int N = 0 ; N <= FMB_Info_P ; ++N){ + for (int n = 0 ; n <= 2*N ; ++n, ++p_ff_transfer_matrix){ + + //printf("M %d N %d n %d p_ff_transfer_matrix %d\n", M,N,n,int(p_ff_transfer_matrix-this->transferM2L[idxLevel][idxd1][idxd2][idxd3])); + + const int k = N-n-m; + if (k < 0){ + //printf("\t expansion_Get_p_term(transfer_exp, M+N, -k) %d\n",expansion_Redirection_array_for_j[M+N] - k); + const int pow_of_minus_1 = ((k%2) ? -1 : 1); + p_ff_transfer_matrix->setReal( pow_of_minus_1 * (tempComplexe + expansion_Redirection_array_for_j[M+N] - k)->getReal()); + p_ff_transfer_matrix->setImag((-pow_of_minus_1) * (tempComplexe + expansion_Redirection_array_for_j[M+N] - k)->getImag()); + } + else{ + //printf("\t expansion_Get_p_term(transfer_exp, M+N, k) %d\n",expansion_Redirection_array_for_j[M+N] + k); + *p_ff_transfer_matrix = *(tempComplexe + expansion_Redirection_array_for_j[M+N] + k); + } + //printf("\t p_ff_transfer_matrix real %e imag %e\n",p_ff_transfer_matrix->getReal(),p_ff_transfer_matrix->getImag()); + } + } + } + } + + //for(int idxTemp = 0 ; idxTemp < this->FMB_Info_M2L_exp_size ; ++idxTemp){ + // printf("transferM2L[%d][%d][%d][%d][%d]=%e/%e\n", idxLevel, idxd1, idxd2, idxd3, idxTemp, tempComplexe[idxTemp].getReal(),tempComplexe[idxTemp].getImag()); + //} + } + + } + } + } + treeWidthAtLevel /= 2; + } + } + + void buildPrecompute(){ + expansion_Redirection_array_for_j_Initialize(); + sphericalHarmonicInitialize(); + transferAllocate(); + + precomputeM2M(); + precomputeM2L(); + } + +public: + FFmbKernelsBlas(const FReal inTreeWidth) : + treeWidthAtRoot(inTreeWidth), FMB_Info_up_to_P_in_M2L(true) { + buildPrecompute(); + } + + FFmbKernelsBlas(const FFmbKernelsBlas& other) + : treeWidthAtRoot(other.treeWidthAtRoot), FMB_Info_up_to_P_in_M2L(other.FMB_Info_up_to_P_in_M2L) { + buildPrecompute(); + } + + virtual void init(){ + FTRACE( FTrace::Controller.enterFunction(FTrace::KERNELS, __FUNCTION__ , __FILE__ , __LINE__) ); + FTRACE( FTrace::Controller.leaveFunction(FTrace::KERNELS) ); + } + + /** Default destructor */ + virtual ~FFmbKernelsBlas(){ + transferDeallocate(); + } + + + ///////////////////////////////////////////////////////////////////////////////// + // Upward + ///////////////////////////////////////////////////////////////////////////////// + + /** OK! + * expansion_P2M_add + * Multipole expansion with m charges q_i in Q_i=(rho_i, alpha_i, beta_i) + *whose relative coordinates according to *p_center are: + *Q_i - *p_center = (rho'_i, alpha'_i, beta'_i); + * + *For j=0..P, k=-j..j, we have: + * + *M_j^k = (-1)^j { sum{i=1..m} q_i Inner_j^k(rho'_i, alpha'_i, beta'_i) } + * + *However the extern loop is over the bodies (i=1..m) in our code and as an + *intern loop we have: j=0..P, k=-j..j + * + *and the potential is then given by: + * + * Phi(x) = sum_{n=0}^{+} sum_{m=-n}^{n} M_n^m O_n^{-m} (x - *p_center) + * + */ + void P2M(CellClass* const inPole, const FList<ParticuleClass*>* const inParticules) { + FTRACE( FTrace::Controller.enterFunction(FTrace::KERNELS, __FUNCTION__ , __FILE__ , __LINE__) ); + + for(typename FList<ParticuleClass*>::ConstBasicIterator iterParticule(*inParticules); + iterParticule.isValide() ; iterParticule.progress()){ + + + //std::cout << "Working on part " << iterParticule.value()->getPhysicalValue() << "\n"; + //F3DPosition tempPos = iterParticule.value()->getPosition() - inPole->getPosition(); + //ok printf("\tpos_rel.x=%e\tpos_rel.y=%e\tpos_rel.z=%e\n",tempPos.getX(),tempPos.getY(),tempPos.getZ()); + //ok printf("\tp_center.x=%e\tp_center.y=%e\tp_center.z=%e\n",inPole->getPosition().getX(),inPole->getPosition().getY(),inPole->getPosition().getZ()); + //ok printf("\tbody.x=%e\tbody.y=%e\tbody.z=%e\n",iterParticule.value()->getPosition().getX(),iterParticule.value()->getPosition().getY(),iterParticule.value()->getPosition().getZ()); + + harmonicInner(positionTsmphere(iterParticule.value()->getPosition() - inPole->getPosition()),current_thread_Y); + + //ok printf("\tr=%e\tcos_theta=%e\tsin_theta=%e\tphi=%e\n",spherical.r,spherical.cosTheta,spherical.sinTheta,spherical.phi); + + FComplexe* p_exp_term = inPole->getMultipole(); + FComplexe* p_Y_term = current_thread_Y; + FReal pow_of_minus_1_j = 1.0;//(-1)^j + const FReal valueParticule = iterParticule.value()->getPhysicalValue(); + + for(int j = 0 ; j <= FMB_Info_P ; ++j, pow_of_minus_1_j = -pow_of_minus_1_j ){ + for(int k = 0 ; k <= j ; ++k, ++p_Y_term, ++p_exp_term){ + p_Y_term->mulRealAndImag( valueParticule * pow_of_minus_1_j ); + (*p_exp_term) += (*p_Y_term); + //printf("\tj=%d\tk=%d\tp_exp_term.real=%e\tp_exp_term.imag=%e\tp_Y_term.real=%e\tp_Y_term.imag=%e\tpow_of_minus_1_j=%e\n", + // j,k,(*p_exp_term).getReal(),(*p_exp_term).getImag(),(*p_Y_term).getReal(),(*p_Y_term).getImag(),pow_of_minus_1_j); + } + } + } + + FTRACE( FTrace::Controller.leaveFunction(FTrace::KERNELS) ); + } + + /** + *----------------------------------- + *octree_Upward_pass_internal_cell + *expansion_M2M_add + *----------------------------------- + *We compute the translation of multipole_exp_src from *p_center_of_exp_src to + *p_center_of_exp_target, and add the result to multipole_exp_target. + * + * O_n^l (with n=0..P, l=-n..n) being the former multipole expansion terms + * (whose center is *p_center_of_multipole_exp_src) we have for the new multipole + * expansion terms (whose center is *p_center_of_multipole_exp_target): + + * M_j^k = sum{n=0..j} + * sum{l=-n..n, |k-l|<=j-n} + * O_n^l Inner_{j-n}^{k-l}(rho, alpha, beta) + * + * where (rho, alpha, beta) are the spherical coordinates of the vector : + * p_center_of_multipole_exp_target - *p_center_of_multipole_exp_src + * + * Warning: if j-n < |k-l| we do nothing. + */ + void M2M(CellClass* const FRestrict inPole, const CellClass *const FRestrict *const FRestrict inChild, const int inLevel) { + FTRACE( FTrace::Controller.enterFunction(FTrace::KERNELS, __FUNCTION__ , __FILE__ , __LINE__) ); + + // We do NOT have: for(l=n-j+k; l<=j-n+k ;++l){} <=> for(l=-n; l<=n ;++l){if (j-n >= abs(k-l)){}} + // But we have: for(k=MAX(0,n-j+l); k<=j-n+l; ++k){} <=> for(k=0; k<=j; ++k){if (j-n >= abs(k-l)){}} + // (This is not the same as in L2L since the range of values of k is not the same, compared to "n-j" or "j-n".) + // Therefore the loops over n and l are the outmost ones and + // we invert the loop over j with the summation with n: + // for{j=0..P} sum_{n=0}^j <-> sum_{n=0}^P for{j=n..P} + FComplexe* const multipole_exp_target = inPole->getMultipole(); + //printf("Morton = %lld\n",inPole->getMortonIndex()); + + for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ + if(!inChild[idxChild]) continue; + //printf("\tChild %d\n",idxChild); + + const FComplexe* const multipole_exp_src = inChild[idxChild]->getMultipole(); + + const FComplexe* const M2M_transfer = transitionM2M[inLevel][idxChild]; + + for(int n = 0 ; n <= FMB_Info_P ; ++n ){ + // l<0 // (-1)^l + FReal pow_of_minus_1_for_l = ( n % 2 ? -1 : 1); + + // O_n^l : here points on the source multipole expansion term of degree n and order |l| + const FComplexe* p_src_exp_term = multipole_exp_src + expansion_Redirection_array_for_j[n]+n; + //printf("\t[p_src_exp_term] expansion_Redirection_array_for_j[n]=%d\tn=%d\n",expansion_Redirection_array_for_j[n],n); + + int l = -n; + for(; l<0 ; ++l, --p_src_exp_term, pow_of_minus_1_for_l = -pow_of_minus_1_for_l){ + + for(int j = n ; j<= FMB_Info_P ; ++j ){ + // M_j^k + FComplexe *p_target_exp_term = multipole_exp_target + expansion_Redirection_array_for_j[j]; + //printf("\t[p_target_exp_term] expansion_Redirection_array_for_j[j]=%d\n",expansion_Redirection_array_for_j[j]); + // Inner_{j-n}^{k-l} : here points on the M2M transfer function/expansion term of degree n-j and order |k-l| + const FComplexe *p_Inner_term= M2M_transfer + expansion_Redirection_array_for_j[j-n]-l /* k==0 */; + //printf("\t[p_Inner_term] expansion_Redirection_array_for_j[j-n]=%d\tl=%d\n",expansion_Redirection_array_for_j[j-n],-l); + + // since n-j+l<0 + for(int k=0 ; k <= (j-n+l) ; ++k, ++p_target_exp_term, ++p_Inner_term){ // l<0 && k>=0 => k-l>0 + p_target_exp_term->incReal( pow_of_minus_1_for_l * + ((p_src_exp_term->getReal() * p_Inner_term->getReal()) + + (p_src_exp_term->getImag() * p_Inner_term->getImag()))); + p_target_exp_term->incImag( pow_of_minus_1_for_l * + ((p_src_exp_term->getReal() * p_Inner_term->getImag()) - + (p_src_exp_term->getImag() * p_Inner_term->getReal()))); + /*printf("1\n"); + printf("\tp_src_exp_term->dat[REAL]=%e\tp_src_exp_term->dat[IMAG]=%e\n", p_src_exp_term->getReal(),p_src_exp_term->getImag()); + printf("\tp_Inner_term->dat[REAL]=%e\tp_Inner_term->dat[IMAG]=%e\n", p_Inner_term->getReal(),p_Inner_term->getImag()); + printf("\tn[%d]l[%d]j[%d]k[%d] = %e / %e\n", + n,l,j,k, + p_target_exp_term->getReal(),p_target_exp_term->getImag());*/ + } // for k + } // for j + } // for l + + // l>=0 + for(; l <= n ; ++l, ++p_src_exp_term, pow_of_minus_1_for_l = -pow_of_minus_1_for_l){ + + for( int j=n ; j <= FMB_Info_P ; ++j ){ + // (-1)^k + FReal pow_of_minus_1_for_k = ( FMath::Max(0,n-j+l) %2 ? -1 : 1 ); + // M_j^k + FComplexe *p_target_exp_term = multipole_exp_target + expansion_Redirection_array_for_j[j] + FMath::Max(0,n-j+l); + // Inner_{j-n}^{k-l} : here points on the M2M transfer function/expansion term of degree n-j and order |k-l| + const FComplexe *p_Inner_term = M2M_transfer + expansion_Redirection_array_for_j[j-n] + l - FMath::Max(0,n-j+l);// -(k-l) + + int k = FMath::Max(0,n-j+l); + for(; k <= (j-n+l) && (k-l) < 0 ; ++k, ++p_target_exp_term, --p_Inner_term, pow_of_minus_1_for_k = -pow_of_minus_1_for_k){ /* l>=0 && k-l<0 */ + p_target_exp_term->incReal( pow_of_minus_1_for_k * pow_of_minus_1_for_l * + ((p_src_exp_term->getReal() * p_Inner_term->getReal()) + + (p_src_exp_term->getImag() * p_Inner_term->getImag()))); + p_target_exp_term->incImag(pow_of_minus_1_for_k * pow_of_minus_1_for_l * + ((p_src_exp_term->getImag() * p_Inner_term->getReal()) - + (p_src_exp_term->getReal() * p_Inner_term->getImag()))); + /*printf("2\n"); + printf("\tp_src_exp_term->dat[REAL]=%e\tp_src_exp_term->dat[IMAG]=%e\n", p_src_exp_term->getReal(),p_src_exp_term->getImag()); + printf("\tp_Inner_term->dat[REAL]=%e\tp_Inner_term->dat[IMAG]=%e\n", p_Inner_term->getReal(),p_Inner_term->getImag()); + printf("\tn[%d]l[%d]j[%d]k[%d] = %e / %e\n", + n,l,j,k, + p_target_exp_term->getReal(),p_target_exp_term->getImag());*/ + } // for k + + for(; k <= (j - n + l) ; ++k, ++p_target_exp_term, ++p_Inner_term){ // l>=0 && k-l>=0 + p_target_exp_term->incReal( + (p_src_exp_term->getReal() * p_Inner_term->getReal()) - + (p_src_exp_term->getImag() * p_Inner_term->getImag())); + p_target_exp_term->incImag( + (p_src_exp_term->getImag() * p_Inner_term->getReal()) + + (p_src_exp_term->getReal() * p_Inner_term->getImag())); + /*printf("3\n"); + printf("\tp_src_exp_term->dat[REAL]=%e\tp_src_exp_term->dat[IMAG]=%e\n", p_src_exp_term->getReal(),p_src_exp_term->getImag()); + printf("\tp_Inner_term->dat[REAL]=%e\tp_Inner_term->dat[IMAG]=%e\n", p_Inner_term->getReal(),p_Inner_term->getImag()); + printf("\tn[%d]l[%d]j[%d]k[%d] = %e / %e\n", + n,l,j,k, + p_target_exp_term->getReal(),p_target_exp_term->getImag());*/ + + } // for k + } // for j + } // for l + } // for n + } + + FTRACE( FTrace::Controller.leaveFunction(FTrace::KERNELS) ); + } + + void convert_exp2nexp_inplace(FComplexe* const exp){ + int j, k; + int P = FMB_Info_P; + FReal pow_of_minus_1_for_k; + FComplexe* p_exp = NULL; + FComplexe* p_nexp = NULL; + + for (j=P; j>=0; --j){ + /* Position in 'exp': (j*(j+1)*0.5) + k + * Position in 'nexp': j*(j+1) + k + */ + int jj_plus_1 = j*(j+1); + int half_jj_plus_1 = (int) jj_plus_1*0.5; + p_exp = exp + half_jj_plus_1 + j; + p_nexp = exp + jj_plus_1 + j; + + /* Positive (or null) orders: */ + for (k=j; k>=0; --k){ + *p_nexp = *p_exp; + //printf("[%d] real = %e imag %e\n",p_nexp-exp,p_nexp->getReal(),p_nexp->getImag()); + --p_exp; + --p_nexp; + } /* for k */ + + /* Negative orders: */ + p_exp += j+1 + half_jj_plus_1; + p_nexp -= j-1; + for (k= -j, + pow_of_minus_1_for_k = ((j%2) ? -1 : 1); + k<0; + ++k, + pow_of_minus_1_for_k = -pow_of_minus_1_for_k){ + p_nexp->setReal(pow_of_minus_1_for_k * p_exp->getReal()); + p_nexp->setImag((-pow_of_minus_1_for_k) * p_exp->getImag()); + //printf("[%d] real = %e imag %e\n",p_nexp-exp,p_nexp->getReal(),p_nexp->getImag()); + --p_exp; + ++p_nexp; + } /* for k */ + + } /* j */ + } + + ///////////////////////////////////////////////////////////////////////////////// + // M2L + ///////////////////////////////////////////////////////////////////////////////// + /** + *------------------ + * expansion_M2L_add + * octree_Compute_local_exp_M2L + *------------------- + *We compute the conversion of multipole_exp_src in *p_center_of_exp_src to + *a local expansion in *p_center_of_exp_target, and add the result to local_exp_target. + * + *O_n^l (with n=0..P, l=-n..n) being the former multipole expansion terms + *(whose center is *p_center_of_multipole_exp_src) we have for the new local + *expansion terms (whose center is *p_center_of_local_exp_target): + * + *L_j^k = sum{n=0..+} + *sum{l=-n..n} + *O_n^l Outer_{j+n}^{-k-l}(rho, alpha, beta) + * + *where (rho, alpha, beta) are the spherical coordinates of the vector : + *p_center_of_local_exp_src - *p_center_of_multipole_exp_target + * + *Remark: here we have always j+n >= |-k-l| + * + */ + void M2L(CellClass* const FRestrict pole, const CellClass*const FRestrict *const FRestrict distantNeighbors, const int size, const int inLevel) { + FTRACE( FTrace::Controller.enterFunction(FTrace::KERNELS, __FUNCTION__ , __FILE__ , __LINE__) ); + FTreeCoordinate coordCenter; + coordCenter.setPositionFromMorton(pole->getMortonIndex(),inLevel); + + for(int idxSize = 0 ; idxSize < size ; ++idxSize){ + FTreeCoordinate coordNeighbors; + coordNeighbors.setPositionFromMorton(distantNeighbors[idxSize]->getMortonIndex(),inLevel); + + ///printf("Morton = %lld\n",pole->getMortonIndex()); + ///printf("\tMorton Neighbors = %lld\n",distantNeighbors[idxSize]->getMortonIndex()); + //printf("\tidxSize = %d\tleve = %d\tMorton = %lld\n",idxSize,inLevel,distantNeighbors[idxSize]->getMortonIndex()); + + const FComplexe* const transfer_M2L_matrix = transferM2L[inLevel] + [(coordCenter.getX()+halphSize1Dim-coordNeighbors.getX())] + [(coordCenter.getY()+halphSize1Dim-coordNeighbors.getY())] + [(coordCenter.getZ()+halphSize1Dim-coordNeighbors.getZ())]; + ///printf("level = %d\tx=%ld\ty=%ld\tz=%ld\n", inLevel, + /// (coordCenter.getX()-coordNeighbors.getX()), + /// (coordCenter.getY()-coordNeighbors.getY()), + /// (coordCenter.getZ()-coordNeighbors.getZ())); + /*printf("M2L_transfer[0]= %e/%e\n",M2L_transfer->getReal(),M2L_transfer->getImag()); + printf("M2L_transfer[1]= %e/%e\n",M2L_transfer[1].getReal(),M2L_transfer[1].getImag()); + printf("M2L_transfer[2]= %e/%e\n",M2L_transfer[2].getReal(),M2L_transfer[2].getImag());*/ + const FComplexe* const multipole_exp_src = distantNeighbors[idxSize]->getMultipole(); + FComplexe* p_target_exp_term = pole->getLocal(); + + //ff_full_matrix_Product + FReal alpha_and_beta[2] = {1.0,0.0}; + //FReal beta[2] = {0.0,0.0}; + + FComplexe multipole_exp_src_changed[FF_MATRIX_COLUMN_DIM]; + memcpy(multipole_exp_src_changed,multipole_exp_src,sizeof(FComplexe)*FF_MATRIX_ROW_DIM); + + convert_exp2nexp_inplace(multipole_exp_src_changed); + + ///for(int j = 0 ; j < FF_MATRIX_ROW_DIM ; ++j){ + /// printf("\t\t local_exp[%d] real = %e imag = %e\n", + /// j,p_target_exp_term[j].getReal(),p_target_exp_term[j].getImag()); + ///} + + /*{ + FILE * trans = fopen("trans.dt","r"); + FILE * exp = fopen("exp.dt","r"); + + fread(transfer_M2L_matrix, sizeof(double),FF_MATRIX_ROW_DIM * FF_MATRIX_COLUMN_DIM*2, trans); + fread(multipole_exp_src_changed, sizeof(double), FF_MATRIX_COLUMN_DIM * 2, exp); + + fclose(trans); + fclose(exp); + }*/ + + // c float cblas_cgemv + // z double cblas_zgemv + //zgemv("T", FF_MATRIX_COLUMN_DIM, FF_MATRIX_ROW_DIM, *((dcmplx*) &(alpha_and_beta)), (dcmplx *) (transfer_M2L_matrix), FF_MATRIX_COLUMN_DIM, (dcmplx *) (multipole_exp_src), 1, *((dcmplx *)&(alpha_and_beta)), (dcmplx *) (p_target_exp_term), 1) + //cgemv("T", FF_MATRIX_COLUMN_DIM, FF_MATRIX_ROW_DIM, *((scmplx*) &(alpha_and_beta)), (scmplx *) (transfer_M2L_matrix), FF_MATRIX_COLUMN_DIM, (scmplx *) (multipole_exp_src), 1, *((scmplx *)&(alpha_and_beta)), (scmplx *) (p_target_exp_term), 1); + cblas_zgemv(CblasColMajor, CblasTrans, FF_MATRIX_COLUMN_DIM, FF_MATRIX_ROW_DIM, + alpha_and_beta, transfer_M2L_matrix, + FF_MATRIX_COLUMN_DIM,multipole_exp_src_changed, 1, + alpha_and_beta, p_target_exp_term, 1); + //cblas_zgemv(CblasColMajor, CblasTrans, FF_MATRIX_COLUMN_DIM, FF_MATRIX_ROW_DIM, (void *) &(alpha_and_beta), (void *) (transfer_M2L_matrix), FF_MATRIX_COLUMN_DIM, (void *) (multipole_exp_src), 1, (void *) &(alpha_and_beta), (void *) (p_target_exp_term), 1); + + ///printf("FF_MATRIX_COLUMN_DIM = %d, FF_MATRIX_ROW_DIM = %d\n", + /// FF_MATRIX_COLUMN_DIM,FF_MATRIX_ROW_DIM); + + ///printf("CblasColMajor = %d, CblasTrans = %d\n", + /// CblasColMajor,CblasTrans); + + /*for(int j = 0 ; j < FF_MATRIX_ROW_DIM * FF_MATRIX_COLUMN_DIM; ++j){ + printf("\t transfer_M2L_matrix[%d] real = %e imag = %e\n", + j, + //transfer_M2L_matrix[j].getReal(),transfer_M2L_matrix[j].getImag() + ((const double*)(const void*)transfer_M2L_matrix)[2*j], + ((const double*)(const void*)transfer_M2L_matrix)[2*j + 1] + ); + }*/ + + /*for(int j = 0 ; j < FF_MATRIX_COLUMN_DIM ; ++j){ + printf("\t\t multipole_exp_src[%d] real = %e imag = %e (%p)\n", + j, + //multipole_exp_src_changed[j].getReal(),multipole_exp_src_changed[j].getImag(), + ((double*)(void*)multipole_exp_src_changed)[2*j], + ((double*)(void*)multipole_exp_src_changed)[2*j + 1], + &multipole_exp_src_changed[j]); + }*/ + /*for(int j = 0 ; j < FF_MATRIX_ROW_DIM ; ++j){ + printf("\t\t local_exp[%d] real = %e imag = %e\n", + j,p_target_exp_term[j].getReal(),p_target_exp_term[j].getImag()); + }*/ + + } + + FTRACE( FTrace::Controller.leaveFunction(FTrace::KERNELS) ); + } + + ///////////////////////////////////////////////////////////////////////////////// + // Downard + ///////////////////////////////////////////////////////////////////////////////// + + /** expansion_L2L_add + *We compute the shift of local_exp_src from *p_center_of_exp_src to + *p_center_of_exp_target, and set the result to local_exp_target. + * + *O_n^l (with n=0..P, l=-n..n) being the former local expansion terms + *(whose center is *p_center_of_exp_src) we have for the new local + *expansion terms (whose center is *p_center_of_exp_target): + * + *L_j^k = sum{n=j..P} + *sum{l=-n..n} + *O_n^l Inner_{n-j}^{l-k}(rho, alpha, beta) + * + *where (rho, alpha, beta) are the spherical coordinates of the vector : + *p_center_of_exp_target - *p_center_of_exp_src + * + *Warning: if |l-k| > n-j, we do nothing. + */ + void L2L(const CellClass* const FRestrict pole, CellClass* FRestrict *const FRestrict child, const int inLevel) { + FTRACE( FTrace::Controller.enterFunction(FTrace::KERNELS, __FUNCTION__ , __FILE__ , __LINE__) ); + + for(int idxChild = 0 ; idxChild < 8 ; ++idxChild){ + // if no child at this position + if(!child[idxChild]) continue; + + const FComplexe* const L2L_tranfer = transitionL2L[inLevel][idxChild]; + const FComplexe* const local_exp_src = pole->getLocal(); + FComplexe* const local_exp_target = child[idxChild]->getLocal(); + + //printf("Level %d\n", inLevel); + //printf("Father morton %lld\n", pole->getMortonIndex()); + //printf("Child morton %lld\n", child[idxChild]->getMortonIndex()); + + //printf("local exp target %h\n", local_exp_target); + + // L_j^k + FComplexe* p_target_exp_term = local_exp_target; + for (int j=0 ; j<= FMB_Info_P ; ++j){ + // (-1)^k + FReal pow_of_minus_1_for_k = 1.0; + for (int k=0 ; k <= j ; ++k, pow_of_minus_1_for_k = -pow_of_minus_1_for_k, ++p_target_exp_term){ + for (int n=j; n<=FMB_Info_P;++n){ + // O_n^l : here points on the source multipole expansion term of degree n and order |l| + const FComplexe* p_src_exp_term = local_exp_src + expansion_Redirection_array_for_j[n] + n-j+k; + //printf("expansion_Redirection_array_for_j[n] + n-j+k %d\n", expansion_Redirection_array_for_j[n] + n-j+k); + int l = n-j+k; + // Inner_{n-j}^{l-k} : here points on the L2L transfer function/expansion term of degree n-j and order |l-k| + const FComplexe* p_Inner_term = L2L_tranfer + expansion_Redirection_array_for_j[n-j] + l-k; + + //printf("1\n"); + for ( ; l-k>0; --l, --p_src_exp_term, --p_Inner_term){ /* l>0 && l-k>0 */ + p_target_exp_term->incReal( (p_src_exp_term->getReal() * p_Inner_term->getReal()) - + (p_src_exp_term->getImag() * p_Inner_term->getImag())); + p_target_exp_term->incImag( (p_src_exp_term->getImag() * p_Inner_term->getReal()) + + (p_src_exp_term->getReal() * p_Inner_term->getImag())); + /*printf("\t p_src_exp_term->real = %lf \t p_src_exp_term->imag = %lf \n", + p_src_exp_term->getReal(),p_src_exp_term->getImag()); + printf("\t p_Inner_term->real = %lf \t p_Inner_term->imag = %lf \n", + p_Inner_term->getReal(),p_Inner_term->getImag()); + printf("\t\t p_target_exp_term->real = %lf \t p_target_exp_term->imag = %lf \n", + p_target_exp_term->getReal(),p_target_exp_term->getImag()); + printf("\tp_target_exp_term = %d\n",p_target_exp_term-local_exp_target);*/ + } + + //printf("2\n"); + // (-1)^l + FReal pow_of_minus_1_for_l = ((l%2) ? -1 : 1); + for (; l>0 && l>=j-n+k; --l, pow_of_minus_1_for_l = -pow_of_minus_1_for_l, --p_src_exp_term, ++p_Inner_term){ /* l>0 && l-k<=0 */ + p_target_exp_term->incReal( pow_of_minus_1_for_l * pow_of_minus_1_for_k * + ((p_src_exp_term->getReal() * p_Inner_term->getReal()) + + (p_src_exp_term->getImag() * p_Inner_term->getImag()))); + p_target_exp_term->incImag( pow_of_minus_1_for_l * pow_of_minus_1_for_k * + ((p_src_exp_term->getImag() * p_Inner_term->getReal()) - + (p_src_exp_term->getReal() * p_Inner_term->getImag()))); + /*printf("\t p_src_exp_term->real = %lf \t p_src_exp_term->imag = %lf \n", + p_src_exp_term->getReal(),p_src_exp_term->getImag()); + printf("\t p_Inner_term->real = %lf \t p_Inner_term->imag = %lf \n", + p_Inner_term->getReal(),p_Inner_term->getImag()); + printf("\t\t p_target_exp_term->real = %lf \t p_target_exp_term->imag = %lf \n", + p_target_exp_term->getReal(),p_target_exp_term->getImag()); + printf("\tp_target_exp_term = %d\n",p_target_exp_term-local_exp_target);*/ + } + + //printf("3\n"); + // l<=0 && l-k<=0 + for (; l>=j-n+k; --l, ++p_src_exp_term, ++p_Inner_term){ + p_target_exp_term->incReal( pow_of_minus_1_for_k * + ((p_src_exp_term->getReal() * p_Inner_term->getReal()) - + (p_src_exp_term->getImag() * p_Inner_term->getImag()))); + p_target_exp_term->decImag( pow_of_minus_1_for_k * + ((p_src_exp_term->getImag() * p_Inner_term->getReal()) + + (p_src_exp_term->getReal() * p_Inner_term->getImag()))); + /*printf("\t p_src_exp_term->real = %lf \t p_src_exp_term->imag = %lf \n", + p_src_exp_term->getReal(),p_src_exp_term->getImag()); + printf("\t p_Inner_term->real = %lf \t p_Inner_term->imag = %lf \n", + p_Inner_term->getReal(),p_Inner_term->getImag()); + printf("\t\t p_target_exp_term->real = %lf \t p_target_exp_term->imag = %lf \n", + p_target_exp_term->getReal(),p_target_exp_term->getImag()); + printf("\tj=%d\tk=%d\tn=%d\tl=%d\tpow_of_minus_1_for_k=%e\n",j,k,n,l,pow_of_minus_1_for_k); + printf("\tp_target_exp_term = %d\n",p_target_exp_term-local_exp_target);*/ + } + /*printf("\tj=%d\tk=%d\tn=%d\tl=%d\n",j,k,n,l); + printf("\t\t p_target_exp_term->real = %lf \t p_target_exp_term->imag = %lf \n", + p_target_exp_term->getReal(),p_target_exp_term->getImag());*/ + } + } + } + } + FTRACE( FTrace::Controller.leaveFunction(FTrace::KERNELS) ); + } + + + /** bodies_L2P + * expansion_L2P_add_to_force_vector_and_to_potential + * expansion_L2P_add_to_force_vector + * expansion_Evaluate_local_with_Y_already_computed + */ + void L2P(const CellClass* const local, FList<ParticuleClass*>* const particules){ + FTRACE( FTrace::Controller.enterFunction(FTrace::KERNELS, __FUNCTION__ , __FILE__ , __LINE__) ); + typename FList<ParticuleClass*>::BasicIterator iterTarget(*particules); + while( iterTarget.isValide() ){ + //printf("Morton %lld\n",local->getMortonIndex()); + + F3DPosition force_vector_in_local_base; + typename FFmbKernelsBlas<ParticuleClass,CellClass,TreeHeight>::Spherical spherical; + spherical = positionTsmphere( iterTarget.value()->getPosition() - local->getPosition()); + + /*printf("\t\t bodies_it_Get_p_position(&it) x = %lf \t y = %lf \t z = %lf \n", + (iterTarget.value()->getPosition()).getX(), + (iterTarget.value()->getPosition()).getY(), + (iterTarget.value()->getPosition()).getZ()); + printf("\t\t p_leaf_center x = %lf \t y = %lf \t z = %lf \n", + (local->getPosition()).getX(), + (local->getPosition()).getY(), + (local->getPosition()).getZ());*/ + /*printf("\t\t p_position_to_leaf_center x = %lf \t y = %lf \t z = %lf \n", + (iterTarget.value()->getPosition() - local->getPosition()).getX(), + (iterTarget.value()->getPosition() - local->getPosition()).getY(), + (iterTarget.value()->getPosition() - local->getPosition()).getZ());*/ + /*printf("\t\t phi = %lf \t cos = %lf \t sin = %lf \t r= %lf \n", + spherical.phi,spherical.cosTheta,spherical.sinTheta,spherical.r);*/ + + harmonicInnerThetaDerivated( spherical, FFmbKernelsBlas<ParticuleClass,CellClass,TreeHeight>::current_thread_Y, FFmbKernelsBlas<ParticuleClass,CellClass,TreeHeight>::current_thread_Y_theta_derivated); + + // The maximum degree used here will be P. + const FComplexe* p_Y_term = FFmbKernelsBlas<ParticuleClass,CellClass,TreeHeight>::current_thread_Y+1; + const FComplexe* p_Y_theta_derivated_term = FFmbKernelsBlas<ParticuleClass,CellClass,TreeHeight>::current_thread_Y_theta_derivated+1; + const FComplexe* p_local_exp_term = local->getLocal()+1; + + for (int j = 1 ; j <= FMB_Info_P ; ++j ){ + FComplexe exp_term_aux; + + // k=0: + // F_r: + exp_term_aux.setReal( (p_Y_term->getReal() * p_local_exp_term->getReal()) - (p_Y_term->getImag() * p_local_exp_term->getImag()) ); + exp_term_aux.setImag( (p_Y_term->getReal() * p_local_exp_term->getImag()) + (p_Y_term->getImag() * p_local_exp_term->getReal()) ); + + force_vector_in_local_base.setX( force_vector_in_local_base.getX() + j * exp_term_aux.getReal()); + // F_phi: k=0 => nothing to do for F_phi + // F_theta: + exp_term_aux.setReal( (p_Y_theta_derivated_term->getReal() * p_local_exp_term->getReal()) - (p_Y_theta_derivated_term->getImag() * p_local_exp_term->getImag()) ); + exp_term_aux.setImag( (p_Y_theta_derivated_term->getReal() * p_local_exp_term->getImag()) + (p_Y_theta_derivated_term->getImag() * p_local_exp_term->getReal()) ); + + force_vector_in_local_base.setY( force_vector_in_local_base.getY() + exp_term_aux.getReal()); + + + /*printf("\t j = %d \t exp_term_aux real = %lf imag = %lf \n", + j, + exp_term_aux.getReal(), + exp_term_aux.getImag());*/ + /*printf("\t\t\t p_Y_theta_derivated_term->getReal = %lf \t p_Y_theta_derivated_term->getImag = %lf \n", + p_Y_theta_derivated_term->getReal(),p_Y_theta_derivated_term->getImag());*/ + //printf("\t\t\t p_local_exp_term->getReal = %lf \t p_local_exp_term->getImag = %lf \n", + // p_local_exp_term->getReal(),p_local_exp_term->getImag()); + //printf("\t\t\t p_Y_term->getReal = %lf \t p_Y_term->getImag = %lf \n",p_Y_term->getReal(),p_Y_term->getImag()); + + //printf("[LOOP1] force_vector_in_local_base x = %lf \t y = %lf \t z = %lf \n", + // force_vector_in_local_base.getX(),force_vector_in_local_base.getY(),force_vector_in_local_base.getZ()); + + + ++p_local_exp_term; + ++p_Y_term; + ++p_Y_theta_derivated_term; + + + // k>0: + for (int k=1; k<=j ;++k, ++p_local_exp_term, ++p_Y_term, ++p_Y_theta_derivated_term){ + // F_r: + + exp_term_aux.setReal( (p_Y_term->getReal() * p_local_exp_term->getReal()) - (p_Y_term->getImag() * p_local_exp_term->getImag()) ); + exp_term_aux.setImag( (p_Y_term->getReal() * p_local_exp_term->getImag()) + (p_Y_term->getImag() * p_local_exp_term->getReal()) ); + + force_vector_in_local_base.setX(force_vector_in_local_base.getX() + 2 * j * exp_term_aux.getReal()); + // F_phi: + force_vector_in_local_base.setZ( force_vector_in_local_base.getZ() - 2 * k * exp_term_aux.getImag()); + // F_theta: + + /*printf("\t\t k = %d \t j = %d \t exp_term_aux real = %e imag = %e \n", + k,j, + exp_term_aux.getReal(), + exp_term_aux.getImag());*/ + //printf("\t\t\t p_Y_term->getReal = %e \t p_Y_term->getImag = %e \n",p_Y_term->getReal(),p_Y_term->getImag()); + //printf("\t\t\t p_local_exp_term->getReal = %e \t p_local_exp_term->getImag = %e \n",p_local_exp_term->getReal(),p_local_exp_term->getImag()); + + exp_term_aux.setReal( (p_Y_theta_derivated_term->getReal() * p_local_exp_term->getReal()) - (p_Y_theta_derivated_term->getImag() * p_local_exp_term->getImag()) ); + exp_term_aux.setImag( (p_Y_theta_derivated_term->getReal() * p_local_exp_term->getImag()) + (p_Y_theta_derivated_term->getImag() * p_local_exp_term->getReal()) ); + + force_vector_in_local_base.setY(force_vector_in_local_base.getY() + 2 * exp_term_aux.getReal()); + + /*printf("\t\t k = %d \t j = %d \t exp_term_aux real = %lf imag = %lf \n", + k,j, + exp_term_aux.getReal(), + exp_term_aux.getImag());*/ + /*printf("\t\t\t p_Y_theta_derivated_term->getReal = %lf \t p_Y_theta_derivated_term->getImag = %lf \n", + p_Y_theta_derivated_term->getReal(),p_Y_theta_derivated_term->getImag());*/ + /*printf("\t\t\t p_local_exp_term->getReal = %lf \t p_local_exp_term->getImag = %lf \n", + p_local_exp_term->getReal(),p_local_exp_term->getImag());*/ + + /*printf("[LOOP2] force_vector_in_local_base x = %lf \t y = %lf \t z = %lf \n", + force_vector_in_local_base.getX(),force_vector_in_local_base.getY(),force_vector_in_local_base.getZ());*/ + } + } + + /*printf("[END LOOP] force_vector_in_local_base x = %lf \t y = %lf \t z = %lf \n", + force_vector_in_local_base.getX(),force_vector_in_local_base.getY(),force_vector_in_local_base.getZ());*/ + + // We want: - gradient(POTENTIAL_SIGN potential). + // The -(- 1.0) computing is not the most efficient programming ... + //#define FMB_TMP_SIGN -(POTENTIAL_SIGN 1.0) + force_vector_in_local_base.setX( force_vector_in_local_base.getX() * (-1.0) / spherical.r); + force_vector_in_local_base.setY( force_vector_in_local_base.getY() * (-1.0) / spherical.r); + force_vector_in_local_base.setZ( force_vector_in_local_base.getZ() * (-1.0) / (spherical.r * spherical.sinTheta)); + //#undef FMB_TMP_SIGN + + ///////////////////////////////////////////////////////////////////// + + //spherical_position_Set_ph + //FMB_INLINE COORDINATES_T angle_Convert_in_MinusPi_Pi(COORDINATES_T a){ + FReal ph = FMath::Fmod(spherical.phi, 2*FMath::FPi); + if (ph > M_PI) ph -= 2*FMath::FPi; + if (ph < -M_PI + FMath::Epsilon) ph += 2 * FMath::Epsilon; + + //spherical_position_Set_th + FReal th = FMath::Fmod(FMath::ACos(spherical.cosTheta), 2*FMath::FPi); + if (th < 0.0) th += 2*FMath::FPi; + if (th > FMath::FPi){ + th = 2*FMath::FPi - th; + //spherical_position_Set_ph(p, spherical_position_Get_ph(p) + M_PI); + ph = FMath::Fmod(ph + FMath::FPi, 2*FMath::FPi); + if (ph > M_PI) ph -= 2*FMath::FPi; + if (ph < -M_PI + FMath::Epsilon) ph += 2 * FMath::Epsilon; + th = FMath::Fmod(th, 2*FMath::FPi); + if (th > M_PI) th -= 2*FMath::FPi; + if (th < -M_PI + FMath::Epsilon) th += 2 * FMath::Epsilon; + } + //spherical_position_Set_r + FReal rh = spherical.r; + if (spherical.r < 0){ + rh = -spherical.r; + //spherical_position_Set_ph(p, M_PI - spherical_position_Get_th(p)); + ph = FMath::Fmod(FMath::FPi - th, 2*FMath::FPi); + if (ph > M_PI) ph -= 2*FMath::FPi; + if (ph < -M_PI + FMath::Epsilon) ph += 2 * FMath::Epsilon; + //spherical_position_Set_th(p, spherical_position_Get_th(p) + M_PI); + th = FMath::Fmod(th + FMath::FPi, 2*FMath::FPi); + if (th < 0.0) th += 2*FMath::FPi; + if (th > FMath::FPi){ + th = 2*FMath::FPi - th; + //spherical_position_Set_ph(p, spherical_position_Get_ph(p) + M_PI); + ph = FMath::Fmod(ph + FMath::FPi, 2*FMath::FPi); + if (ph > M_PI) ph -= 2*FMath::FPi; + if (ph < -M_PI + FMath::Epsilon) ph += 2 * FMath::Epsilon; + th = FMath::Fmod(th, 2*FMath::FPi); + if (th > M_PI) th -= 2*FMath::FPi; + if (th < -M_PI + FMath::Epsilon) th += 2 * FMath::Epsilon; + } + } + + /*printf("[details] ph = %e , rh = %e , th = %e \n", + ph,rh,th);*/ + + + const FReal cos_theta = FMath::Cos(th); + const FReal cos_phi = FMath::Cos(ph); + const FReal sin_theta = FMath::Sin(th); + const FReal sin_phi = FMath::Sin(ph); + + /*printf("[details] cos_theta = %e \t cos_phi = %e \t sin_theta = %e \t sin_phi = %e \n", + cos_theta, cos_phi, sin_theta, sin_phi);*/ + /*printf("[force_vector_in_local_base] x = %lf \t y = %lf \t z = %lf \n", + force_vector_in_local_base.getX(),force_vector_in_local_base.getY(),force_vector_in_local_base.getZ());*/ + + F3DPosition force_vector_tmp; + + force_vector_tmp.setX( + cos_phi * sin_theta * force_vector_in_local_base.getX() + + cos_phi * cos_theta * force_vector_in_local_base.getY() + + (-sin_phi) * force_vector_in_local_base.getZ()); + + force_vector_tmp.setY( + sin_phi * sin_theta * force_vector_in_local_base.getX() + + sin_phi * cos_theta * force_vector_in_local_base.getY() + + cos_phi * force_vector_in_local_base.getZ()); + + force_vector_tmp.setZ( + cos_theta * force_vector_in_local_base.getX() + + (-sin_theta) * force_vector_in_local_base.getY()); + + /*printf("[force_vector_tmp] = %lf \t y = %lf \t z = %lf \n", + force_vector_tmp.getX(),force_vector_tmp.getY(),force_vector_tmp.getZ());*/ + + ///////////////////////////////////////////////////////////////////// + + //#ifndef _DIRECT_MATRIX_ + // When _DIRECT_MATRIX_ is defined, this multiplication is done in 'leaf_Sum_near_and_far_fields()' + force_vector_tmp *= iterTarget.value()->getPhysicalValue(); + //#endif + + /*printf("[force_vector_tmp] fx = %e \t fy = %e \t fz = %e \n", + force_vector_tmp.getX(),force_vector_tmp.getY(),force_vector_tmp.getZ());*/ + + iterTarget.value()->setForces( iterTarget.value()->getForces() + force_vector_tmp ); + + FReal potential; + expansion_Evaluate_local_with_Y_already_computed(local->getLocal(),&potential); + iterTarget.value()->setPotential(potential); + + /*printf("[END] fx = %e \t fy = %e \t fz = %e \n\n", + iterTarget.value()->getForces().getX(),iterTarget.value()->getForces().getY(),iterTarget.value()->getForces().getZ());*/ + //printf("p_potential = %lf\n", potential); + + iterTarget.progress(); + } + FTRACE( FTrace::Controller.leaveFunction(FTrace::KERNELS) ); + } + + + void expansion_Evaluate_local_with_Y_already_computed(const FComplexe* local_exp, + FReal* const p_result){ + FTRACE( FTrace::Controller.enterFunction(FTrace::KERNELS, __FUNCTION__ , __FILE__ , __LINE__) ); + + FReal result = 0.0; + + FComplexe* p_Y_term = FFmbKernelsBlas<ParticuleClass,CellClass,TreeHeight>::current_thread_Y; + for(int j = 0 ; j<= FMB_Info_P ; ++j){ + // k=0 + (*p_Y_term) *= (*local_exp); + result += p_Y_term->getReal(); + //printf("\t\t p_Y_term->real = %e p_Y_term->imag = %e \t local_exp->real = %e local_exp->imag = %e \n", + // p_Y_term->getReal(), p_Y_term->getImag(), local_exp->getReal(), local_exp->getImag()); + ++p_Y_term; + ++local_exp; + + // k>0 + for (int k=1; k<=j ;++k, ++p_Y_term, ++local_exp){ + (*p_Y_term) *= (*local_exp); + result += 2 * p_Y_term->getReal(); + //printf("\t\t p_Y_term->real = %e p_Y_term->imag = %e \t local_exp->real = %e local_exp->imag = %e \n", + // p_Y_term->getReal(), p_Y_term->getImag(), local_exp->getReal(), local_exp->getImag()); + } + } + + *p_result = result; + + FTRACE( FTrace::Controller.leaveFunction(FTrace::KERNELS) ); + } + + + + + /** void bodies_Compute_direct_interaction ( + * bodies_t *FMB_RESTRICT p_b_target, + * bodies_t *FMB_RESTRICT p_b_src, + * bool mutual + * ) + * + */ + void P2P(FList<ParticuleClass*>* const FRestrict targets, const FList<ParticuleClass*>* const FRestrict sources, + const FList<ParticuleClass*>* FRestrict const* FRestrict directNeighbors, const int size) { + FTRACE( FTrace::Controller.enterFunction(FTrace::KERNELS, __FUNCTION__ , __FILE__ , __LINE__) ); + typename FList<ParticuleClass*>::BasicIterator iterTarget(*targets); + while( iterTarget.isValide() ){ + + for(int idxDirectNeighbors = 0 ; idxDirectNeighbors < size ; ++idxDirectNeighbors){ + typename FList<ParticuleClass*>::ConstBasicIterator iterSource(*directNeighbors[idxDirectNeighbors]); + while( iterSource.isValide() ){ + DIRECT_COMPUTATION_NO_MUTUAL_SOFT(&iterTarget.value(), + iterSource.value()); + iterSource.progress(); + } + } + + typename FList<ParticuleClass*>::ConstBasicIterator iterSameBox(*sources); + while( iterSameBox.isValide() ){ + if(iterSameBox.value() != iterTarget.value()){ + DIRECT_COMPUTATION_NO_MUTUAL_SOFT(&iterTarget.value(), + iterSameBox.value()); + } + iterSameBox.progress(); + } + + //printf("x = %e \t y = %e \t z = %e \n",iterTarget.value()->getPosition().getX(),iterTarget.value()->getPosition().getY(),iterTarget.value()->getPosition().getZ()); + //printf("\t P2P fx = %e \t fy = %e \t fz = %e \n",iterTarget.value()->getForces().getX(),iterTarget.value()->getForces().getY(),iterTarget.value()->getForces().getZ()); + //printf("\t potential = %e \n",iterTarget.value()->getPotential()); + + iterTarget.progress(); + } + FTRACE( FTrace::Controller.leaveFunction(FTrace::KERNELS) ); + } + void DIRECT_COMPUTATION_NO_MUTUAL_SOFT(ParticuleClass** const target, const ParticuleClass* const source){ + FTRACE( FTrace::Controller.enterFunction(FTrace::KERNELS, __FUNCTION__ , __FILE__ , __LINE__) ); + const FReal dx = (*target)->getPosition().getX() - source->getPosition().getX(); + const FReal dy = (*target)->getPosition().getY() - source->getPosition().getY(); + const FReal dz = (*target)->getPosition().getZ() - source->getPosition().getZ(); + + FReal inv_square_distance = 1.0/ (dx*dx + dy*dy + dz*dz + FFmbKernelsBlas<ParticuleClass,CellClass,TreeHeight>::FMB_Info_eps_soft_square); + FReal inv_distance = FMath::Sqrt(inv_square_distance); + inv_distance *= (*target)->getPhysicalValue() * source->getPhysicalValue(); + inv_square_distance *= inv_distance; + + (*target)->setForces( + (*target)->getForces().getX() + dx * inv_square_distance, + (*target)->getForces().getY() + dy * inv_square_distance, + (*target)->getForces().getZ() + dz * inv_square_distance + ); + + (*target)->setPotential( inv_distance + (*target)->getPotential()); + FTRACE( FTrace::Controller.leaveFunction(FTrace::KERNELS) ); + } +}; + + +template< class ParticuleClass, class CellClass, int TreeHeight> +const FReal FFmbKernelsBlas<ParticuleClass,CellClass,TreeHeight>::PiArrayInner[4] = {0, FMath::FPiDiv2, FMath::FPi, -FMath::FPiDiv2}; + + +template< class ParticuleClass, class CellClass, int TreeHeight> +const FReal FFmbKernelsBlas<ParticuleClass,CellClass,TreeHeight>::PiArrayOuter[4] = {0, -FMath::FPiDiv2, FMath::FPi, FMath::FPiDiv2}; + + + +#endif //FFMBKERNELSBLAS_HPP + +// [--LICENSE--] diff --git a/Sources/Utils/FComplexe.hpp b/Sources/Utils/FComplexe.hpp index 13554ba6a310086d47cf3389de0d512578096289..27d47270caa9b0e3cc088481e5119cc2885933d0 100644 --- a/Sources/Utils/FComplexe.hpp +++ b/Sources/Utils/FComplexe.hpp @@ -12,12 +12,12 @@ * Propose basic complexe class. */ class FComplexe { - FReal imag; //< Imaginary FReal real; //< Real + FReal imag; //< Imaginary public: /** Default Constructor (set real&imaginary to 0) */ - FComplexe() : imag(0), real(0){ + FComplexe() : real(0),imag(0){ } /** Constructor with values @@ -25,12 +25,12 @@ public: * @param inReal the real */ FComplexe(const FReal inImag, const FReal inReal) - : imag(inImag), real(inReal){ + : real(inReal),imag(inImag){ } /** Copy constructor */ FComplexe(const FComplexe& other) - : imag(other.imag), real(other.real){ + : real(other.real), imag(other.imag){ } /** Copy operator */ diff --git a/Sources/Utils/FGlobal.hpp b/Sources/Utils/FGlobal.hpp index ca7854cce5a5963be77bb368df42e8fe807ea106..778090340156444a242dca6eb7f21baa7f9431c2 100644 --- a/Sources/Utils/FGlobal.hpp +++ b/Sources/Utils/FGlobal.hpp @@ -41,7 +41,7 @@ static const int FThreadNumbers = 4; // Types /////////////////////////////////////////////////////// -typedef float FReal; +typedef double FReal; /////////////////////////////////////////////////////// // Restrict diff --git a/Sources/Utils/FMath.hpp b/Sources/Utils/FMath.hpp index 83b6a1800121bbf6efe01f1d001d7f1d8008dfe8..ab464fbf4b788e3e7894c77a4a2a5e2d5ff4a170 100644 --- a/Sources/Utils/FMath.hpp +++ b/Sources/Utils/FMath.hpp @@ -13,9 +13,9 @@ * Propose basic math functions or indirections to std math. */ struct FMath{ - static const FReal FPi; //< Pi constant - static const FReal FPiDiv2; //< Pi/2 constant - static const FReal Epsilon; //< Epsilon + static const double FPi; //< Pi constant + static const double FPiDiv2; //< Pi/2 constant + static const double Epsilon; //< Epsilon /** To get absolute value */ template <class NumType> @@ -94,9 +94,9 @@ struct FMath{ } }; -const FReal FMath::FPi = M_PI; -const FReal FMath::FPiDiv2 = M_PI_2; -const FReal FMath::Epsilon = 0.00000000000000000001; +const double FMath::FPi = M_PI; +const double FMath::FPiDiv2 = M_PI_2; +const double FMath::Epsilon = 0.00000000000000000001; #endif //FMATH_HPP diff --git a/Tests/testApplication.cpp b/Tests/testApplication.cpp index 9eb65975ed6d59c48fec778244716c83c3ab522b..8ea44c6314a3dc62fad2178ef2f1d4c8698786f5 100644 --- a/Tests/testApplication.cpp +++ b/Tests/testApplication.cpp @@ -15,11 +15,11 @@ // Compile by mpic++ testApplication.cpp ../Sources/Utils/FAssertable.cpp -o testApplication.exe // run by mpirun -np 4 ./testApplication.exe #include "../Sources/Utils/FMpiApplication.hpp" -#define ApplicationImplementation FMpiApplication +typedef FMpiApplication ApplicationImplementation; #else // Compile by g++ testApplication.cpp ../Sources/Utils/FAssertable.cpp -o testApplication.exe #include "../Sources/Utils/FSingleApplication.hpp" -#define ApplicationImplementation FSingleApplication +typedef FSingleApplication ApplicationImplementation; #endif //================================================================================================ diff --git a/Tests/testFmbBlasAlgorithm.cpp b/Tests/testFmbBlasAlgorithm.cpp new file mode 100644 index 0000000000000000000000000000000000000000..c158dc7593b1997004078db4ce473b69586c813a --- /dev/null +++ b/Tests/testFmbBlasAlgorithm.cpp @@ -0,0 +1,172 @@ +// /!\ Please, you must read the license at the bottom of this page + +#include <iostream> + +#include <stdio.h> +#include <stdlib.h> + +#include "../Sources/Utils/FTic.hpp" + +#include "../Sources/Containers/FOctree.hpp" +#include "../Sources/Containers/FList.hpp" + +#include "../Sources/Components/FFmaParticule.hpp" +#include "../Sources/Extenssions/FExtendForces.hpp" +#include "../Sources/Extenssions/FExtendPotential.hpp" + +#include "../Sources/Components/FBasicCell.hpp" +#include "../Sources/Fmb/FExtendFmbCell.hpp" + +#include "../Sources/Core/FFmmAlgorithm.hpp" +#include "../Sources/Core/FFmmAlgorithmArray.hpp" +#include "../Sources/Core/FFmmAlgorithmThreaded.hpp" +#include "../Sources/Core/FFmmAlgorithmTask.hpp" + +#include "../Sources/Components/FSimpleLeaf.hpp" + +#include "../Sources/Fmb/FFmbKernelsBlas.hpp" + +#include "../Sources/Files/FFMALoader.hpp" + +// With openmp : g++ testFmbBlasAlgorithm.cpp ../Sources/Utils/FAssertable.cpp ../Sources/Utils/FDebug.cpp ../Sources/Utils/FTrace.cpp -lgomp -fopenmp -lblas -O2 -o testFmbBlasAlgorithm.exe +// icpc -openmp -openmp-lib=compat testFmbAlgorithm.cpp ../Sources/Utils/FAssertable.cpp ../Sources/Utils/FDebug.cpp -O2 -o testFmbAlgorithm.exe + +/** This program show an example of use of + * the fmm basic algo + * it also check that eachh particules is little or longer + * related that each other + */ + + +/** Fmb class has to extend {FExtendForces,FExtendPotential,FExtendPhysicalValue} + * Because we use fma loader it needs {FFmaParticule} + */ +class FmbParticule : public FFmaParticule, public FExtendForces, public FExtendPotential { +public: +}; + +/** Custom cell + * + */ +class FmbCell : public FBasicCell, public FExtendFmbCell { +public: +}; + + +// Simply create particules and try the kernels +int main(int argc, char ** argv){ + ///////////////////////What we do///////////////////////////// + std::cout << ">> This executable has to be used to test fmb algorithm.\n"; + ////////////////////////////////////////////////////////////// + + const int NbLevels = 9;//10; + const int SizeSubLevels = 3;//3 + FTic counter; + const char* const defaultFilename = "testLoaderFMA.fma"; //../../Data/ "testLoaderFMA.fma" "testFMAlgorithm.fma" Sphere.fma + const char* filename; + + if(argc == 1){ + std::cout << "You have to give a .fma file in argument.\n"; + std::cout << "The program will try a default file : " << defaultFilename << "\n"; + filename = defaultFilename; + } + else{ + filename = argv[1]; + std::cout << "Opening : " << filename << "\n"; + } + + FFMALoader<FmbParticule> loader(filename); + if(!loader.isValide()){ + std::cout << "Loader Error, " << filename << " is missing\n"; + return 1; + } + + // ----------------------------------------------------- + + FOctree<FmbParticule, FmbCell, FSimpleLeaf, NbLevels, SizeSubLevels> tree(loader.getBoxWidth(),loader.getCenterOfBox()); + + // ----------------------------------------------------- + + std::cout << "Creating " << loader.getNumberOfParticules() << " particules ..." << std::endl; + counter.tic(); + + FmbParticule* particules = new FmbParticule[loader.getNumberOfParticules()]; + + for(int idxPart = 0 ; idxPart < loader.getNumberOfParticules() ; ++idxPart){ + loader.fillParticule(&particules[idxPart]); + } + + counter.tac(); + std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; + + // ----------------------------------------------------- + + std::cout << "Inserting particules ..." << std::endl; + counter.tic(); + for(long idxPart = 0 ; idxPart < loader.getNumberOfParticules() ; ++idxPart){ + tree.insert(&particules[idxPart]); + } + counter.tac(); + std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; + + // ----------------------------------------------------- + + std::cout << "Working on particules ..." << std::endl; + counter.tic(); + + //FFmbKernelsBlas + FFmbKernelsBlas<FmbParticule, FmbCell, NbLevels> kernels(loader.getBoxWidth()); + //FFMMAlgorithm FFMMAlgorithmThreaded FFMMAlgorithmArray FFMMAlgorithmTask + FFmmAlgorithm<FFmbKernelsBlas, FmbParticule, FmbCell, FSimpleLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); + algo.execute(); + + counter.tac(); + std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; + + //std::cout << "Foces Sum x = " << kernels.getForcesSum().getX() << " y = " << kernels.getForcesSum().getY() << " z = " << kernels.getForcesSum().getZ() << std::endl; + //std::cout << "Potential = " << kernels.getPotential() << std::endl; + + { // get sum forces&potential + FReal potential = 0; + F3DPosition forces; + FOctree<FmbParticule, FmbCell, FSimpleLeaf, NbLevels, SizeSubLevels>::Iterator octreeIterator(&tree); + octreeIterator.gotoBottomLeft(); + do{ + FList<FmbParticule*>::ConstBasicIterator iter(*octreeIterator.getCurrentListTargets()); + while( iter.isValide() ){ + potential += iter.value()->getPotential() * iter.value()->getPhysicalValue(); + forces += iter.value()->getForces(); + + //printf("x = %e y = %e z = %e \n",iter.value()->getPosition().getX(),iter.value()->getPosition().getY(),iter.value()->getPosition().getZ()); + //printf("\t fx = %e fy = %e fz = %e \n",iter.value()->getForces().getX(),iter.value()->getForces().getY(),iter.value()->getForces().getZ()); + + //printf("\t\t Sum Forces ( %e , %e , %e)\n", + //forces.getX(),forces.getY(),forces.getZ()); + + iter.progress(); + } + } while(octreeIterator.moveRight()); + + std::cout << "Foces Sum x = " << forces.getX() << " y = " << forces.getY() << " z = " << forces.getZ() << std::endl; + std::cout << "Potential = " << potential << std::endl; + } + + + // ----------------------------------------------------- + + std::cout << "Deleting particules ..." << std::endl; + counter.tic(); + for(long idxPart = 0 ; idxPart < loader.getNumberOfParticules() ; ++idxPart){ + particules[idxPart].~FmbParticule(); + } + delete [] particules; + counter.tac(); + std::cout << "Done " << "(" << counter.elapsed() << "s)." << std::endl; + + // ----------------------------------------------------- + + return 0; +} + + +// [--LICENSE--] diff --git a/Tests/testFmmAlgorithmTsm.cpp b/Tests/testFmmAlgorithmTsm.cpp index 38a2243e3e1224c77202b13569bb3ce5ff10045a..df424af45a4140df266e3f1fe6a1a5fcaa52a2b7 100644 --- a/Tests/testFmmAlgorithmTsm.cpp +++ b/Tests/testFmmAlgorithmTsm.cpp @@ -22,10 +22,7 @@ #include "../Sources/Extenssions/FExtendCellType.hpp" #include "../Sources/Core/FFmmAlgorithmTsm.hpp" -#include "../Sources/Core/FFmmAlgorithm.hpp" -#include "../Sources/Core/FFmmAlgorithmArray.hpp" -#include "../Sources/Core/FFmmAlgorithmThreaded.hpp" -#include "../Sources/Core/FFmmAlgorithmTask.hpp" +#include "../Sources/Core/FFmmAlgorithmArrayTsm.hpp" #include "../Sources/Components/FBasicKernels.hpp" @@ -93,8 +90,8 @@ int main(int argc, char ** argv){ // FTestKernels FBasicKernels FTestKernels<FTestParticuleTsm, FTestCellTsm, NbLevels> kernels; - //FFMMAlgorithm FFMMAlgorithmThreaded FFMMAlgorithmArray FFMMAlgorithmTask - FFmmAlgorithm<FTestKernels, FTestParticuleTsm, FTestCellTsm, FTypedLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); + //FFMMAlgorithmTsm FFMMAlgorithmArrayTsm + FFmmAlgorithmArrayTsm<FTestKernels, FTestParticuleTsm, FTestCellTsm, FTypedLeaf, NbLevels, SizeSubLevels> algo(&tree,&kernels); algo.execute(); counter.tac();