diff --git a/Addons/CKernelApi/Src/FScalFMMEngine.hpp b/Addons/CKernelApi/Src/FScalFMMEngine.hpp index a397fb7dfebc00b758f150a96ce1f3b20cc132fe..39cb818787eeb3669f25bb014210ee40d19c0839 100644 --- a/Addons/CKernelApi/Src/FScalFMMEngine.hpp +++ b/Addons/CKernelApi/Src/FScalFMMEngine.hpp @@ -178,37 +178,6 @@ public: } - /** Test ... */ - struct RunContainer{ - template< int nbAttributeToInsert,class ContainerClass,class LeafClass, class CellClass> - static void Run(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, - int NbPartToInsert,int * strideForEachAtt, - FReal* rawDatas){ - generic_tree_abstract_insert<ContainerClass,LeafClass,CellClass,nbAttributeToInsert>(octree, - NbPartToInsert,strideForEachAtt,rawDatas); - } - }; - - template<class ContainerClass,class LeafClass, class CellClass, int nbAttributeToInsert> - void generic_tree_abstract_insert(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, - int NbPartToInsert,int * strideForEachAtt, - FReal* rawDatas){ - for(FSize idxPart = 0; idxPart<NbPartToInsert ; ++idxPart){ - FPoint<FReal> pos = FPoint<FReal>(rawDatas[0],rawDatas[1],rawDatas[2]); - MortonIndex index = octree->getMortonFromPosition(pos); - //Insert with how many attributes ??? - octree->insert(pos,idxPart); - //Get again the container - ContainerClass * containerToFill = octree->getLeafSrc(index);//cannot be nullptr - std::array<FReal,nbAttributeToInsert> arrayOfAttribute; - for(int idxAtt = 0; idxAtt<nbAttributeToInsert ; ++idxAtt){ - arrayOfAttribute[idxAtt] = rawDatas[3+ strideForEachAtt[idxAtt]]; - } - int idxToRemove = containerToFill->getNbParticles(); - containerToFill->remove(&idxToRemove,1); - containerToFill->push(pos,idxPart,arrayOfAttribute); - } - } template<class ContainerClass,class LeafClass,class CellClass> void generic_get_forces_xyz(FOctree<FReal,CellClass,ContainerClass,LeafClass> * octree, @@ -820,15 +789,17 @@ public: * get the time spent in each operator. 
*/ virtual void get_timers(FReal * Timers){ - const FTic * timers = algoTimer->getAllTimers(); - int nbTimers = algoTimer->getNbOfTimerRecorded(); - for(int idTimer = 0; idTimer<nbTimers ; ++idTimer){ - Timers[idTimer] = timers[idTimer].elapsed(); - } + Timers[0] = algoTimer->Timers["P2M"].elapsed(); + Timers[1] = algoTimer->Timers["M2M"].elapsed(); + Timers[2] = algoTimer->Timers["M2L"].elapsed(); + Timers[3] = algoTimer->Timers["L2L"].elapsed(); + Timers[4] = algoTimer->Timers["L2P"].elapsed(); + Timers[5] = algoTimer->Timers["P2P"].elapsed(); + Timers[6] = algoTimer->Timers["NearField"].elapsed(); } virtual int get_nb_timers(){ - return algoTimer->getNbOfTimerRecorded(); + return FAlgorithmTimers::nbTimers; } virtual void set_upper_limit(int upperLimit){ diff --git a/CMakeLists.txt b/CMakeLists.txt index 5a771b490d74dbcb44b33cc751d48686e2bd3384..6ce3ed3e37524e5a9cf1ad014d276dc6184bb449 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -34,7 +34,7 @@ GetCpuInfos() # SCALFMM version number. An even minor number corresponds to releases. 
set(SCALFMM_MAJOR_VERSION 1) -set(SCALFMM_MINOR_VERSION 4) +set(SCALFMM_MINOR_VERSION 5) set(SCALFMM_PATCH_VERSION 0) set(SCALFMM_VERSION "${SCALFMM_MAJOR_VERSION}.${SCALFMM_MINOR_VERSION}.${SCALFMM_PATCH_VERSION}" ) @@ -375,35 +375,53 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/ list(APPEND CMAKE_INSTALL_RPATH "${LAPACK_LIBRARY_DIRS}") endif() message (STATUS "check BLAS Fortran mangling") - # check blas and lapack symbols naming - set(CMAKE_REQUIRED_LIBRARIES "${BLAS_LIBRARIES}") - check_function_exists(dgemv_ DGEMV_ADD_) - set (SCALFMM_BLAS_UPCASE OFF) - set (SCALFMM_BLAS_NOCHANGE OFF) - message (STATUS "BLAS dgemv_ " ${DGEMV_ADD_} ${SCALFMM_BLAS_UPCASE}) - if (DGEMV_ADD_) - set (SCALFMM_BLAS_ADD_ ON) - message (STATUS "BLAS dgemv_ symbol found, SCALFMM_BLAS_ADD_ is ON") - else (DGEMV_ADD_) - set (SCALFMM_BLAS_ADD_ OFF) + # add options to let the user be able to force a behavior + option( SCALFMM_BLAS_ADD_ "Set to ON to force calls to BLAS Fortran symbols with _ (ex: dgemm_)" OFF ) + option( SCALFMM_BLAS_UPCASE "Set to ON to force calls to BLAS Fortran symbols in capital (ex: DGEMM)" OFF ) + option( SCALFMM_BLAS_NOCHANGE "Set to ON to force calls to BLAS Fortran symbols with no change (ex: dgemm)" OFF ) + # if options not changed by user then auto-detection + if (NOT SCALFMM_BLAS_ADD_ AND NOT SCALFMM_BLAS_UPCASE AND NOT SCALFMM_BLAS_NOCHANGE) + # give blas libraries and check dgemm symbol + set(CMAKE_REQUIRED_LIBRARIES "${BLAS_LIBRARIES}") + check_function_exists(dgemv_ DGEMV_ADD_) check_function_exists(DGEMV DGEMV_UPCASE) - if (DGEMV_UPCASE) - set (SCALFMM_BLAS_UPCASE ON) - message (STATUS "BLAS DGEMV symbol found, SCALFMM_BLAS_UPCASE is ON") - else (DGEMV_UPCASE) - # set (SCALFMM_BLAS_UPCASE OFF) - check_function_exists(dgemv DGEMV_NOCHANGE) - if (DGEMV_NOCHANGE) - set (SCALFMM_BLAS_NOCHANGE ON) - message (STATUS "BLAS dgemv symbol found, SCALFMM_BLAS_NOCHANGE is ON") - # else (DGEMV_NOCHANGE) - # set 
(SCALFMM_BLAS_NOCHANGE OFF) - endif (DGEMV_NOCHANGE) - endif (DGEMV_UPCASE) - endif (DGEMV_ADD_) - if ( (NOT DGEMV_ADD_) AND (NOT DGEMV_UPCASE) AND (NOT DGEMV_NOCHANGE) ) - message(FATAL_ERROR "BLAS Fortran mangling cannot be properly detected") - endif () + check_function_exists(dgemv DGEMV_NOCHANGE) + # here we consider that the first kind of symbol found will be the one used + # current order is: ADD_, UPCASE, NOCHANGE + if (DGEMV_ADD_) + set (SCALFMM_BLAS_ADD_ ON) + set (SCALFMM_BLAS_UPCASE OFF) + set (SCALFMM_BLAS_NOCHANGE OFF) + message (STATUS "BLAS dgemv_ symbol found, SCALFMM_BLAS_ADD_ is ON") + else (DGEMV_ADD_) + if (DGEMV_UPCASE) + set (SCALFMM_BLAS_ADD_ OFF) + set (SCALFMM_BLAS_UPCASE ON) + set (SCALFMM_BLAS_NOCHANGE OFF) + message (STATUS "BLAS DGEMV symbol found, SCALFMM_BLAS_UPCASE is ON") + else (DGEMV_UPCASE) + if (DGEMV_NOCHANGE) + set (SCALFMM_BLAS_ADD_ OFF) + set (SCALFMM_BLAS_UPCASE OFF) + set (SCALFMM_BLAS_NOCHANGE ON) + message (STATUS "BLAS dgemv symbol found, SCALFMM_BLAS_NOCHANGE is ON") + endif (DGEMV_NOCHANGE) + endif (DGEMV_UPCASE) + endif (DGEMV_ADD_) + if ( (NOT DGEMV_ADD_) AND (NOT DGEMV_UPCASE) AND (NOT DGEMV_NOCHANGE) ) + message(FATAL_ERROR "BLAS Fortran mangling is not properly detected - please check your BLAS libraries") + endif () + else (NOT SCALFMM_BLAS_ADD_ AND NOT SCALFMM_BLAS_UPCASE AND NOT SCALFMM_BLAS_NOCHANGE) + if (SCALFMM_BLAS_ADD_) + message (STATUS "SCALFMM_BLAS_ADD_ is set to ON") + endif() + if (SCALFMM_BLAS_UPCASE) + message (STATUS "SCALFMM_BLAS_UPCASE is set to ON") + endif() + if (SCALFMM_BLAS_NOCHANGE) + message (STATUS "SCALFMM_BLAS_NOCHANGE is set to ON") + endif() + endif (NOT SCALFMM_BLAS_ADD_ AND NOT SCALFMM_BLAS_UPCASE AND NOT SCALFMM_BLAS_NOCHANGE) else() message(WARNING "BLAS has not been found, SCALFMM will continue to compile but some applications will be disabled.") message(WARNING "If you have BLAS set BLAS_LIBDIR, BLAS_INCDIR or BLAS_DIR (CMake variables using -D or environment variables).") @@ 
-466,8 +484,12 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/ endif(SCALFMM_USE_FFT) list(APPEND FUSE_LIST "FFT") message(STATUS "SCALFMM_USE_FFT = ${SCALFMM_USE_FFT}") - list(REMOVE_DUPLICATES SCALFMM_INCLUDES) - list(REMOVE_DUPLICATES SCALFMM_LIBRARIES) + if (SCALFMM_INCLUDES) + list(REMOVE_DUPLICATES SCALFMM_INCLUDES) + endif() + if (SCALFMM_LIBRARIES) + list(REMOVE_DUPLICATES SCALFMM_LIBRARIES) + endif() # # C++ 2011 diff --git a/Src/GroupTree/Chebyshev/FChebCellPOD.hpp b/Src/GroupTree/Chebyshev/FChebCellPOD.hpp index d16972dac0bb4d73e4085d8f9a21c3028ce6b16b..8d801f5c54d169cf2e22212ccb23dddf33d1dbd2 100644 --- a/Src/GroupTree/Chebyshev/FChebCellPOD.hpp +++ b/Src/GroupTree/Chebyshev/FChebCellPOD.hpp @@ -51,6 +51,16 @@ public: symb->mortonIndex = inMortonIndex; } + /** To get the cell level */ + int getLevel() const { + return symb->level; + } + + /** To set the cell level */ + void setLevel(const int level) { + symb->level = level; + } + /** To get the position */ FTreeCoordinate getCoordinate() const { return FTreeCoordinate(symb->coordinates[0], @@ -103,4 +113,3 @@ public: #endif // FCHEBCELLPOD_HPP - diff --git a/Src/GroupTree/Core/FBasicCellPOD.hpp b/Src/GroupTree/Core/FBasicCellPOD.hpp index 1060b846b79b308b66d1b2054a0e912b38e1ff17..d47258c98c7310d0ea584216f1f8aee967caa9ca 100644 --- a/Src/GroupTree/Core/FBasicCellPOD.hpp +++ b/Src/GroupTree/Core/FBasicCellPOD.hpp @@ -9,7 +9,7 @@ struct alignas(FStarPUDefaultAlign::StructAlign) FBasicCellPOD { MortonIndex mortonIndex; int coordinates[3]; + int level; }; #endif // FBASICCELLPOD_HPP - diff --git a/Src/GroupTree/Core/FGroupTree.hpp b/Src/GroupTree/Core/FGroupTree.hpp index 944d3f7c40e4a788ede9aa7fc2f36e90193485b1..1bcb7d7118ac972a75b99b516451a0042b679aeb 100644 --- a/Src/GroupTree/Core/FGroupTree.hpp +++ b/Src/GroupTree/Core/FGroupTree.hpp @@ -99,6 +99,7 @@ public: CompositeCellClass newNode = newBlock->getCompleteCell(cellIdInBlock); newNode.setMortonIndex(newNodeIndex); 
newNode.setCoordinate(newNodeCoordinate); + newNode.setLevel(idxLevel); // Add leaf nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(newNodeIndex, cellIdInBlock, @@ -149,6 +150,7 @@ public: CompositeCellClass newNode = newBlock->getCompleteCell(cellIdInBlock); newNode.setMortonIndex(newNodeIndex); newNode.setCoordinate(newNodeCoordinate); + newNode.setLevel(idxLevel); cellIdInBlock += 1; blockIteratorInOctree.moveRight(); diff --git a/Src/GroupTree/Rotation/FRotationCellPOD.hpp b/Src/GroupTree/Rotation/FRotationCellPOD.hpp index 8b86ae7c11f7bbd6bf7457f7bf2958546da90df5..10e13f04ff4b68f66335ffca8e2b024a7ea1ec0e 100644 --- a/Src/GroupTree/Rotation/FRotationCellPOD.hpp +++ b/Src/GroupTree/Rotation/FRotationCellPOD.hpp @@ -53,6 +53,16 @@ public: symb->mortonIndex = inMortonIndex; } + /** To get the cell level */ + int getLevel() const { + return symb->level; + } + + /** To set the cell level */ + void setLevel(const int level) { + symb->level = level; + } + /** To get the position */ FTreeCoordinate getCoordinate() const { return FTreeCoordinate(symb->coordinates[0], @@ -109,4 +119,3 @@ public: #endif // FROTATIONCELLPOD_HPP - diff --git a/Src/GroupTree/Taylor/FTaylorCellPOD.hpp b/Src/GroupTree/Taylor/FTaylorCellPOD.hpp index 9fba78bf2524c40b16de1f95e97e7e4d77864ac8..3c4ff6d22d77bb9999da2b79850de9045b16cc32 100644 --- a/Src/GroupTree/Taylor/FTaylorCellPOD.hpp +++ b/Src/GroupTree/Taylor/FTaylorCellPOD.hpp @@ -51,6 +51,16 @@ public: symb->mortonIndex = inMortonIndex; } + /** To get the cell level */ + int getLevel() const { + return symb->level; + } + + /** To set the cell level */ + void setLevel(const int level) { + symb->level = level; + } + /** To get the position */ FTreeCoordinate getCoordinate() const { return FTreeCoordinate(symb->coordinates[0], @@ -102,4 +112,3 @@ public: }; #endif // FTAYLORCELLPOD_HPP - diff --git a/Src/GroupTree/TestKernel/FTestCellPOD.hpp b/Src/GroupTree/TestKernel/FTestCellPOD.hpp index 
1dd5d2110bf0e46bacfba25cabb02af5a84d77c8..6ac22cd16f432a84c1b3385786608ac7766db604 100644 --- a/Src/GroupTree/TestKernel/FTestCellPOD.hpp +++ b/Src/GroupTree/TestKernel/FTestCellPOD.hpp @@ -34,6 +34,16 @@ public: symb->mortonIndex = inMortonIndex; } + /** To get the cell level */ + int getLevel() const { + return symb->level; + } + + /** To set the cell level */ + void setLevel(const int level) { + symb->level = level; + } + /** To get the position */ FTreeCoordinate getCoordinate() const { return FTreeCoordinate(symb->coordinates[0], @@ -81,4 +91,3 @@ public: #endif // FTESTCELLPOD_HPP - diff --git a/Src/GroupTree/Uniform/FUnifCellPOD.hpp b/Src/GroupTree/Uniform/FUnifCellPOD.hpp index 578ee670c2677df738719093379dd62d2b2da4b1..65cca0d4721c81d11c7bafc73702e6e9a688be33 100644 --- a/Src/GroupTree/Uniform/FUnifCellPOD.hpp +++ b/Src/GroupTree/Uniform/FUnifCellPOD.hpp @@ -51,6 +51,16 @@ public: symb->mortonIndex = inMortonIndex; } + /** To get the cell level */ + int getLevel() const { + return symb->level; + } + + /** To set the cell level */ + void setLevel(const int level) { + symb->level = level; + } + /** To get the position */ FTreeCoordinate getCoordinate() const { return FTreeCoordinate(symb->coordinates[0], @@ -129,4 +139,3 @@ public: }; #endif // FUNIFCELLPOD_HPP - diff --git a/Src/Kernels/Interpolation/FInterpMatrixKernel_TensorialInteractions.hpp b/Src/Kernels/Interpolation/FInterpMatrixKernel_TensorialInteractions.hpp index e05977bfa8a209b4a7b31ddfc8ab9a947d205808..e018112f8655543d0f91a78498712bb313c14d47 100644 --- a/Src/Kernels/Interpolation/FInterpMatrixKernel_TensorialInteractions.hpp +++ b/Src/Kernels/Interpolation/FInterpMatrixKernel_TensorialInteractions.hpp @@ -45,16 +45,42 @@ * The table applyTab provides the indices in the reduced storage table * corresponding to the application scheme depicted earlier. * - * PB: BEWARE! Homogeneous matrix kernels do not support cell width extension + * \warning BEWARE! 
Homogeneous matrix kernels do not support cell width extension * yet. Is it possible to find a reference width and a scale factor such that * only 1 set of M2L opt can be used for all levels?? - * + * The definition of the potential p and force f are extended to the case +* of tensorial interaction kernels: +* +*\f$ p_i(x) = K_{ip}(x,y)w_p(y),\f$ \f$ \forall i=1..NPOT, p=1..NPV\f$ +* +* \f$f_{ik}= w_p(x)K_{ip,k}(x,y)w_p(y)\f$ +* +* Since the interpolation scheme is such that +* +*\f$ p_i(x) \approx S^m(x) L^{m}_{ip}\f$ +* +* \f$f_{ik}= w_p(x) \nabla_k S^m(x) L^{m}_{ip}\f$ +* +* with +* +* \f$ L^{m}_{ip} = K^{mn}_{ip} S^n(y) w_p(y)\f$ (local expansion) +* + *\f$ M^{m}_{p} = S^n(y) w_p(y)\f$ (multipole expansion) +* +* then the multipole exp have NPV components and the local exp NPOT*NPV. +* +* NB1: Only the computation of forces requires that the sum over p is +* performed at L2P step. It could be done at M2L step for the potential. +* +* NB2: An efficient application of the matrix kernel is highly kernel +* dependent, we recommend overriding the P2M/M2L/L2P function of the kernel +* you are using in order to have optimal performance and set your own NRHS/NLHS.* */ //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// -// +/// // Tensorial Matrix Kernels (NCMP>1) // // The definition of the potential p and force f are extended to the case diff --git a/Src/Utils/FAlgorithmTimers.hpp b/Src/Utils/FAlgorithmTimers.hpp index f9eb1f2a56a7b70e5cf03258e160a75f8d408eed..dc5563b98a1c9d142fd0b11712c3124954a6668e 100644 --- a/Src/Utils/FAlgorithmTimers.hpp +++ b/Src/Utils/FAlgorithmTimers.hpp @@ -56,8 +56,13 @@ public: /// Elapsed time between last FTic::tic() and FTic::tac() for given timer.
double getTime(std::string TimerName) const{ - //assert to verify size - return Timers.at(TimerName).elapsed(); + double res = 0; /* value reported when no timer is stored under TimerName */ + try { + res = Timers.at(TimerName).elapsed(); + } catch(const std::out_of_range&) { /* lookup via at() failed; caught by const reference so the exception is neither copied nor sliced */ + res = 0; + } + return res; } /// Cumulated time between all FTic::tic() and FTic::tac() for given timer.