From c3e0c4a8bf11763a0f4205edbdb62a57075d3623 Mon Sep 17 00:00:00 2001 From: Olivier Coulaud <Olivier.Coulaud@inria.fr> Date: Thu, 14 Dec 2017 17:09:57 +0100 Subject: [PATCH] Restore letSTF just after separation with develop --- CMakeLists.txt | 9 +- .../algorithm/distributed/distribute.hpp | 1 + Contribs/inria/linear_tree/balance_tree.hpp | 6 +- Examples/CMakeLists.txt | 1 - Examples/RotationFMM.cpp | 53 +- LICENCE | 2 +- Obsolete/GroupTree/FBasicCellPOD.hpp | 6 +- Obsolete/GroupTree/FChebCellPOD.hpp | 8 +- Src/Adaptive/FAdaptiveStarPU.hpp | 4 +- Src/Adaptive/starpu_node_data_handles.hpp | 4 +- Src/Containers/FOctree.hpp | 7 +- Src/Core/FCoreCommon.hpp | 22 +- Src/Files/FAbstractLoader.hpp | 2 + Src/Files/FBlockedMpiInterpolation.hpp | 228 + Src/Files/FFmaGenericLoader.hpp | 28 +- Src/Files/FMpiFmaGenericLoader.hpp | 266 + Src/Files/FTreeMpiCsvSaver.hpp | 4 +- Src/GroupTree/Core/FBlockedLinearTree.hpp | 196 - Src/GroupTree/Core/FBuildGroupTree.hpp | 397 ++ Src/GroupTree/Core/FCheckResults.hpp | 93 + .../Core/FDistributedGroupTreeBuilder.hpp | 1515 ++++-- .../FDistributedLETGroupTreeValidator.hpp | 260 + Src/GroupTree/Core/FGroupLinearTree.hpp | 238 +- Src/GroupTree/Core/FGroupOfCells.hpp | 28 +- Src/GroupTree/Core/FGroupOfParticles.hpp | 16 +- .../FGroupTaskStarpuImplicitAlgorithm.hpp | 4659 +++++++++-------- Src/GroupTree/Core/FGroupTools.hpp | 266 + Src/GroupTree/Core/FGroupTree.hpp | 2721 ++++++---- Src/GroupTree/Core/FOutOfBlockInteraction.hpp | 2 +- .../Core/FP2PGroupParticleContainer.hpp | 6 +- .../FStarPUCptInteractionsWrapper.hpp | 8 +- Src/Utils/FMpi.hpp | 37 +- Src/Utils/FValidationAlgorithm.hpp | 219 + Tests/CMakeLists.txt | 1 + Tests/GroupTree/testBlockedChebyshev.cpp | 3 +- .../testBlockedImplicitChebyshev.cpp | 3 + .../GroupTree/testBlockedImplicitUniform.cpp | 3 +- .../GroupTree/testBlockedMpiInterpolation.cpp | 68 + .../LET_STF_Implicit/testCompareGroupTree.cpp | 431 ++ .../testFMMInterpolationStarPU.hpp | 284 + .../testGroupTreeFromLinearTree.cpp | 218 + Tests/LET_STF_Implicit/testLETGroupTree.cpp | 254 + .../testLetImplicitChebyshev.cpp | 30 + .../testLetImplicitUniform.cpp | 29 + Tests/LET_STF_Implicit/testSizeGroupTree.cpp | 356 ++ .../LET_STF_Implicit/testSizeLETGroupTree.cpp | 267 + UTests/utestLetTree.cpp | 213 + Utils/stdComplex.hpp | 26 + 48 files changed, 9428 insertions(+), 4070 deletions(-) create mode 100644 Src/Files/FBlockedMpiInterpolation.hpp delete mode 100644 Src/GroupTree/Core/FBlockedLinearTree.hpp create mode 100644 Src/GroupTree/Core/FBuildGroupTree.hpp create mode 100644 Src/GroupTree/Core/FCheckResults.hpp create mode 100644 Src/GroupTree/Core/FDistributedLETGroupTreeValidator.hpp create mode 100644 Src/GroupTree/Core/FGroupTools.hpp create mode 100644 Tests/GroupTree/testBlockedMpiInterpolation.cpp create mode 100644 Tests/LET_STF_Implicit/testCompareGroupTree.cpp create mode 100644 Tests/LET_STF_Implicit/testFMMInterpolationStarPU.hpp create mode 100644 Tests/LET_STF_Implicit/testGroupTreeFromLinearTree.cpp create mode 100644 Tests/LET_STF_Implicit/testLETGroupTree.cpp create mode 100644 Tests/LET_STF_Implicit/testLetImplicitChebyshev.cpp create mode 100644 Tests/LET_STF_Implicit/testLetImplicitUniform.cpp create mode 100644 Tests/LET_STF_Implicit/testSizeGroupTree.cpp create mode 100644 Tests/LET_STF_Implicit/testSizeLETGroupTree.cpp create mode 100644 UTests/utestLetTree.cpp create mode 100644 Utils/stdComplex.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 593bb1fef..a397bb14d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -7,11 
+7,12 @@ endif() cmake_policy(SET CMP0004 NEW) set(CMAKE_DISABLE_IN_SOURCE_BUILD ON) +set(FUSE_LIST " MPI;BLAS;FFT;STARPU;CUDA;OPENCL;OMP4;SSE;AVX;AVX2;MIC;MPI2") #=========================================================================== # Project Declaration #=========================================================================== -project(SCALFMM C CXX) +project(SCALFMM C CXX ) # check if compiling into source directories string(COMPARE EQUAL "${CMAKE_CURRENT_SOURCE_DIR}" "${CMAKE_CURRENT_BINARY_DIR}" insource) @@ -215,7 +216,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/ ############################################################################## # FUSE list # ############################################################################## - set(FUSE_LIST "") + # set(FUSE_LIST "") # then do list(APPEND FUSE_LIST "BLAS") to protect from FUSE_BLAS list(APPEND FUSE_LIST "MPI") @@ -307,7 +308,6 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/ endif() find_package(MPI REQUIRED) - if (MPI_CXX_INCLUDE_PATH) include_directories( ${MPI_CXX_INCLUDE_PATH} ) endif() @@ -315,7 +315,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/ set(SCALFMM_CXX_FLAGS "${SCALFMM_CXX_FLAGS} ${MPI_CXX_COMPILE_FLAGS}") endif() if (MPI_CXX_INCLUDE_PATH) - set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES}; ${MPI_CXX_INCLUDE_PATH}") + set(SCALFMM_INCLUDES "${SCALFMM_INCLUDES};${MPI_CXX_INCLUDE_PATH}") endif() if (MPI_CXX_LINK_FLAGS) list(APPEND "CMAKE_EXE_LINKER_FLAGS ${MPI_CXX_LINK_FLAGS}") @@ -821,6 +821,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/ # Build - lib # ################################################################## # + MESSAGE(STATUS "FUSE : ${FUSE_LIST} ") add_subdirectory(Src) # Link with scalfmm lib set(scalfmm_lib scalfmm) diff --git a/Contribs/inria/algorithm/distributed/distribute.hpp b/Contribs/inria/algorithm/distributed/distribute.hpp index ed9958f8c..a391dda3e 100644 --- a/Contribs/inria/algorithm/distributed/distribute.hpp +++ b/Contribs/inria/algorithm/distributed/distribute.hpp @@ -13,6 +13,7 @@ #include "inria/utils.hpp" #include "inria/meta.hpp" + #include <numeric> #include <iterator> #include <vector> diff --git a/Contribs/inria/linear_tree/balance_tree.hpp b/Contribs/inria/linear_tree/balance_tree.hpp index b37ebbf8f..b8c7d8bfe 100644 --- a/Contribs/inria/linear_tree/balance_tree.hpp +++ b/Contribs/inria/linear_tree/balance_tree.hpp @@ -189,8 +189,8 @@ create_balanced_linear_tree( * @return [description] */ std::size_t send_get_max_morton_idx( - inria::mpi_config& conf, - std::size_t& max_morton_idx + inria::mpi_config& conf, + std::size_t& max_morton_idx ) { // Setting parametter @@ -291,7 +291,7 @@ std::vector<details::cblt::node_info_from_range<Range>> create_balanced_linear_t last_morton_index = curr_idx_morton; ++nb_leaf; } - } + } return {begin(lin_tree), end(lin_tree)}; } diff --git a/Examples/CMakeLists.txt b/Examples/CMakeLists.txt index 36551cded..aad3d39cb 100644 --- a/Examples/CMakeLists.txt +++ b/Examples/CMakeLists.txt @@ -14,7 +14,6 @@ file( ./*.cpp ) - # Adding the project sources dir as an include dir INCLUDE_DIRECTORIES( ${SCALFMM_BINARY_DIR}/Src diff --git a/Examples/RotationFMM.cpp b/Examples/RotationFMM.cpp index f2ff07731..148545cf4 100644 --- a/Examples/RotationFMM.cpp +++ b/Examples/RotationFMM.cpp @@ -56,7 +56,7 @@ int main(int argc, char* argv[]) FHelpDescribeAndExit(argc, argv, "Driver for HArmonic Spherical + 
Rotation -- kernel (1/r kernel).", FParameterDefinitions::InputFile, FParameterDefinitions::OctreeHeight, - FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::InputFile, + FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::OutputFile, FParameterDefinitions::NbThreads); const std::string defaultFile(/*SCALFMMDataPath+*/"../Data/test20k.fma"); @@ -188,7 +188,56 @@ int main(int argc, char* argv[]) } // ----------------------------------------------------- - + // ----------------------------------------------------- + if(FParameters::existParameter(argc, argv, FParameterDefinitions::OutputFile.options)){ + std::string name(FParameters::getStr(argc,argv,FParameterDefinitions::OutputFile.options, "output.fma")); + FFmaGenericWriter<FReal> writer(name) ; + // + FSize NbPoints = loader.getNumberOfParticles(); + FReal * particles ; + particles = new FReal[8*NbPoints] ; + memset(particles,0,8*NbPoints*sizeof(FReal)); + FSize j = 0 ; + tree.forEachLeaf([&](LeafClass* leaf){ + // + // Input + const FReal*const posX = leaf->getTargets()->getPositions()[0]; + const FReal*const posY = leaf->getTargets()->getPositions()[1]; + const FReal*const posZ = leaf->getTargets()->getPositions()[2]; + const FReal*const physicalValues = leaf->getTargets()->getPhysicalValues(); + const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); + // + // Computed data + const FReal*const potentials = leaf->getTargets()->getPotentials(); + const FReal*const forcesX = leaf->getTargets()->getForcesX(); + const FReal*const forcesY = leaf->getTargets()->getForcesY(); + const FReal*const forcesZ = leaf->getTargets()->getForcesZ(); + // + const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); + for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){ + j = 8*indexes[idxPart]; + particles[j] = posX[idxPart] ; + particles[j+1] = posY[idxPart] ; + particles[j+2] = posZ[idxPart] ; + particles[j+3] = physicalValues[idxPart] ; + particles[j+4] = potentials[idxPart] ; + particles[j+5] = forcesX[idxPart] ; + particles[j+6] = forcesY[idxPart] ; + particles[j+7] = forcesZ[idxPart] ; + } + }); + + writer.writeHeader( loader.getCenterOfBox(), loader.getBoxWidth() , NbPoints, sizeof(FReal), 8) ; + writer.writeArrayOfReal(particles, 8 , NbPoints); + + delete[] particles; + + // + std::string name1( "output.fma"); + // + FFmaGenericWriter<FReal> writer1(name1) ; + writer1.writeDistributionOfParticlesFromOctree(&tree,NbPoints) ; + } return 0; } diff --git a/LICENCE b/LICENCE index 1c9d7daf2..dec2b2083 100644 --- a/LICENCE +++ b/LICENCE @@ -1,4 +1,4 @@ -Copyright ScalFmm 2011-2016 INRIA +Copyright ScalFmm 2011-2017 INRIA ================================= This software is a computer program which purpose is to compute the FMM. 
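The output block added to RotationFMM.cpp above flattens each particle into eight consecutive FReal values, ordered x, y, z, physicalValue, potential, forceX, forceY, forceZ, stored at offset 8*index in the buffer passed to writeArrayOfReal. The short sketch below is only illustrative: the OutputRecord/unpackRecord names are hypothetical and not part of the patch, and FReal is assumed to be double as in the example.

#include <cstddef>

// Record layout written by the RotationFMM.cpp example above:
// 8 values per particle, addressed by the particle's original insertion index.
struct OutputRecord { double x, y, z, physicalValue, potential, fx, fy, fz; };

inline OutputRecord unpackRecord(const double* particles, std::size_t index) {
    const double* p = particles + 8 * index;   // first value of record 'index'
    return { p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7] };
}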
diff --git a/Obsolete/GroupTree/FBasicCellPOD.hpp b/Obsolete/GroupTree/FBasicCellPOD.hpp index d47258c98..132c43c01 100644 --- a/Obsolete/GroupTree/FBasicCellPOD.hpp +++ b/Obsolete/GroupTree/FBasicCellPOD.hpp @@ -2,9 +2,9 @@ #define FBASICCELLPOD_HPP -#include "../../Utils/FGlobal.hpp" -#include "../../Containers/FTreeCoordinate.hpp" -#include "../StarPUUtils/FStarPUDefaultAlign.hpp" +#include "../../Src/Utils/FGlobal.hpp" +#include "../../Src/Containers/FTreeCoordinate.hpp" +#include "../../Src/GroupTree/StarPUUtils/FStarPUDefaultAlign.hpp" struct alignas(FStarPUDefaultAlign::StructAlign) FBasicCellPOD { MortonIndex mortonIndex; diff --git a/Obsolete/GroupTree/FChebCellPOD.hpp b/Obsolete/GroupTree/FChebCellPOD.hpp index 8d801f5c5..e79b6b8d5 100644 --- a/Obsolete/GroupTree/FChebCellPOD.hpp +++ b/Obsolete/GroupTree/FChebCellPOD.hpp @@ -1,10 +1,10 @@ #ifndef FCHEBCELLPOD_HPP #define FCHEBCELLPOD_HPP -#include "../../Utils/FGlobal.hpp" -#include "../Core/FBasicCellPOD.hpp" -#include "../StarPUUtils/FStarPUDefaultAlign.hpp" -#include "../../Kernels/Chebyshev/FChebTensor.hpp" +#include "../../Src/Utils/FGlobal.hpp" +#include "FBasicCellPOD.hpp" +#include "../Src/GroupTree/StarPUUtils/FStarPUDefaultAlign.hpp" +#include "../../Src/Kernels/Chebyshev/FChebTensor.hpp" typedef FBasicCellPOD FChebCellPODCore; diff --git a/Src/Adaptive/FAdaptiveStarPU.hpp b/Src/Adaptive/FAdaptiveStarPU.hpp index f22ba0f36..4b8d57c70 100644 --- a/Src/Adaptive/FAdaptiveStarPU.hpp +++ b/Src/Adaptive/FAdaptiveStarPU.hpp @@ -1,6 +1,8 @@ #ifndef SCALFMM_STARPU_ALGO_HPP_ #define SCALFMM_STARPU_ALGO_HPP_ +//@FUSE_STARPU + #include <algorithm> #include <cmath> // Used to round box differences #include <functional> @@ -9,7 +11,7 @@ #include <vector> #include <unordered_map> -#include <starpu/1.2/starpu.h> +#include <starpu.h> #include "Core/FCoreCommon.hpp" #include "Containers/FTreeCoordinate.hpp" diff --git a/Src/Adaptive/starpu_node_data_handles.hpp b/Src/Adaptive/starpu_node_data_handles.hpp index 6e17f68cc..d18959e07 100644 --- a/Src/Adaptive/starpu_node_data_handles.hpp +++ b/Src/Adaptive/starpu_node_data_handles.hpp @@ -8,7 +8,9 @@ #ifndef _SCALFMM_STARPU_NODE_DATA_HANDLES_HPP_ #define _SCALFMM_STARPU_NODE_DATA_HANDLES_HPP_ -#include <starpu/1.2/starpu.h> +// @FUSE_STARPU + +#include <starpu.h> #include <utility> diff --git a/Src/Containers/FOctree.hpp b/Src/Containers/FOctree.hpp index 28f69cea8..57a1dc87b 100644 --- a/Src/Containers/FOctree.hpp +++ b/Src/Containers/FOctree.hpp @@ -43,9 +43,10 @@ class FOctree : public FNoCopyable { public: using FRealType = FReal; using CellClassType = CellClass; - using ContainerClassType = ContainerClass; - using LeafClassType = LeafClass_; //< The type of the Leaf used in the Octree - using LeafClass = LeafClass_; //< The type of the Leaf used in the Octree + using ContainerClassType = ContainerClass; //< The type of the container used to store particles in the Octree + using LeafClassType = LeafClass_; //< The type of the Leaf used in the Octree + using LeafClass = LeafClass_; //< The type of the Leaf used in the Octree + using LeafClass_T = LeafClass_; //< The type of the Leaf used in the Octree protected: typedef FOctree<FReal, CellClass , ContainerClass, LeafClass, CellAllocatorClass> OctreeType; diff --git a/Src/Core/FCoreCommon.hpp b/Src/Core/FCoreCommon.hpp index d89b0b1af..50b4a51a4 100644 --- a/Src/Core/FCoreCommon.hpp +++ b/Src/Core/FCoreCommon.hpp @@ -29,6 +29,27 @@ enum FFmmOperations { // FFmmNearAndFarFields = (FFmmNearField|FFmmFarField) }; +std::string 
FFmmOperations_string(/*enum FFmmOperations*/ const unsigned int & value){ + + //if (value & FFmmNearAndFarFields) return std::string("FFmmNearAndFarFields") ; + //if (value & FFmmFarField) return std::string("FFmmFarField") ; + //if (value & FFmmNearField) return std::string("FFmmNearField" ); + std::string op(""); + if (value & FFmmP2P) + op += " FFmmP2P |"; + if (value & FFmmP2M) + op += " FFmmP2M |"; + if (value & FFmmM2M) + op += " FFmmM2M |"; + if (value & FFmmM2L) + op += " FFmmM2L |"; + if (value & FFmmL2L) + op += " FFmmL2L |"; + if (value & FFmmL2P) + op += " FFmmL2P |"; + op.erase(op.size()-2,op.size()-1); + return op; +}; /** * \brief Algorithm interface @@ -98,7 +119,6 @@ protected: FAssertLF(FAbstractAlgorithm::upperWorkingLevel <= FAbstractAlgorithm::lowerWorkingLevel); FAssertLF(2 <= FAbstractAlgorithm::upperWorkingLevel); } - virtual void executeCore(const unsigned operationsToProceed) = 0; public: diff --git a/Src/Files/FAbstractLoader.hpp b/Src/Files/FAbstractLoader.hpp index 410cb52b2..d25be50ec 100644 --- a/Src/Files/FAbstractLoader.hpp +++ b/Src/Files/FAbstractLoader.hpp @@ -24,6 +24,8 @@ template <class FReal> class FAbstractLoader { public: + using dataType= FReal ; + /** Default destructor */ virtual ~FAbstractLoader(){ } diff --git a/Src/Files/FBlockedMpiInterpolation.hpp b/Src/Files/FBlockedMpiInterpolation.hpp new file mode 100644 index 000000000..41731c094 --- /dev/null +++ b/Src/Files/FBlockedMpiInterpolation.hpp @@ -0,0 +1,228 @@ +#ifndef _F_BLOCKED_MPI_INTERPOLATION_HPP_ +#define _F_BLOCKED_MPI_INTERPOLATION_HPP_ + + +#include "../../Src/Utils/FGlobal.hpp" + +#include "../../Src/GroupTree/Core/FGroupTree.hpp" + +#include "../../Src/Components/FSimpleLeaf.hpp" +#include "../../Src/Components/FSymbolicData.hpp" +#include "../../Src/Containers/FVector.hpp" + +#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp" + +#include "../../Src/Utils/FMath.hpp" +#include "../../Src/Utils/FMemUtils.hpp" +#include "../../Src/Utils/FParameters.hpp" + +#include "../../Src/Files/FRandomLoader.hpp" +#include "../../Src/Files/FFmaGenericLoader.hpp" + +#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp" +#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp" +#include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp" +#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" + +#include "../../Src/Utils/FParameterNames.hpp" + +#include "../../Src/Components/FTestParticleContainer.hpp" +#include "../../Src/Components/FTestCell.hpp" +#include "../../Src/Components/FTestKernels.hpp" + +#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp" +#include "../../Src/Files/FMpiTreeBuilder.hpp" +#include "../../Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp" +#include "../../Src/Core/FFmmAlgorithm.hpp" //For validation + +#include "../../Src/Files/FMpiFmaGenericLoader.hpp" +#include "../../Src/Containers/FCoordinateComputer.hpp" + +#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp" + +#include <memory> +using namespace std; + +namespace blockedMpiInterpolation{ + +//Function header +void timeAverage(int mpi_rank, int nproc, double elapsedTime); +FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total); + +template< + class GroupCellClass, + class GroupCellUpClass, + class GroupCellDownClass, + class GroupCellSymbClass, + class KernelClass, + class MatrixKernelClass + > +auto execute_algorithm(int argc, char* argv[]){ + //Define parameters + const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block 
of the blocked tree"}; + const FParameterNames LocalOptionEllipsoid = {{"-ellipsoid"} , " non uniform distribution on an ellipsoid of aspect ratio given by a=0.5 b=0.25 c=0.125"}; + const FParameterNames LocalOptionCube = {{"-cube", "-uniform"} , " uniform distribution on cube (default)"}; + // Define types + using FReal = double; + using GroupContainerClass = + FP2PGroupParticleContainer<FReal>; + using GroupOctreeClass = + FGroupTree< FReal, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal>; + using GroupKernelClass = + FStarPUAllCpuCapacities<KernelClass>; + using GroupCpuWrapper = + FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> ; + using GroupAlgorithm = + FGroupTaskStarPUMpiAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper> ; + // Init MPI_COM + FMpi mpiComm(argc,argv); + + // Init timer + FTic timer; + + // Getting parameters + const int groupSize = + FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250); + const unsigned int TreeHeight = + FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); + + const FSize totalNbParticles = + FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20)); + + const FSize NbParticles = + getNbParticlesPerNode(mpiComm.global().processCount(), mpiComm.global().processId(), totalNbParticles); + + // init particles position and physical value + struct TestParticle{ + FPoint<FReal> position; + FReal physicalValue; + const FPoint<FReal>& getPosition(){ + return position; + } + const unsigned int getWriteDataSize(void) const { + return sizeof(FReal); + } + const unsigned int getWriteDataNumber(void) const { + return 3; + } + const FReal* getPtrFirstData(void) const { + return position.data(); + } + }; + + // LOADING PARTICLE + #ifndef LOAD_FILE + FRandomLoader<FReal> loader(NbParticles, 1.0, FPoint<FReal>(0,0,0), mpiComm.global().processId()); + FAssertLF(loader.isOpen()); + TestParticle* allParticles = new TestParticle[loader.getNumberOfParticles()]; + memset(allParticles,0,(unsigned int) (sizeof(TestParticle)* loader.getNumberOfParticles())); + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + loader.fillParticle(&allParticles[idxPart].position); + allParticles[idxPart].physicalValue = 0.1; + } + #else + const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + FMpiFmaGenericLoader<FReal> loader(filename,mpiComm.global()); + FAssertLF(loader.isOpen()); + TestParticle* allParticles = new TestParticle[loader.getMyNumberOfParticles()]; + memset(allParticles,0,(unsigned int) (sizeof(TestParticle)* loader.getMyNumberOfParticles())); + for(FSize idxPart = 0 ; idxPart < loader.getMyNumberOfParticles() ; ++idxPart){ + loader.fillParticle(&allParticles[idxPart].position,&allParticles[idxPart].physicalValue); + } + #endif + + FVector<TestParticle> myParticles; + FLeafBalance balancer; + FMpiTreeBuilder< FReal, + TestParticle >::DistributeArrayToContainer( + mpiComm.global(), + allParticles, + loader.getNumberOfParticles(), + loader.getCenterOfBox(), + loader.getBoxWidth(), + TreeHeight, + &myParticles, + &balancer); + + // Each proc need to know the righest morton index + const FTreeCoordinate host = 
FCoordinateComputer::GetCoordinateFromPosition<FReal>( + loader.getCenterOfBox(), + loader.getBoxWidth(), + TreeHeight, + myParticles[myParticles.getSize()-1].position ); + const MortonIndex myLeftLimite = host.getMortonIndex(); + MortonIndex leftLimite = -1; + if(mpiComm.global().processId() != 0){ + FMpi::Assert(MPI_Recv(&leftLimite, sizeof(leftLimite), MPI_BYTE, + mpiComm.global().processId()-1, 0, + mpiComm.global().getComm(), MPI_STATUS_IGNORE), __LINE__); + } + if(mpiComm.global().processId() != mpiComm.global().processCount()-1){ + FMpi::Assert(MPI_Send(const_cast<MortonIndex*>(&myLeftLimite), sizeof(myLeftLimite), MPI_BYTE, + mpiComm.global().processId()+1, 0, + mpiComm.global().getComm()), __LINE__); + } + FLOG(std::cout << "My last index is " << leftLimite << "\n"); + FLOG(std::cout << "My left limite is " << myLeftLimite << "\n"); + + // Put the data into the tree + FP2PParticleContainer<FReal> myParticlesInContainer; + for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){ + myParticlesInContainer.push(myParticles[idxPart].position, + myParticles[idxPart].physicalValue); + } + GroupOctreeClass groupedTree(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, + &myParticlesInContainer, true, leftLimite); + timer.tac(); + std::cerr << "Done " << "(@Creating and Inserting Particles = " << timer.elapsed() << "s)." << std::endl; + + int operationsToProceed = FFmmP2P | FFmmP2M | FFmmM2M | FFmmM2L | FFmmL2L | FFmmL2P; + { // ----------------------------------------------------- + + + const MatrixKernelClass MatrixKernel; + // Create Matrix Kernel + GroupKernelClass groupkernel(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel); + // Run the algorithm + GroupAlgorithm groupalgo(mpiComm.global(), &groupedTree,&groupkernel); + mpiComm.global().barrier(); + timer.tic(); + starpu_fxt_start_profiling(); + groupalgo.execute(operationsToProceed); + mpiComm.global().barrier(); + starpu_fxt_stop_profiling(); + timer.tac(); + timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), timer.elapsed()); + } + return &groupedTree; +} + +void timeAverage(int mpi_rank, int nproc, double elapsedTime){ + if(mpi_rank == 0){ + double sumElapsedTimeMin = elapsedTime; + double sumElapsedTimeMax = elapsedTime; + for(int i = 1; i < nproc; ++i){ + double tmp; + MPI_Recv(&tmp, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + if(tmp < sumElapsedTimeMin) + sumElapsedTimeMin = tmp; + if(tmp > sumElapsedTimeMax) + sumElapsedTimeMax = tmp; + } + std::cout << "Min time per node (MPI) : " << sumElapsedTimeMin << "s" << std::endl; + std::cout << "Max time per node (MPI) : " << sumElapsedTimeMax << "s" << std::endl; + } else { + MPI_Send(&elapsedTime, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); + } + MPI_Barrier(MPI_COMM_WORLD); +} + +FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){ + if(mpi_rank < (total%mpi_count)) + return ((total - (total%mpi_count))/mpi_count)+1; + return ((total - (total%mpi_count))/mpi_count); +} + +} + +#endif diff --git a/Src/Files/FFmaGenericLoader.hpp b/Src/Files/FFmaGenericLoader.hpp index f2e6b0f36..909937e88 100644 --- a/Src/Files/FFmaGenericLoader.hpp +++ b/Src/Files/FFmaGenericLoader.hpp @@ -199,7 +199,6 @@ private: FReal * tmpVal; ///< Temporary array to read data /// Count of other data pieces to read in a particle record after the 4 first ones. 
unsigned int otherDataToRead; - void open_file(const std::string filename, const bool binary) { if(binary) { this->file = new std::fstream (filename.c_str(),std::ifstream::in| std::ios::binary); @@ -216,6 +215,7 @@ private: } public: + using dataType= FReal ; // Just to what kind of data we handle /** * This constructor opens a file using the given mode and reads its * header. The file will be kept opened until destruction of the object. @@ -543,8 +543,8 @@ template <class FReal> class FFmaGenericWriter { protected: - std::fstream *file; ///< the stream used to read the file - bool binaryFile ; ///< if true the file to read is in binary mode + std::fstream *file; ///< the stream used to write the file + bool _binaryFile ; ///< if true the file is in binary mode public: /** @@ -555,11 +555,11 @@ public: * * @param filename the name of the file to open. */ - FFmaGenericWriter(const std::string & filename): binaryFile(false) { + FFmaGenericWriter(const std::string & filename): _binaryFile(false) { std::string ext(".bfma"); // open particle file if(filename.find(".bfma") !=std::string::npos) { - binaryFile = true; + _binaryFile = true; this->file = new std::fstream (filename.c_str(),std::ifstream::out| std::ios::binary); } else if(filename.find(".fma")!=std::string::npos ) { @@ -583,7 +583,7 @@ public: * @param filename the name of the file to open. * @param binary true if the file to open is in binary mode */ - FFmaGenericWriter(const std::string & filename, const bool binary ) : file(nullptr), binaryFile(binary) + FFmaGenericWriter(const std::string & filename, const bool binary ) : file(nullptr), _binaryFile(binary) { if(binary) { this->file = new std::fstream (filename.c_str(),std::ifstream::out| std::ios::binary); @@ -616,6 +616,14 @@ public: } /** + * To know if opened file is in binary mode + * @return true ifopened file is in binary mode + */ + bool isBinary() const{ + return this->_binaryFile; + } + + /** * Writes the header of the FMA file * \warning All values inside typePart should be of the same type (float or double) * @@ -633,7 +641,7 @@ public: typeFReal[1] = ndata; } FReal x = boxWidth * FReal(0.5); - if(this->binaryFile) { + if(this->_binaryFile) { this->writerBinaryHeader(centerOfBox,x,nbParticles,typeFReal); } else { @@ -656,7 +664,7 @@ public: const unsigned int dataType, const unsigned int nbDataPerRecord) { unsigned int typeFReal[2] = {dataType , nbDataPerRecord }; FReal x = boxWidth * FReal(0.5); - if(this->binaryFile) { + if(this->_binaryFile) { this->writerBinaryHeader(centerOfBox,x,nbParticles,typeFReal); } else { @@ -698,7 +706,7 @@ public: template <class dataPart> void writeArrayOfParticles(const dataPart *dataToWrite, const FSize N){ // std::cout << "NB points to write: "<< N <<std::endl; - if(binaryFile){ + if(_binaryFile){ unsigned int recordSize= dataToWrite[0].getWriteDataSize() ; unsigned int typeFReal[2] = {sizeof(FReal) , sizeof(dataPart) / sizeof(FReal) }; // std::cout << "typeData "<< typeFReal[0] << " "<< typeFReal[1] <<" "<< std::endl; @@ -755,7 +763,7 @@ public: * \endcode */ void writeArrayOfReal(const FReal *dataToWrite, const FSize nbData, const FSize N){ - if(binaryFile){ + if(_binaryFile){ file->write((const char*)(dataToWrite), N*nbData*sizeof(FReal)); } else{ diff --git a/Src/Files/FMpiFmaGenericLoader.hpp b/Src/Files/FMpiFmaGenericLoader.hpp index 1a1053728..85c18e134 100644 --- a/Src/Files/FMpiFmaGenericLoader.hpp +++ b/Src/Files/FMpiFmaGenericLoader.hpp @@ -8,6 +8,8 @@ #ifndef FMPIFMAGENERICLOADER_HPP #define FMPIFMAGENERICLOADER_HPP 
+#include <cstdlib>
+#include <vector>
 #include "Utils/FMpi.hpp"
 #include "Files/FFmaGenericLoader.hpp"
@@ -69,5 +71,269 @@ public:
   }
 };
+/**
+ *
+ * \brief Writes a set of distributed particles to an FMA formatted file.
+ *
+ * The file may be in ASCII or binary mode. The example below shows how to use the class.
+ *
+ * \code
+ * // Instantiate the writer with a binary fma file (extension .bfma).
+ * FMpiFmaGenericWriter<double> writer("particles.bfma", parallelManager);
+ * \endcode
+ * ----------------------------------------
+ * FMA is a simple format to store particles in a file. It is organized as follows.
+ *
+ * file
+ */
+template <class FReal>
+class FMpiFmaGenericWriter : public FFmaGenericWriter<FReal> {
+
+protected:
+  const FMpi* _parallelManager ;
+  bool _writeDone ;
+  int _headerSize;
+  int _nbDataTowritePerRecord ; //< number of data to write for one particle
+  FSize _numberOfParticles ;    //< number of particles (global) to write in the file
+  using FFmaGenericWriter<FReal>::file;
+  MPI_File _mpiFile;            //< MPI pointer on data file (write mode)
+
+public:
+  /**
+   * This constructor opens a file to be written to.
+   *
+   * - The opening mode is guessed from the file extension : `.fma` will open
+   * in ASCII mode, `.bfma` will open in binary mode.
+   *
+   * @param filename the name of the file to open.
+   */
+  FMpiFmaGenericWriter(const std::string inFilename, const FMpi& para) : FFmaGenericWriter<FReal>(inFilename),
+    _parallelManager(&para),_writeDone(false),_headerSize(0),_nbDataTowritePerRecord(8),_numberOfParticles(0)
+  {
+    if ( ! this->isBinary()){
+      std::cout << "FMpiFmaGenericWriter only works with binary files (.bfma)." << std::endl;
+      std::exit(EXIT_FAILURE);
+    }
+    int fileIsOpen = MPI_File_open( _parallelManager->global().getComm(), inFilename.c_str(),
+                                    MPI_MODE_WRONLY | MPI_MODE_CREATE, MPI_INFO_NULL, &_mpiFile );
+    // Is it open?
+    if(fileIsOpen != MPI_SUCCESS){
+      std::cout << "Cannot create parallel file, the FMpiFmaGenericWriter constructor aborts." << std::endl;
+      std::exit(EXIT_FAILURE);
+      return;
+    }
+  }
+  /**
+   * Writes the header of the FMA file.
+ * + * Should be used if we write the particles with writeArrayOfReal method + * + * @param centerOfBox The center of the Box (FPoint<FReal> class) + * @param boxWidth The width of the box + * @param nbParticles Number of particles in the box (or to save) + * @param dataType Size of the data type of the values in particle + * @param nbDataPerRecord Number of record/value per particle + */ + void writeHeader(const FPoint<FReal> ¢erOfBox,const FReal &boxWidth, const FSize &nbParticles, + const unsigned int dataType, const unsigned int nbDataPerRecord) { +// * \code +// * DatatypeSize Number_of_record_per_line +// * NB_particles half_Box_width Center_X Center_Y Center_Z +// * Particle_values +// * \endcode + _headerSize = 0 ; + _nbDataTowritePerRecord = nbDataPerRecord ; + _numberOfParticles = nbParticles ; + if(_parallelManager->global().processId()==0){ + int sizeType=0 ; + int ierr = 0 ; + MPI_Datatype mpiFSize_t = _parallelManager->GetType(nbParticles) ; + MPI_Datatype mpiFReal_t = _parallelManager->GetType(boxWidth) ; + // + unsigned int typeFReal[2] = {sizeof(FReal) , nbDataPerRecord}; + + ierr =MPI_File_write_at(_mpiFile, 0, &typeFReal, 2, MPI_INT, MPI_STATUS_IGNORE); + MPI_Type_size(MPI_INT, &sizeType) ; + _headerSize += sizeType*2 ; + ierr =MPI_File_write_at(_mpiFile, _headerSize, &nbParticles, 1, mpiFSize_t, MPI_STATUS_IGNORE); + MPI_Type_size(mpiFSize_t, &sizeType) ; + _headerSize += sizeType*1 ; + + FReal boxSim[4] = {boxWidth ,centerOfBox.getX() , centerOfBox.getX() , centerOfBox.getX() } ; + + ierr =MPI_File_write_at(_mpiFile, _headerSize, &boxSim[0], 4, mpiFReal_t, MPI_STATUS_IGNORE); + MPI_Type_size(mpiFReal_t, &sizeType) ; + _headerSize += sizeType*4 ; + // Build the header offset + std::cout << " headerSize "<< _headerSize << std::endl; + } + MPI_Bcast(&_headerSize,1,MPI_INT,0,_parallelManager->global().getComm()); + std::cout << " _headerSize " << _headerSize <<std::endl; + + } + ~FMpiFmaGenericWriter(){ + MPI_File_close(&_mpiFile ); + } + + /** + * Write all for all particles the position, physical values, potential and forces + * + * @param myOctree the octree + * @param nbParticlesnumber of particles + * @param mortonLeafDistribution the morton distribution of the leaves (this is a vecor of size 2* the number of MPI processes + * + */ + template <class OCTREECLASS> + void writeDistributionOfParticlesFromOctree( OCTREECLASS &myOctree, const FSize& nbParticles, const std::vector<MortonIndex> &mortonLeafDistribution){ + // + // Write the header + int sizeType = 0,ierr = 0 ; + FReal tt =0.0 ; + MPI_Datatype mpiFSize_t = _parallelManager->GetType(nbParticles) ; + MPI_Datatype mpiFReal_t = _parallelManager->GetType(tt) ; + MPI_Type_size(mpiFReal_t, &sizeType) ; + int myRank = _parallelManager->global().processId() ; + _headerSize = 0 ; + // + unsigned int typeFReal[2] = {sizeof(FReal) , static_cast<unsigned int>(_nbDataTowritePerRecord)}; + if(myRank==0){ + ierr =MPI_File_write_at(_mpiFile, 0, &typeFReal, 2, MPI_INT, MPI_STATUS_IGNORE); + } + MPI_Type_size(MPI_INT, &sizeType) ; + _headerSize += sizeType*2 ; + if(myRank==0){ + ierr =MPI_File_write_at(_mpiFile, _headerSize, &nbParticles, 1, mpiFSize_t, MPI_STATUS_IGNORE); + } + MPI_Type_size(mpiFSize_t, &sizeType) ; + _headerSize += sizeType*1 ; + auto centerOfBox =myOctree.getBoxCenter() ; + FReal boxSim[4] = {myOctree.getBoxWidth()*0.5 , centerOfBox.getX() , centerOfBox.getX() , centerOfBox.getX() } ; + + if(myRank==0){ + ierr =MPI_File_write_at(_mpiFile, _headerSize, &boxSim[0], 4, mpiFReal_t, MPI_STATUS_IGNORE); + } + 
if(ierr >0){ + std::cerr << "Error during the construction of the header in FMpiFmaGenericWriter::writeDistributionOfParticlesFromOctree"<<std::endl; + } + MPI_Type_size(mpiFReal_t, &sizeType) ; + _headerSize += sizeType*4 ; + // + // Construct the local number of particles on my process + FSize nbLocalParticles =0 ,maxPartLeaf =0; + MortonIndex starIndex = mortonLeafDistribution[2*myRank], endIndex = mortonLeafDistribution[2*myRank+1]; + myOctree.template forEachCellMyLeaf<typename OCTREECLASS::LeafClass_T >( + [&](typename OCTREECLASS::GroupSymbolCellClass_T* gsymb , + typename OCTREECLASS::GroupCellUpClass_T* /* gmul */, + typename OCTREECLASS::GroupCellDownClass_T* /* gloc */, + typename OCTREECLASS::LeafClass_T * leafTarget + ) + { + if (! (gsymb->getMortonIndex() < starIndex || gsymb->getMortonIndex() > endIndex)) { + auto n = leafTarget->getNbParticles(); + nbLocalParticles += n; + maxPartLeaf = std::max(maxPartLeaf,n); + } + } + ); + std::vector<FReal> particles(maxPartLeaf*_nbDataTowritePerRecord); + // Build the offset for eaxh processes + FSize before=0; // Number of particles before me (rank < myrank) + MPI_Scan(&nbLocalParticles,&before,1,mpiFSize_t,MPI_SUM,_parallelManager->global().getComm()); + before -= nbLocalParticles ; + MPI_Offset offset = _headerSize + sizeType*_nbDataTowritePerRecord*before; + // + // Write particles in file + myOctree.template forEachCellMyLeaf<typename OCTREECLASS::LeafClass_T >( + [&](typename OCTREECLASS::GroupSymbolCellClass_T* gsymb , + typename OCTREECLASS::GroupCellUpClass_T* /* gmul */, + typename OCTREECLASS::GroupCellDownClass_T* /* gloc */, + typename OCTREECLASS::LeafClass_T * leafTarget + ) + { + if (! (gsymb->getMortonIndex() < starIndex || gsymb->getMortonIndex() > endIndex)) { + const FSize nbPartsInLeaf = leafTarget->getNbParticles(); + const FReal*const posX = leafTarget->getPositions()[0]; + const FReal*const posY = leafTarget->getPositions()[1]; + const FReal*const posZ = leafTarget->getPositions()[2]; + const FReal*const physicalValues = leafTarget->getPhysicalValues(); + const FReal*const forceX = leafTarget->getForcesX(); + const FReal*const forceY = leafTarget->getForcesY(); + const FReal*const forceZ = leafTarget->getForcesZ(); + const FReal*const potential = leafTarget->getPotentials(); + for (int i=0, k=0 ; i < nbPartsInLeaf ;++i,k+=_nbDataTowritePerRecord ) { + particles[k] = posX[i]; particles[k+1] = posY[i]; particles[k+2] = posZ[i]; + particles[k+3] = physicalValues[i]; particles[k+4] = potential[i]; + particles[k+5] = forceX[i]; particles[k+6] = forceY[i]; particles[k+7] = forceZ[i]; + } + MPI_File_write_at(_mpiFile, offset, particles.data(), static_cast<int>(_nbDataTowritePerRecord*nbPartsInLeaf), mpiFReal_t, MPI_STATUS_IGNORE); + offset+=sizeType*_nbDataTowritePerRecord*nbPartsInLeaf; + } + } + ); + + MPI_File_close(&_mpiFile ); + + } + +// /** +// * Write an array of data in a file Fill +// * +// * @param dataToWrite array of particles of type FReal +// * @param nbData number of data per particle +// * @param N number of particles +// * +// * The size of the array is N*nbData +// * +// * example +// * \code +// * FmaRParticle * const particles = new FmaRParticle[nbParticles]; +// * memset(particles, 0, sizeof(FmaRParticle) * nbParticles) ; +// * ... 
+// * FFmaGenericWriter<FReal> writer(filenameOut) ; +// * Fwriter.writeHeader(Centre,BoxWith, nbParticles,*particles) ; +// * Fwriter.writeArrayOfReal(particles, nbParticles); +// * \endcode +// */ +// void writeArrayOfReal(const FReal *dataToWrite, const FSize nbData, const FSize N){ +// /* +// if(! _writeDone){ +// FSize previousNumberofParticles; +// MPI_Scan(&N,&previousNumberofParticles,1,_parallelManager->GetType(N),MPI_SUM,_parallelManager->global().getComm()); +// FSize offset= previousNumberofParticles-N; +// //To this header size, we had the parts that belongs to proc on my left +// this->skipHeaderAndPart(offset) ; +// FFmaGenericWriter<FReal>::writeArrayOfReal(dataToWrite,4,N) ; +// // + +// std::cout <<" node " << _parallelManager->global().processId() << "Npart " << N << " before Me" << previousNumberofParticles-N<< std::endl; +// _writeDone = true; +// } +// else { +// std::cerr << " The writeArrayOfReal should be call only once !!!! "<< std::endl; +// std::exit(EXIT_FAILURE); +// } +// */ +// } +//private: +// void skipHeaderAndPart(const FSize &numberOfParticleToSkip){ +// if(this->binaryFile) { +// //This is header size in bytes +// // MEANING : sizeof(FReal)+nbAttr, nb of parts, boxWidth+boxCenter +// _headerSize = sizeof(int)*2 + sizeof(FSize) + sizeof(FReal)*4; +// file->seekg(_headerSize+numberOfParticleToSkip* 4/*FFmaGenericWriter<FReal>::getNbRecordPerline()*/*sizeof(FReal), std::ios::beg); + +// } else { +// // First finish to read the current line +// file->ignore(std::numeric_limits<std::streamsize>::max(), '\n'); +// for(int i = 0; i < numberOfParticleToSkip; ++i) { +// file->ignore(std::numeric_limits<std::streamsize>::max(), '\n'); +// } +// } +// } + + + +} ; +#ifdef __EXPRIMENTAL_DOESNT_WORK +#endif #endif //FMPIFMAGENERICLOADER_HPP diff --git a/Src/Files/FTreeMpiCsvSaver.hpp b/Src/Files/FTreeMpiCsvSaver.hpp index 903c917b2..80a77880a 100644 --- a/Src/Files/FTreeMpiCsvSaver.hpp +++ b/Src/Files/FTreeMpiCsvSaver.hpp @@ -3,8 +3,8 @@ #define FTREEMPICSVSAVER_HPP -#include "../Utils/FGlobal.hpp" -#include "../Utils/FMpi.hpp" +#include "Utils/FGlobal.hpp" +#include "Utils/FMpi.hpp" #include <cstring> #include <iostream> diff --git a/Src/GroupTree/Core/FBlockedLinearTree.hpp b/Src/GroupTree/Core/FBlockedLinearTree.hpp deleted file mode 100644 index dd2229d28..000000000 --- a/Src/GroupTree/Core/FBlockedLinearTree.hpp +++ /dev/null @@ -1,196 +0,0 @@ -#ifndef _FBLOCKED_LINEAR_TREE_HPP_ -#define _FBLOCKED_LINEAR_TREE_HPP_ - -#include <vector> -#include "../../Utils/FLog.hpp" -#include "FDistributedGroupTreeBuilder.hpp" - -using FReal = double; - -template<class node_t> -class FBlockedLinearTree { - -protected: - - int block_size; - int nb_block; - - std::vector<node_t>* linear_tree; - -public: - -//////////////////////////////////////////////// -// constructor -//////////////////////////////////////////////// - - /** - * FBlockedLinearTree Constructor of blocked linear tree - * @author benjamin.dufoyer@inria.fr - * @param in_block_size Block size needed - * @param in_linear_tree Linear tree - * @param in_box_center Box Center of particle container - * @param in_box_width Box Width of particle container - */ - FBlockedLinearTree(){} - - /** - * This function create a blocked linear tree from the current distributed - * linear tree - * This function stock the linear tree with his adress - * @author benjamin.dufoyer@inria.fr - * @param in_linear_tree linear tree - * @param in_block_size block size - */ - void create_local_blocked_linear_tree( - std::vector<node_t>* 
in_linear_tree, - int in_block_size - ){ - this->create(in_linear_tree,in_block_size); - } - - /** - * this function create a blocked linear tree from the current distributed - * linear tree and she redistribute block according to the block size - * the function stock the linear tree with his adress - * @author benjamin.dufoyer@inria.fr - * @param in_linear_tree linear tree - * @param in_block_size blocksize needed - * @param conf [description] - */ - void create_global_blocked_linear_tree( - std::vector<node_t>* in_linear_tree, - int in_block_size, - const inria::mpi_config& conf - ){ - this->create(in_linear_tree,in_block_size); - this->redistribute_block(conf); - } - - void create( - std::vector<node_t>* in_linear_tree, - int in_block_size - ){ - this->block_size = in_block_size; - this->linear_tree = in_linear_tree; - this->nb_block = (int)in_linear_tree->size()/in_block_size; - if(this->linear_tree->size()%this->block_size != 0) - this->nb_block += 1; - } - -//////////////////////////////////////////////// -// destructor -//////////////////////////////////////////////// - - ~FBlockedLinearTree(){ - linear_tree = nullptr; - } - -//////////////////////////////////////////////// -// Function -//////////////////////////////////////////////// - - /** - * redistribute_block redistribute leaf of the linear_tree with the good - * block size. For N proc, N-1 proc have the same number of leaf, - * the rest is for the proc N - * @author benjamin.dufoyer@inria.fr - * @param conf mpi configuration to work with the other process - */ - void redistribute_block(const inria::mpi_config& conf){ - - dstr_grp_tree_builder::parrallel_build_block( - conf, - this->linear_tree, - this->block_size); - //Update nb_block - if(this->linear_tree->size()%block_size == 0) - this->nb_block = (int)this->linear_tree->size()/block_size; - else - this->nb_block = (int)this->linear_tree->size()/block_size+1; - - } - - size_t get_nb_leaf() const{ - return this->linear_tree->size(); - } - - int get_nb_block() const{ - return this->nb_block; - } - - int get_block_size() const{ - return this->block_size; - } - - /** - * get_block_size_at return the block size of the number of the block - * placed in parametter, - * [INFO] first block is 0 - * [INFO] last block is this->nb_block-1 - * @author benjamin.dufoyer@inria.fr - * @param num_block number of the block - * @return size of the block - */ - int get_block_size_at(int num_block) const{ - FAssertLF(num_block < this->nb_block); - int size; - if(num_block == this->nb_block-1){ - size = this->linear_tree->size() - ((this->nb_block-1)*this->block_size); - } else { - size = this->block_size; - } - return size; - } - - /** - * get_leaf_at return the leaf at the position placed in parameter - * @author benjamin.dufoyer@inria.fr - * @param position position of the leaf - * @return the leaf - */ - node_t get_leaf_at(int position){ - return this->linear_tree->at(position); - } - - /** - * get_leaf_at return the leaf at the position placed in parameter - * @author benjamin.dufoyer@inria.fr - * @param position position of the leaf - * @return the leaf - */ - node_t at(int position){ - return this->get_leaf_at(position); - } - - size_t get_leaf_level() const{ - return this->linear_tree->back().level; - } - - size_t get_tree_height() const{ - return this->get_leaf_level(); - } - - size_t get_first_morton_index(){ - return this->linear_tree->front().morton_index; - } - - size_t get_last_morton_index(){ - return this->linear_tree->back().morton_index; - } - - void print_info_tree(){ - 
std::cout << " nb_leaf : " << this->linear_tree->size() << std::endl; - std::cout << " nb_block : " << nb_block << std::endl; - std::cout << " block_size : " << block_size << std::endl; - for(int i = 0 ; i < this->linear_tree->size() ; i++){ - std::cout << linear_tree->at(i) << std::endl; - } - } - - std::vector<node_t>* get_tree(){ - return this->linear_tree; - } - -}; - -#endif //_FBLOCKED_LINEAR_TREE_HPP_ diff --git a/Src/GroupTree/Core/FBuildGroupTree.hpp b/Src/GroupTree/Core/FBuildGroupTree.hpp new file mode 100644 index 000000000..626b97b66 --- /dev/null +++ b/Src/GroupTree/Core/FBuildGroupTree.hpp @@ -0,0 +1,397 @@ +// ==== CMAKE ===== +// @FUSE_MPI +// @FUSE_STARPU +// +#ifndef FBuildGroupTree +#define FBuildGroupTree + +#include <vector> +#include <string> +#include "Utils/FGlobal.hpp" + +// include algo for linear tree +#include "inria/algorithm/distributed/mpi.hpp" +#include "inria/linear_tree/node.hpp" +#include "inria/linear_tree/balance_tree.hpp" +// tree class +#include "GroupTree/Core/FGroupTree.hpp" +// symbolic data +#include "Components/FSymbolicData.hpp" +// GroupParticleContainer +#include "GroupTree/Core/FP2PGroupParticleContainer.hpp" +// file loader +#include "Files/FMpiFmaGenericLoader.hpp" +// FBox +#include "Adaptive/FBox.hpp" +// Group linear tree +#include "GroupTree/Core/FGroupLinearTree.hpp" +// Function for GroupLinearTree +#include "GroupTree/Core/FDistributedGroupTreeBuilder.hpp" +// +#include "GroupTree/Core/FGroupTools.hpp" + +// To construct either the duplicated Octree or the LET +#include "Utils/FLeafBalance.hpp" + +namespace groupTree { + // + // @param[in] mpi_comm the MPI communicator + // @param[inout] myParticleslocal array of particles on my node. On output the array is sorted + // @param[in] total number of particles in the simulation + // @param[in] box size of the simulation box + // @param[in] TreeHeight Height of the tree + // @param[inout] localGroupTree the LET of the octree + // @param[out] m_idx_distribution Distribution of the leaves on the processors + // @param[out] nb_blocks + template <class LOADER, class particleType , class OCTREEGRPOUPCLASS> + void buildLetTree( inria::mpi::communicator & mpi_comm, LOADER& loader, std::vector<particleType> &myParticles, + const FBox<FPoint<FReal>> box, + const int TreeHeight, const int groupSize, + OCTREEGRPOUPCLASS * &localGroupTree, + std::vector<MortonIndex> &m_idx_distribution, int & nb_blocks + ){ + // + const std::size_t max_level = sizeof(MortonIndex) * 8 / 3; + const FSize totalNumberOfParticles = loader.getNumberOfParticles() ; + const FSize localNumberOfParticles = loader.getMyNumberOfParticles() ; + + myParticles.resize(localNumberOfParticles) ; + + // iterate on all of my particles + for(FSize idxPart = 0; idxPart < static_cast<FSize>(localNumberOfParticles );++idxPart){ + particleType tmp; + // get the current particles + loader.fillParticle(&tmp.pos,&tmp.phi); + // set the morton index of the current particle at the max_level + tmp.morton_index = inria::linear_tree::get_morton_index(tmp.pos, box, max_level); + // set the weight of the particle + // tmp.phi = 0.1; + // add the particle to my vector of particle + myParticles[idxPart].fill(tmp.pos, tmp.phi,tmp.morton_index); + } + // Now i have all of my particles in a vector, they all have a morton index + // now we will sort them + inria::sort(mpi_comm,myParticles, [](const auto& p1, const auto& p2) { + return p1.morton_index < p2.morton_index; + }); + + std::cout << " I have " << myParticles.size() << " particles ..." 
<< std::endl; + std::cout << "For a total of " + << totalNumberOfParticles << " particles ..." << std::endl; + + // create the linear tree + // a linear tree is a tree, with only the leaf + int level = TreeHeight -1 ; + auto linear_tree = inria::linear_tree::create_balanced_linear_tree_at_level( + mpi_comm, + level, + box, + myParticles); + + // create GroupLinearTree + FGroupLinearTree<typename decltype(linear_tree)::value_type> group_linear_tree{mpi_comm}; + group_linear_tree.create_local_group_linear_tree( &linear_tree, groupSize ); + + // group_linear_tree.print_info_tree() ; + + // Redistribute the particle according to the linear tree + // Redistribution of particles + inria::linear_tree::redistribute_particles(mpi_comm, + linear_tree, + myParticles); + + // Now we need to modify the morton index of of all particle to + // have the morton index at TreeHeight-1 +#pragma omp parallel for shared(myParticles) + for(unsigned i = 0 ; i < myParticles.size(); ++i){ + myParticles[i].morton_index = inria::linear_tree::get_morton_index(myParticles[i].pos, box, level); + } + + // Now we need to share the particle distribution to build the GroupTree + group_linear_tree.set_index_particle_distribution(myParticles); + + // Now i can declare my groupTree + // it's a empty instance of the FGroupTree + FReal width = std::max(box.width(0) , std::max(box.width(1) ,box.width(2) )) ; + // using test = typename std::remove_pointer<typename std::remove_reference<decltype(localGroupTree)>::type >::type; + // // std::cout << "&&&&&"<<typeid(test).name() <<std::endl; + // + localGroupTree = new OCTREEGRPOUPCLASS (TreeHeight,groupSize, box.center(), box.c1() /* corner*/, + width, width/FReal(1<<(TreeHeight-1))); + // Now i can fill the localGroupTree + localGroupTree->create_tree(group_linear_tree,myParticles); + localGroupTree->printInfoBlocks(); + // get the index particle distribution (needed by the algorithm) + + m_idx_distribution = group_linear_tree.get_index_particle_distribution_implicit(); + // for(int i = 0 ; i < mpi_comm.size() ;++i) + // m_idx_distribution[2*i] += 1; + nb_blocks = dstr_grp_tree_builder::set_cell_group_global_index(*localGroupTree,mpi_comm); + // now we create the LET + localGroupTree->create_LET(group_linear_tree); + + // std::cout << " End buildLetTree function " << std::endl; + } + // BuilddMortonDistributionForCGroupCellInTree + // + // @param[in] parallelManager The Height of the octree + // @param[in] mortonLeaves The Height of the octree + // @param[in] TreeHeight The Height of the octree + // @param[in] groupSize + // @param[in] MortonIndexDistribution The Morton distribution at the leaf level + // @param[in] nodeRepartition + // @param[out] sizeForEachGroup For each level give the size of all group cell in the process + void BuilddMortonDistributionForCGroupCellInTree(const FMpi ¶llelManager, std::vector<MortonIndex> &mortonLeaves, + const int & TreeHeight, const int groupSize, + const std::vector<MortonIndex> &MortonIndexDistribution, + std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, + std::vector< std::vector<int>> &sizeForEachGroup ){ + // + const int nproc = parallelManager.global().processCount() ; + // + // Build the groupe size of all groups in the Tree (sequential one) + // + std::cout << "Morton distribution inside BuilddMortonDistributionForCGroupCellInTree " <<std::endl; + for (auto v : MortonIndexDistribution) + std::cout << " " << v ; + std::cout << std::endl; + int processId ; + for( processId = 0; processId < nproc; ++processId) + { + 
FSize size_last, countGroup;
+        // no +1 if we do not start at 0
+        FSize leafOnProcess = MortonIndexDistribution[2*processId+1] - MortonIndexDistribution[2*processId] ;
+        size_last  = leafOnProcess%groupSize;
+        countGroup = (leafOnProcess - size_last)/groupSize;
+        for(int i = 0; i < countGroup; ++i)
+          sizeForEachGroup[TreeHeight-1].push_back(groupSize);
+        if(size_last > 0)
+          sizeForEachGroup[TreeHeight-1].push_back((int)size_last);
+      }
+    //
+    // For each level, compute the size of the groups
+    for(int idxLevel = TreeHeight - 2; idxLevel >= 0; --idxLevel)
+      {
+        processId = 0;
+        int countCellsInTheGroup = 0;
+        MortonIndex previousMortonCell = -1;
+
+        std::cout << "Compute Level " << idxLevel << std::endl;
+        for(std::size_t idxLeaf = 0; idxLeaf < mortonLeaves.size(); ++idxLeaf)
+          {
+            MortonIndex mortonCell = (mortonLeaves[idxLeaf]) >> (3*(TreeHeight - 1 - idxLevel));
+            if(mortonCell <= nodeRepartition[idxLevel][processId][1]) // If the index lies inside the working interval
+              {
+                if(mortonCell != previousMortonCell) // If this is a new index
+                  {
+                    ++countCellsInTheGroup;          // count it in the current group
+                    previousMortonCell = mortonCell;
+                    if(countCellsInTheGroup == groupSize) // If the group is full, store its size
+                      {
+                        sizeForEachGroup[idxLevel].push_back(groupSize);
+                        countCellsInTheGroup = 0;
+                      }
+                  }
+              }
+            else // If we move to the next process interval, store what has been counted so far
+              {
+                if(countCellsInTheGroup > 0)
+                  sizeForEachGroup[idxLevel].push_back(countCellsInTheGroup);
+                countCellsInTheGroup = 1;
+                previousMortonCell = mortonCell;
+                ++processId;
+              }
+          }
+        if(countCellsInTheGroup > 0)
+          sizeForEachGroup[idxLevel].push_back(countCellsInTheGroup);
+        //
+        // Print sizeForEachGroup at the current level
+        for( auto v : sizeForEachGroup[idxLevel])
+          std::cout << " "<< v ;
+        std::cout << std::endl;
+      }
+
+
+  }
+  // Build the node distribution for every level, starting from the leaf distribution
+  //
+  void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex,
+                             std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition,
+                             int nproc, int treeHeight) {
+    //
+    nodeRepartition.resize(treeHeight, std::vector<std::vector<MortonIndex>>(nproc, std::vector<MortonIndex>(2)));
+    for(int node_id = 0; node_id < nproc; ++node_id){
+      nodeRepartition[treeHeight-1][node_id][0] = distributedMortonIndex[node_id*2];
+      nodeRepartition[treeHeight-1][node_id][1] = distributedMortonIndex[node_id*2+1];
+    }
+    for(int idxLevel = treeHeight - 2; idxLevel >= 0 ; --idxLevel){
+      nodeRepartition[idxLevel][0][0] = nodeRepartition[idxLevel+1][0][0] >> 3;
+      nodeRepartition[idxLevel][0][1] = nodeRepartition[idxLevel+1][0][1] >> 3;
+      for(int node_id = 1; node_id < nproc; ++node_id){
+        nodeRepartition[idxLevel][node_id][0] = FMath::Max(nodeRepartition[idxLevel+1][node_id][0] >> 3, nodeRepartition[idxLevel][node_id-1][0]+1); // Berenger's PhD :)
+        nodeRepartition[idxLevel][node_id][1] = nodeRepartition[idxLevel+1][node_id][1] >> 3;
+      }
+    }
+  }
+
+  //
+  //
+  // @param[in]  mpi_comm     the MPI communicator
+  // @param[in]  filename     particles file name
+  // @param[in]  option       option to build the groupTree
+  //                            1 we use a given Morton distribution
+  //                            2 we build the Morton distribution
+  // @param[out] myParticles  local array of particles on my node.
On output the array is sorted + // @param[in] box size of the simulation box + // @param[in] TreeHeight Height of the tree + // @param[in] localGroupTree the LET of the octree + // @param[inout] localGroupTree the LET of the octree + // @param[inout] m_idx_distribution Distribution of the leaves on the processors + // @param[out] nb_blocks + template <class PARTICLE_T , class OCTREEGRPOUPCLASS> + void buildDuplicatedTree( const FMpi ¶llelManager, const int option, const std::string &filename, + std::vector<PARTICLE_T> &myParticles, const FBox<FPoint<FReal>>& box, + const int TreeHeight, const int groupSize, OCTREEGRPOUPCLASS * &GroupTree, + std::vector<MortonIndex> &MortonIndexDistribution ,int & nb_block) + { + + // + //loader + std::cout << "Opening : " << filename << " ..."; + FFmaGenericLoader<FReal> loader(filename); + FAssertLF(loader.isOpen()); + std::cout << " done." << std::endl; + const FSize totalNbParticles = loader.getNumberOfParticles(); + // + const std::size_t max_level = sizeof(PARTICLE_T::morton_index) * 8 / 3; + // + myParticles.resize(totalNbParticles); + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + FPoint<FReal> pos ; + FReal physicalValue ; + loader.fillParticle(&pos, &physicalValue);//Same with file or not + // + // physicalValue = 0.1 ; + MortonIndex morton = inria::linear_tree::get_morton_index( pos, box, max_level); + myParticles[idxPart].fill(pos,physicalValue,morton) ; + } + std::sort(myParticles.begin(), myParticles.end(), [&](const PARTICLE_T& a, const PARTICLE_T& b) { + return (a.getMorton() < b.getMorton() ) ; + } + ); + // + FP2PParticleContainer<FReal> allParticles; + + // Set the right MortonIndex + // MortonIndex mm = 0 ; + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + myParticles[idxPart].morton_index = inria::linear_tree::get_morton_index( myParticles[idxPart] .pos, box, + TreeHeight-1); + // mm = std::max(mm,myParticles[idxPart].morton_index ); + allParticles.push(myParticles[idxPart].getPosition(), myParticles[idxPart].physicalValue() ); + } + // Create the linear tree + // a linear tree is a tree, with only the leaf + // Build a vector of MortonIndex at Leaf level from particles + // + std::size_t nbLeaves = 1 , pos=0; + MortonIndex previousMorton = myParticles[0].morton_index; + for(std::size_t idxPart = 1 ; idxPart < myParticles.size(); ++idxPart){ + if(previousMorton != myParticles[idxPart].morton_index){ + previousMorton = myParticles[idxPart].morton_index ; + ++nbLeaves ; + } + } + std::cout<< "Number of leaves" << nbLeaves <<std::endl ; + std::vector<MortonIndex> mortonLeaves(nbLeaves,-1) ; + + previousMorton = myParticles[0].morton_index; + mortonLeaves[pos] = myParticles[0].morton_index; + + for(std::size_t idxPart = 1 ; idxPart < myParticles.size(); ++idxPart){ + if(previousMorton != myParticles[idxPart].getMorton() ){ + ++pos ; + previousMorton = myParticles[idxPart].morton_index ; + mortonLeaves[pos] = myParticles[idxPart].morton_index ; + } + } + const int nproc = parallelManager.global().processCount() ; // + if(option >1 ) { + std::cout << " Construct the distribution used in Beregnger's thesis "<< std::endl; + FLeafBalance balancer; + MortonIndexDistribution.clear() ; + // + // Build the Morton index as in Berenger's thesis + //Calcul du working interval au niveau des feuilles + previousMorton = -1; + int countLeaf = 0; + int processId = 0; + FSize leafOnProcess = balancer.getRight(nbLeaves, nproc, 0) - balancer.getLeft(nbLeaves, nproc, 0); + std::cout << " 
leafOnProcess " << leafOnProcess << " empty? " << MortonIndexDistribution.empty() << " " << MortonIndexDistribution.size() <<std::endl; + MortonIndexDistribution.push_back(previousMorton); + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart) + { + if(myParticles[idxPart].morton_index != previousMorton) + { + previousMorton = myParticles[idxPart].morton_index ; + ++countLeaf; + if(countLeaf == leafOnProcess) + { + ++processId; + if (processId < nproc){ + leafOnProcess = balancer.getRight(nbLeaves, nproc, processId) - balancer.getLeft(nbLeaves, nproc, processId); + MortonIndexDistribution.push_back(previousMorton); + MortonIndexDistribution.push_back(previousMorton); + countLeaf = 0; + } + } + } + } + MortonIndexDistribution.push_back(myParticles[loader.getNumberOfParticles() - 1].morton_index) ; + // + } + // otherwise we use the given Morton distribution + std::cout << " Morton distribution to build the duplicated tree " <<MortonIndexDistribution.size() << " "<<std::endl<<std::flush; + for (auto v : MortonIndexDistribution) + std::cout << " " << v ; + std::cout << std::endl; + ////////////////////////////////////////////////////////////////////////// + std::vector< std::vector<std::vector<MortonIndex>>> nodeRepartition; + std::vector< std::vector<int>> sizeForEachGroup(TreeHeight); + createNodeRepartition(MortonIndexDistribution, nodeRepartition, nproc, TreeHeight) ; + for ( std::size_t idLevel=0; idLevel< nodeRepartition.size() ; ++idLevel){ + std::cout << " nodeRepartition at level " << idLevel << std::endl ; + for ( std::size_t procID=0 ; procID< nodeRepartition[idLevel].size(); ++procID){ + std::cout << " n proc( " << procID << " ) " << + " [ " << nodeRepartition[idLevel][procID][0] << ", " + << nodeRepartition[idLevel][procID][1] <<" ]" <<std::endl ; + } + } + + BuilddMortonDistributionForCGroupCellInTree(parallelManager,mortonLeaves,TreeHeight,groupSize, + MortonIndexDistribution,nodeRepartition,sizeForEachGroup ) ; + + // + // Print group size per level + std::cout << std::endl<< " Group size at the leaf level " << std::endl ; + int totalLeaves = 0 ; + for ( std::size_t idLevel=2; idLevel< sizeForEachGroup.size() ; ++idLevel){ + std::cout << " Group size at level " << idLevel << std::endl ; + totalLeaves = 0 ; + for ( auto v : sizeForEachGroup[idLevel]){ + totalLeaves += v; + std::cout << " " << v ; + } + std::cout << std::endl ;std::cout << " Total number of leaves: " <<totalLeaves << std::endl; + } + // + GroupTree = new OCTREEGRPOUPCLASS (TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), + groupSize, &allParticles, sizeForEachGroup, true); + // + // + + + } +} + +#endif diff --git a/Src/GroupTree/Core/FCheckResults.hpp b/Src/GroupTree/Core/FCheckResults.hpp new file mode 100644 index 000000000..f07bf7323 --- /dev/null +++ b/Src/GroupTree/Core/FCheckResults.hpp @@ -0,0 +1,93 @@ +#ifndef _FGROUPTREE_CHECK_RESULTS_ +#define _FGROUPTREE_CHECK_RESULTS_ + +#include <iostream> +#include <string> +#include <vector> +#include <algorithm> + +#include "Utils/FGlobal.hpp" +#include "Utils/FAssert.hpp" +#include "Utils/FMath.hpp" +#include "Files/FFmaGenericLoader.hpp" +#include "Utils/FPoint.hpp" +// FBox +#include "Adaptive/FBox.hpp" +// Group linear tree +#include "GroupTree/Core/FGroupLinearTree.hpp" +// +#include "GroupTree/Core/FGroupTools.hpp" +// +// +// param[in] FMpiComm +// param[in] seqLoader +// param[in] box +// param[in] TreeHeight +// param[inout] myParticles +// +template < class LOADER_T, typename PARTICLE_T, typename BOX_T > +void 
readAndSortAllParticle(LOADER_T & seqLoader, const BOX_T & box, + std::vector<PARTICLE_T> &myParticles, const int TreeHeight ){ + + using REAL= typename LOADER_T::dataType ; + FAssertLF(seqLoader.isOpen()); + const FSize NbParticles = seqLoader.getNumberOfParticles(); + // + // Read File + myParticles.clear() ; + + myParticles.resize(NbParticles) ; + const std::size_t max_level = sizeof(PARTICLE_T::morton_index) * 8 / 3; + + for(FSize idxPart = 0 ; idxPart < NbParticles; ++idxPart){ + FPoint<REAL> pos ; + REAL physicalValue ; + seqLoader.fillParticle(&pos, &physicalValue);//Same with file or not + // + MortonIndex morton = inria::linear_tree::get_morton_index( pos, box, max_level); + myParticles[idxPart].fill(pos,physicalValue,morton) ; + } + std::sort(myParticles.begin(), myParticles.end(), [&](const PARTICLE_T& a, const PARTICLE_T& b) { + return (a.getMorton() < b.getMorton() ) ; + } + ); + // Set the right MortonIndex + for(FSize idxPart = 0 ; idxPart < NbParticles ; ++idxPart){ + myParticles[idxPart].morton_index = inria::linear_tree::get_morton_index( myParticles[idxPart] .pos, box, + TreeHeight-1); + } +} +// +// param[in] FMpiComm +// param[in] elapsedTime time on each processor +// param[out] minTime the minimum time on each processor +// param[out] maxTime the maximal time on each processor +// param[out] meanTime the mean time on each processor +// +template <typename PARTICLE, class REAl, typename OCTREECLASS1, + typename OCTREECLASS2,class FmmClass1, class FmmClass2> +void checkWithDuplicatedTree( const int& myrank, const PARTICLE &arrayParticles, + OCTREECLASS1 & treeCheck, + FmmClass1 & algorithm, + OCTREECLASS2 & grouptree, + FmmClass2 & groupalgo, + const int &operationsToProceed, + const REAl& epsilon ) { + // + std::cout << "checkWithDuplicatedTree - nb part " << arrayParticles.size() <<std::endl; + + // Compute a sequential FMM + algorithm.execute(operationsToProceed); + // + std::string fileName("output-Let-") ; + fileName += std::to_string(myrank) + ".fma" ; + groupTree::saveSolutionInFile(fileName, arrayParticles.size() ,treeCheck) ; + + groupTree::checkCellTree(grouptree, groupalgo, treeCheck, epsilon) ; + groupTree::checkLeaves(grouptree, groupalgo, treeCheck, epsilon) ; + + std::cout << "Comparing is over" << std::endl; +} + + +#endif diff --git a/Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp b/Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp index 0ab85ceed..f79e2edc5 100644 --- a/Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp +++ b/Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp @@ -2,23 +2,26 @@ * This file contain function to manage the FGroupLinearTree and build a * GroupTree with LET * The LET is the Local Essential Tree - * The LET is the symbolic information of leaf for P2P and M2L operation + * The LET is the symbolic information of leaf for P2P M2L and M2M operation * * @author benjamin.dufoyer@inria.fr */ - +// ==== CMAKE ===== +// @FUSE_MPI +// ================ +// #ifndef _FDISTRIBUTED_GROUPTREE_BUILDER_HPP_ #define _FDISTRIBUTED_GROUPTREE_BUILDER_HPP_ -#include "FGroupTree.hpp" -#include "FOutOfBlockInteraction.hpp" #include <cmath> #include <algorithm> - #include <stdint.h> #include <limits.h> +#include "inria/algorithm/distributed/mpi.hpp" + + // Define a MPI type for std::size_t #if SIZE_MAX == UCHAR_MAX #define my_MPI_SIZE_T MPI_UNSIGNED_CHAR @@ -31,187 +34,13 @@ #elif SIZE_MAX == ULLONG_MAX #define my_MPI_SIZE_T MPI_UNSIGNED_LONG_LONG #else - #error "what is happening here?" 
+ #error "FDistributedGroupTreeBuilder.hpp: MPI_TYPE what is happening here?" #endif -#define MAX_SIZE_MPI_MESSAGE 4000000 namespace dstr_grp_tree_builder{ - -/** - * Return the number of MPI Message needed to send this buffer according to the - * MAX_SIZE_MPI_MESSAGE variables define in the front of this documents - * @author benjamin.dufoyer@inria.fr - * @param size_buffer size of the buffer - * @return the number of message - */ -unsigned get_nb_mpi_msg(long unsigned size_buffer){ - unsigned nb_message = 1; - while(size_buffer > MAX_SIZE_MPI_MESSAGE){ - size_buffer -= size_buffer - MAX_SIZE_MPI_MESSAGE; - nb_message++; - } - return nb_message; -} -unsigned get_nb_mpi_msg(int size_obj, int nb_obj_to_send){ - return get_nb_mpi_msg(size_obj*nb_obj_to_send); -} -template<class send_type> -unsigned get_nb_mpi_msg(send_type obj_to_send, int nb_obj_to_send){ - return get_nb_mpi_msg(sizeof(send_type)*nb_obj_to_send); -} - - -/** - * This function return the number of element to put into a MPI message - * according to the MAX_SIZE_MPI_MESSAGE variables define in the front of this - * documents - * @author benjamin.dufoyer@inria.fr - * @param obj_to_send a object to send - * @param nb_obj_to_send number of object to send - * @return the number of element per message - */ -unsigned get_nb_elt_interval(unsigned long int size_buffer){ - return (unsigned)MAX_SIZE_MPI_MESSAGE/(unsigned)size_buffer; -} -template<class send_type> -unsigned get_nb_elt_interval(send_type obj_to_send, int nb_obj_to_send){ - return get_nb_elt_interval(size_of(obj_to_send)*nb_obj_to_send); -} -unsigned get_nb_elt_interval(int size_obj, int nb_obj_to_send){ - return get_nb_elt_interval(size_obj*nb_obj_to_send); -} - -/** - * This function split MPI message if the buffer is too large - * this funciton run with the irecv_splited - * She send 1 or more message if the buffer have a - * good size and send more than 1 message if the size of the buffer - * is bigger than MAX_SIZE_MPI_MESSAGE define at the front of this documents - * @author benjamin.dufoyer@inria.fr - * @param conf MPI Conf - * @param addr_send Vector address of data to send - * @param idx_to_send index where the data to send start - * @param nb_element_to_send number of element to send - * @param destination number of the destination proc - * @param tag MPI tag - */ -template<class class_sended> -void isend_splited(const inria::mpi_config& conf, - std::vector<class_sended>* addr_send, - unsigned* idx_to_send, - std::size_t nb_element_to_send, - int& destination, - int tag = 1 ) -{ - // getting usefull variable - unsigned size_buffer = (unsigned)sizeof(class_sended)*(unsigned)nb_element_to_send; - unsigned nb_message = get_nb_mpi_msg(size_buffer); - // Check the number of message - if(nb_message > 1){ - unsigned nb_elt_interval = get_nb_elt_interval(size_buffer); - // Send all messages - unsigned nb_elt; - for(unsigned i = 0; i < nb_message ; ++i ){ - if(nb_element_to_send > nb_elt_interval){ - nb_elt = nb_elt_interval; - nb_element_to_send -= nb_elt_interval; - } else { - // last message - nb_elt = (unsigned)nb_element_to_send; - } - conf.comm.isend( - &addr_send->data()[*idx_to_send], - (int)sizeof(class_sended)*(int)nb_elt, - MPI_CHAR, - destination,tag - ); - *idx_to_send += nb_elt; - } - } else { - // send 1 message if the buffer is not too big - conf.comm.isend( - &addr_send->data()[*idx_to_send], - (int)sizeof(class_sended)*(int)nb_element_to_send, - MPI_CHAR, - destination,tag - ); - *idx_to_send += (unsigned)nb_element_to_send; - } -} - -/** - * This 
function post 1 or more MPI Irecv. She check if the buffer is too big - * She modify dynamicly the vector of request for the waitAll, she realloc - * every time when it's needly - * @author benjamin.dufoyer@inria.fr - * @param conf MPI conf - * @param vector_request Adress of the vector with MPI status - * @param idx_request Index of the current MPI status - * @param addr_recev Address of the vector where data will be stock - * @param idx_reception Index of the vector where data will be stock - * @param nb_element_to_recv Number of element to recv - * @param destination number of the destination proc - * @param tag tag of the communication - */ -template<class class_recv> -void irecv_splited(const inria::mpi_config& conf, - std::vector<inria::mpi::request>* vector_request, - int* idx_request, - std::vector<class_recv>* addr_recev, - unsigned* idx_reception, - std::size_t nb_element_to_recv, - int& destination, - int tag = 1 ) -{ - // getting usefull variable - unsigned long int size_buffer = sizeof(class_recv)*nb_element_to_recv; - unsigned nb_message = get_nb_mpi_msg(size_buffer); - // check if this function is call at good time - if( nb_message > 1){ - unsigned nb_elt_interval = get_nb_elt_interval(size_buffer); - // resize the vector of request - { - // we do -1 because, we don't count the message already allocate - unsigned current_nb_msg = (unsigned)vector_request->size()-1; - vector_request->resize(current_nb_msg+nb_message); - } - // send the good number of message - unsigned nb_elt = 0; - for(unsigned i = 0; i < nb_message ; ++i ){ - // compute the number of element recev - if(nb_element_to_recv > nb_elt_interval){ - nb_elt = nb_elt_interval; - nb_element_to_recv -= nb_elt_interval; - } else { - // last message - nb_elt = (unsigned)nb_element_to_recv; - } - vector_request->data()[*idx_request] = - conf.comm.irecv( - &addr_recev->data()[*idx_reception], - (int)sizeof(class_recv)*(int)nb_elt, - MPI_CHAR, - destination,tag - ); - *idx_reception += nb_elt; - *idx_request+=1; - } - } else { - vector_request->data()[*idx_request] = - conf.comm.irecv( - &addr_recev->data()[*idx_reception], - (int)sizeof(class_recv)*(int)nb_element_to_recv, - MPI_CHAR, - destination,tag - ); - *idx_request += 1; - *idx_reception =+ (unsigned)nb_element_to_recv; - } -} - /** * fill_new_linear_tree this function fill the new linear tree with the value * of the current linear tree @@ -242,9 +71,11 @@ void fill_new_linear_tree( unsigned destination_interval = max_destination-min_destination; unsigned source_interval = max_copy-min_copy; if(source_interval < destination_interval){ - memcpy(&destination->data()[min_destination],&source->data()[min_copy],sizeof(node_t)*source_interval); + memcpy(&destination->data()[min_destination],&source->data()[min_copy], + sizeof(node_t)*source_interval); } else { - memcpy(&destination->data()[min_destination],&source->data()[min_copy],sizeof(node_t)*destination_interval); + memcpy(&destination->data()[min_destination],&source->data()[min_copy], + sizeof(node_t)*destination_interval); } } @@ -274,7 +105,7 @@ void fill_new_linear_tree( //int* array_global_nb_leaf = (int *)malloc(sizeof(int) * nb_proc); //nb leaf const int my_rank = conf.comm.rank(); // Check if i have leaf on my proc - FAssert(nb_local_leaf > 0); + FAssertLF(nb_local_leaf > 0); // Distribute the local number of leaf to every process conf.comm.allgather(&nb_local_leaf, 1, @@ -292,33 +123,36 @@ void fill_new_linear_tree( int nb_leaf_needed = nb_local_group * group_size; // Check if we habe enought leafs for 
every proc if( (nb_leaf_needed*(nb_proc-1)) > nb_global_leaf ){ - std::cout << " nb_leaf_needed : " << nb_leaf_needed << std::endl; - std::cout << " nb_global_leaf : " << nb_global_leaf << std::endl; - std::cout << " res : " << (nb_leaf_needed*(nb_proc-1)) << std::endl; + std::cout << " nb_leaf_needed: " << nb_leaf_needed << std::endl; + std::cout << " nb_global_leaf: " << nb_global_leaf << std::endl; + std::cout << " res: " << (nb_leaf_needed*(nb_proc-1)) << std::endl; } - FAssert( (nb_leaf_needed*(nb_proc-1)) < nb_global_leaf ); + FAssertLF( (nb_leaf_needed*(nb_proc-1)) < nb_global_leaf ); // OC: Pourquoi cela ? Ne suffit il pas de faire un exit dans le if ?? struct message_info{ int process_rank; int nb_leaf; }; - // We stock the future interaction in 2 vector + // We stock the future interaction in 2 vectors std::vector<message_info> interaction_send; std::vector<message_info> interaction_recev; // The number of leaf send and revev from left // it's used to fill the new linear_tree - int nb_leaf_recev_left = 0; + int nb_leaf_recev_left = 0; int nb_leaf_recev_right = 0; - int nb_leaf_send_right = 0; int nb_leaf_send_left = 0; + // // COMPUTE FOR LEFT PROCESS // Check to know if the current proc need to send leaf // The compute is from left to right because it's the right process // who don't have a fix number of particle - if(!my_rank == 0){ //The first process don't have computation on his left - for(int i = 1 ; i < my_rank ; i++ ){ + // + // OC: Ne peut-on mettre une topologie 1d dans le communicateur pour simplifier le code + // + if(!my_rank == 0){ //The first process don't have computation on his left OC: Execpt in periodic + for(int i = 1 ; i < my_rank ; ++i ){ array_global_nb_leaf[i] += array_global_nb_leaf[i-1]; } // Check if on left process need leaf or have too many leaf @@ -373,7 +207,7 @@ void fill_new_linear_tree( } } - // Now we have 2 vector with all interaction with other process + // Now we have 2 vectors with all interaction with other process // in the first we will post every recev message // in a second time we post every send message @@ -400,8 +234,8 @@ void fill_new_linear_tree( } ////Posting sending message - for(unsigned i = 0 ; i < (unsigned)interaction_send.size(); i++ ){ - int size_send = (int)sizeof(node_t)*interaction_send[i].nb_leaf; + for(unsigned i = 0 ; i < (unsigned)interaction_send.size(); ++i ){ + int sizeToSend = (int)sizeof(node_t)*interaction_send[i].nb_leaf; // Compute the pointer to send cell unsigned start = 0; if(my_rank < interaction_send[i].process_rank){ @@ -410,7 +244,7 @@ void fill_new_linear_tree( //sending leaf conf.comm.isend(&linear_tree->data()[start], - size_send, + sizeToSend, MPI_CHAR, interaction_send[i].process_rank,1); } @@ -426,11 +260,10 @@ void fill_new_linear_tree( nb_leaf_send_left, nb_leaf_send_right); - - - - // waiting for the end of MPI request + // waiting for the send of all MPI request + // usefull as buffer are local in the procedure inria::mpi::request::waitall(interaction_recev.size(),tab_mpi_status); + // //free(array_global_nb_leaf); // swaping linear_tree pointer @@ -455,7 +288,7 @@ void fill_new_linear_tree( class type2_t> void share_particle_division( const inria::mpi_config& conf, - std::pair<type1_t,type2_t> my_pair, + std::pair<type1_t,type2_t>& my_pair, std::vector<std::pair<type1_t,type2_t>>& particle_index_distribution ){ conf.comm.allgather( @@ -484,8 +317,8 @@ void share_particle_division( std::vector<particle_t>& particle, std::vector<std::pair<type1_t,type2_t>>& particle_index_distribution) { - 
FAssert(particle_index_distribution.size() == (unsigned)conf.comm.size()); - FAssert(particle.size() > 0); + FAssertLF(particle_index_distribution.size() == (unsigned)conf.comm.size()); + FAssertLF(particle.size() > 0); std::pair<type1_t,type2_t> my_idx; my_idx.first = particle.front().morton_index; @@ -508,9 +341,6 @@ std::vector<MortonIndex> sort_and_delete_duplicate_data( if(nb_data != 0) { // Sort every morton index - //std::sort(data_to_modify.begin(),data_to_modify.begin()+nb_data, [](MortonIndex a, MortonIndex b){ - // return a < b; - //}); FQuickSort<MortonIndex>::QsSequential(data_to_modify.data(),nb_data); // Compute the number of different morton index @@ -520,7 +350,7 @@ std::vector<MortonIndex> sort_and_delete_duplicate_data( for(unsigned i = 1 ; i < nb_data ; ++i){ if(last_m_idx != data_to_modify[i]){ last_m_idx = data_to_modify[i]; - nb_leaf++; + ++nb_leaf; } } // Alloc the returned vector @@ -548,7 +378,6 @@ std::vector<MortonIndex> sort_and_delete_duplicate_data( /** - * IDEA Factoriser la fin avec la fonction pour le M2L IDEA * This function compute the morton index of every leaf needed for the P2P * First we compute every morton index needed for every leaf * We sort the result @@ -567,14 +396,14 @@ std::vector<MortonIndex> get_leaf_P2P_interaction( const MortonIndex& local_min_m_idx, const MortonIndex& local_max_m_idx ){ - // 26 is for every interaction - std::vector<MortonIndex> external_interaction(tree.getTotalNbLeaf()*26,0); + // 26 is the number of neigbors of one cell + std::vector<MortonIndex> external_interaction(tree.getTotalNbLeaf()*26,0); //OC: Tableau tres grand // Reset interactions // idx to know where we are in the vector unsigned idx_vector= 0; // First leaf level { - // We iterate on every particle group + // We iterate on all particle group // OC: Local on the each proc ? for(int idxGroup = 0 ; idxGroup < tree.getNbParticleGroup() ; ++idxGroup){ // get the particle group // it's ugly but, if i use template, it's not convenient @@ -598,7 +427,9 @@ std::vector<MortonIndex> get_leaf_P2P_interaction( // Iterate on every neighbors for(int idxInter = 0 ; idxInter < counter ; ++idxInter){ // Check if the current proc already have the leaf - if(interactionsIndexes[idxInter] < local_min_m_idx || interactionsIndexes[idxInter] > local_max_m_idx ){ + if(interactionsIndexes[idxInter] >= local_min_m_idx && interactionsIndexes[idxInter] <= local_max_m_idx){ + // do nothing + } else { // Check if the leaf exist if(interactionsIndexes[idxInter] >= global_min_m_idx && interactionsIndexes[idxInter] <= global_max_m_idx ){ external_interaction[idx_vector] = interactionsIndexes[idxInter]; @@ -615,9 +446,6 @@ std::vector<MortonIndex> get_leaf_P2P_interaction( /** - * IDEA on peut factoriser le post traitement de du P2P avec celui qui est fait - * ici IDEA - * * This function compute the leaf needed for the M2L operation * We take every leaf of the tree, get her parent, get the neigbors of * the parents and take every child of the parent's neighbors. 
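// Editorial sketch, not part of the patch: the M2L partner computation described in the
// comment above, written out with plain Morton arithmetic. For a cell c at level l the
// candidates are the children of the parent's 3x3x3 neighbourhood (6^3 = 216 cells at
// level l) minus the 3x3x3 near neighbourhood of c itself, i.e. at most 216 - 27 = 189
// cells. ScalFMM obtains the same list through FTreeCoordinate::getInteractionNeighbors();
// the encode/decode helpers and the function name below are illustrative only.
#include <cstdint>
#include <cstdlib>
#include <vector>

using MortonIndex = std::int64_t;

static void decode(MortonIndex m, int c[3]) {             // Morton index -> (x, y, z)
    c[0] = c[1] = c[2] = 0;
    for (int b = 0; b < 20; ++b)
        for (int d = 0; d < 3; ++d)
            c[d] |= int((m >> (3 * b + d)) & 1) << b;
}

static MortonIndex encode(const int c[3]) {                // (x, y, z) -> Morton index
    MortonIndex m = 0;
    for (int b = 0; b < 20; ++b)
        for (int d = 0; d < 3; ++d)
            m |= MortonIndex((c[d] >> b) & 1) << (3 * b + d);
    return m;
}

// All far-field (M2L) partners of 'cell' at 'level'.
std::vector<MortonIndex> m2l_candidates(MortonIndex cell, int level) {
    const int side = 1 << level;                           // cells per edge at this level
    int c[3]; decode(cell, c);
    const int p[3] = { c[0] >> 1, c[1] >> 1, c[2] >> 1 };  // parent coordinate
    std::vector<MortonIndex> out;
    for (int dx = -1; dx <= 1; ++dx)
     for (int dy = -1; dy <= 1; ++dy)
      for (int dz = -1; dz <= 1; ++dz) {                   // parent's 3x3x3 neighbourhood
        const int n[3] = { p[0] + dx, p[1] + dy, p[2] + dz };
        if (n[0] < 0 || n[1] < 0 || n[2] < 0 ||
            2 * n[0] >= side || 2 * n[1] >= side || 2 * n[2] >= side) continue;
        for (int child = 0; child < 8; ++child) {          // each neighbour's 8 children
            const int k[3] = { 2 * n[0] + (child & 1),
                               2 * n[1] + ((child >> 1) & 1),
                               2 * n[2] + ((child >> 2) & 1) };
            // the 3x3x3 block around 'cell' is near field (P2P), so it is skipped here
            if (std::abs(k[0] - c[0]) <= 1 && std::abs(k[1] - c[1]) <= 1 &&
                std::abs(k[2] - c[2]) <= 1) continue;
            out.push_back(encode(k));                      // at most 189 entries
        }
      }
    return out;
}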
@@ -641,10 +469,11 @@ std::vector<MortonIndex> get_leaf_M2L_interaction_at_level( GroupOctreeClass& tree, int dim = 3) { + // idx to fill the vector unsigned idx_vector = 0; // All External leaf - std::vector<MortonIndex> external_interaction(tree.getNbCellGroupAtLevel(level)*tree.getNbElementsPerBlock()*216,0); + std::vector<MortonIndex> external_interaction(tree.getNbCellGroupAtLevel(level)*tree.getNbElementsPerBlock()*189,0); // iterate on the group for(int idxGroup = 0 ; idxGroup < tree.getNbCellGroupAtLevel(level) ; ++idxGroup){ auto* containers = tree.getCellGroup(level,idxGroup); @@ -655,11 +484,9 @@ std::vector<MortonIndex> get_leaf_M2L_interaction_at_level( ++leafIdx){ // Getting the current morton index curr_m_idx = containers->getCellMortonIndex(leafIdx); - // Compute the morton index of the father - // If it's a new father // Compute coordinate - MortonIndex interactionsIndexes[216]; - int interactionsPosition[216]; + MortonIndex interactionsIndexes[189]; + int interactionsPosition[189]; FTreeCoordinate coord(curr_m_idx); // Getting neigbors of the father int counter = coord.getInteractionNeighbors(level,interactionsIndexes,interactionsPosition); @@ -668,12 +495,11 @@ std::vector<MortonIndex> get_leaf_M2L_interaction_at_level( if( tmp >= global_min_m_idx && tmp <= global_max_m_idx) { - if(tmp < local_min_m_idx || - tmp > local_max_m_idx){ + if(tmp >= local_min_m_idx && + tmp <= local_max_m_idx){ + // do nothing + } else { //Stock the leaf - if(idx_vector > external_interaction.size()){ - std::cout << "ERROR " << std::endl; - } external_interaction[idx_vector] = tmp; ++idx_vector; } @@ -730,7 +556,7 @@ std::vector<MortonIndex> concat_M2L_P2P( nb_leaf += (leaf_P2P.size()) - idx_P2P; } // Allocate the vector - std::vector<MortonIndex> leaf_needed(nb_leaf,0); + std::vector<MortonIndex> leaf_needed(nb_leaf,-1); idx_P2P = 0; idx_M2L = 0; std::size_t idx_leaf = 0; @@ -752,19 +578,24 @@ std::vector<MortonIndex> concat_M2L_P2P( ++idx_P2P; ++idx_M2L; } - idx_leaf++; + ++idx_leaf; } + // std::cout << " idx_leaf " << idx_leaf << " nb_leaf " << nb_leaf <<std::endl; // Copy the rest of leaf with a memcpy if(idx_leaf < nb_leaf){ - void* destination = &leaf_needed.data()[idx_leaf]; + // std::cout << " MEMCOPY " << std::endl; + void* destination = &leaf_needed.data()[idx_leaf]; void* source; std::size_t num = 0; if(idx_P2P == leaf_P2P.size()){ + // std::cout << " M2L " <<idx_M2L << " " << leaf_M2L[idx_M2L]<< " " + // << leaf_M2L.size() -1 << " " << leaf_M2L[leaf_M2L.size() -1 ] + // << " "<< leaf_M2L.size() << " " << leaf_M2L.size() - idx_M2L <<std::endl; source = &leaf_M2L[idx_M2L]; - num = sizeof(MortonIndex)* ((leaf_M2L.size()-1) - idx_M2L); + num = sizeof(MortonIndex)*(leaf_M2L.size() - idx_M2L); } else { source = &leaf_P2P[idx_P2P]; - num = sizeof(MortonIndex)* ((leaf_P2P.size()-1) - idx_P2P); + num = sizeof(MortonIndex)* (leaf_P2P.size() - idx_P2P); } memcpy(destination,source,num); } @@ -794,33 +625,50 @@ std::vector<std::vector<std::size_t>> get_matrix_interaction( { // Getting MPI Info const int nb_proc = conf.comm.size(); + const int my_rank = conf.comm.rank(); // Alloc interaction matrix std::vector<std::vector<std::size_t>> matrix_interaction(2,std::vector<std::size_t>(nb_proc,0)); std::vector<std::size_t> global_matrix_interaction(nb_proc,0); // Initialise idx on particle_distribution - std::size_t idx_part = 0; - // Interate on every leaf to know where she is + size_t idx_part = 0; + // Iterate on every leaf to know where she is + MortonIndex max_morton_index = 0; + 
if(needed_leaf.size() > 0) + max_morton_index = needed_leaf[needed_leaf.size()-1]+1; + // iterate on every mortonIndex for(unsigned idx_leaf = 0; idx_leaf < needed_leaf.size(); ++idx_leaf){ MortonIndex current_leaf = needed_leaf[idx_leaf]; // if she is on the current proc if(current_leaf >= particle_distribution[idx_part].first && current_leaf <= particle_distribution[idx_part].second){ - matrix_interaction[0][idx_part] += 1; + if(idx_part == (unsigned)my_rank){ + needed_leaf[idx_leaf] = max_morton_index; + } else { + matrix_interaction[0][idx_part] += 1; + } } else { // While the current leaf is not on the good interval - while(particle_distribution[idx_part].second < current_leaf){ + while(idx_part < particle_distribution.size() && particle_distribution[idx_part].second < current_leaf){ idx_part += 1; } + if(idx_part == particle_distribution.size()) + break; if(particle_distribution[idx_part].first > current_leaf){ // in this case the leaf is not in interval, so she doesn't exist - needed_leaf[idx_leaf] = 0; + needed_leaf[idx_leaf] = max_morton_index; } else { // In the case it's a normal case, we juste increment the // number of leaf send at the proc idx_part - matrix_interaction[0][idx_part] += 1; + if(idx_part == (unsigned)my_rank){ + needed_leaf[idx_leaf] = max_morton_index; + } else { + matrix_interaction[0][idx_part] += 1; + } } } } + // i don't need to send to me + matrix_interaction[0][my_rank] = 0; // now we have the number of leaf to send at every proc // we proceed a AlltoAll to share this information at every proc conf.comm.alltoall(matrix_interaction[0].data(), @@ -830,125 +678,58 @@ std::vector<std::vector<std::size_t>> get_matrix_interaction( 1, my_MPI_SIZE_T); // removing bad leaf - needed_leaf.erase(std::remove(needed_leaf.begin(),needed_leaf.end(),0),needed_leaf.end()); + needed_leaf.erase(std::remove(needed_leaf.begin(),needed_leaf.end(),max_morton_index),needed_leaf.end()); return {begin(matrix_interaction),end(matrix_interaction)}; } - /** -* This function compute the number of block needed to send all leaf -* stock in leaf_needed. 
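// Editorial sketch, not part of the patch: the count exchange performed by
// get_matrix_interaction() above, reduced to plain MPI with illustrative names.
// Each rank fills one counter per peer ("how many Morton indices I will request
// from you"); a single all-to-all with one element per rank then gives the
// symmetric view ("how many indices each peer will request from me"). The sketch
// assumes std::size_t maps to MPI_UNSIGNED_LONG_LONG, which the patch handles
// portably through its my_MPI_SIZE_T macro.
#include <mpi.h>
#include <cstddef>
#include <vector>

std::vector<std::size_t>
exchange_request_counts(const std::vector<std::size_t>& nb_asked_to, // one entry per rank
                        MPI_Comm comm) {
    int nb_proc = 0;
    MPI_Comm_size(comm, &nb_proc);
    std::vector<std::size_t> nb_asked_from_me(nb_proc, 0);
    // plays the role of matrix_interaction[0] (send side) and matrix_interaction[1] (receive side)
    MPI_Alltoall(nb_asked_to.data(),      1, MPI_UNSIGNED_LONG_LONG,
                 nb_asked_from_me.data(), 1, MPI_UNSIGNED_LONG_LONG, comm);
    return nb_asked_from_me;
}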
-* This function return a vector with all idx of block needed by the proc -* @author benjamin.dufoyer@inria.fr -* @param tree GroupTree -* @param leaf_needed Vector where leaf are stock -* @return Vector with all block idx -*/ -template<class GroupOctreeClass> -std::vector<MortonIndex> get_nb_block_from_leaf(GroupOctreeClass& tree, - MortonIndex* leaf_needed, - std::size_t nb_leaf) -{ - std::vector<MortonIndex> block_to_send(tree.getNbParticleGroup(),0); - if(nb_leaf == 0) - return {block_to_send.begin(),block_to_send.begin()}; - // declaration of idx varaibles - unsigned idx_vector = 0; - unsigned idx_leaf = 0; - // iterate on every group - for(int idx_group = 0 ; idx_group < tree.getNbParticleGroup() ; ++idx_group){ - if(idx_leaf >= nb_leaf) - break; - // get the current block - auto* container = tree.getParticleGroup(idx_group); - // get first leaf in this interval - while( idx_leaf < nb_leaf && container->getStartingIndex() > leaf_needed[idx_leaf]){ - ++idx_leaf; - } - if(idx_leaf >= nb_leaf) - break; - while( container->getEndingIndex() < leaf_needed[idx_leaf] && - idx_leaf < nb_leaf){ - // if the leaf exist, keep the leaf - if(container->exists(leaf_needed[idx_leaf])){ - block_to_send[idx_vector] = idx_group; - ++idx_vector; - ++idx_leaf; - break; - } - ++idx_leaf; - } - if(idx_leaf == nb_leaf) - break; - } - return {block_to_send.begin(),block_to_send.begin()+idx_vector}; -} -/* - template<class GroupOctreeClass> - std::vector<MortonIndex> get_nb_block_from_node(GroupOctreeClass& tree, - MortonIndex* node_needed, - std::size_t nb_node, - int level, - std::vector<bool>* block_already_send) - { - std::vector<int> block_to_send(tree.getNbCellGroupAtLevel(level),0); - int idx_vect = 0 ; - // iterate of every node - for(unsigned i = 0 ; i < nb_node; ++i){ - // iteracte of every block - for(unsigned idxGroup = 0 ; idxGroup < (unsigned)tree.getNbCellGroupAtLevel(level) ; ++idxGroup){ - // If the block is not already send - if(block_already_send->at(idxGroup) == false){ - auto* containers = tree.getCellGroup(level,idxGroup); - if(containers->isInside(node_needed[i])){ - block_to_send[idx_vect] = idxGroup; - ++idx_vect; - block_already_send->at(idxGroup) = true; - } - } - } - } - return {block_to_send.begin(),block_to_send.begin()+idx_vect}; - }*/ - - + * This function return the number of block at node level + * This algo is different than the computation at leaf level, because + * it's only the proc who have the smallest rank who have the attribution of + * the block + * @author benjamin.dufoyer@inria.fr + * @param tree local GroupTree + * @param node_needed List of needed node + * @param nb_node Number of node needed in the array + * @param level Level of the node + * @return Vector of index of block + */ template<class GroupOctreeClass> std::vector<MortonIndex> get_nb_block_from_node(GroupOctreeClass& tree, MortonIndex* node_needed, std::size_t nb_node, - int level, - std::vector<bool>* block_already_send) + int level) { int idx_vect = 0 ; std::vector<int> block_to_send(tree.getNbCellGroupAtLevel(level),0); - unsigned idx_node = 0; // iterate on every group for(unsigned idx_group = 0; idx_group < (unsigned)tree.getNbCellGroupAtLevel(level) ;++idx_group){ // if the current block hasnt been already send - if(!block_already_send->at(idx_group)){ - auto* containers = tree.getCellGroup(level,idx_group); - // check if we have check every node - if(idx_node == nb_node){ - break; - } - // while the morton index of the current node is not high - while(idx_node < nb_node && node_needed[idx_node] < 
containers->getStartingIndex()){ - ++idx_node; - } - while(idx_node < nb_node && node_needed[idx_node] < containers->getEndingIndex()){ - if(containers->isInside(node_needed[idx_node])){ - block_to_send[idx_vect] = idx_group; - ++idx_vect; - ++idx_node; - break; - } + auto* containers = tree.getCellGroup(level,idx_group); + // check if we have check every node + if(idx_node == nb_node){ + break; + } + // while the morton index of the current node is not high + while(idx_node < nb_node && node_needed[idx_node] < containers->getStartingIndex()){ + ++idx_node; + } + // while the current morton index is in the block + while(idx_node < nb_node && node_needed[idx_node] < containers->getEndingIndex()){ + // if the container have the current morton index + // keep the block and go out of the while + if(containers->isInside(node_needed[idx_node])){ + block_to_send[idx_vect] = idx_group; + ++idx_vect; ++idx_node; - } - if(idx_node == nb_node){ - break; - } + break; + } + ++idx_node; + } + if(idx_node == nb_node){ + break; } } return {block_to_send.begin(),block_to_send.begin()+idx_vect}; @@ -971,20 +752,17 @@ std::vector<MortonIndex> get_nb_block_from_leaf(GroupOctreeClass& tree, void send_get_number_of_block_node_level( std::vector<MortonIndex>& vect_recv, std::vector<std::vector<std::size_t>> global_matrix_interaction, - //std::vector<std::size_t>& global_matrix_interaction, std::size_t& nb_msg_recv, GroupOctreeClass& tree, std::vector<std::pair<int,int>>& nb_block_to_receiv, - std::vector<std::pair<int,std::vector<MortonIndex>>>& leaf_to_send, + std::vector<std::pair<int,std::vector<MortonIndex>>>& block_to_send, int level, const inria::mpi_config& conf ) { int idx_status = 0; - int idx_proc = 0; + int idx_proc = 0; inria::mpi::request tab_mpi_status[nb_msg_recv]; - bool leaf_level = (tree.getHeight()-1 == level); - std::vector<bool> block_already_send(tree.getNbCellGroupAtLevel(level),false); // Post the number reception of the number of block for(unsigned i = 0; i < global_matrix_interaction[0].size() ; ++i) @@ -1010,27 +788,20 @@ std::vector<MortonIndex> get_nb_block_from_leaf(GroupOctreeClass& tree, // If we have interaction with this proc if(global_matrix_interaction[1][i] != 0){ // Compute the number of leaf - if(leaf_level){ - leaf_to_send[idx_status].second = get_nb_block_from_leaf( - tree, - &vect_recv.data()[idx_vect], - global_matrix_interaction[1][i]); - } else { - leaf_to_send[idx_status].second = get_nb_block_from_node( - tree, + int nb_block; + block_to_send[idx_status].second = get_nb_block_from_node( + tree, &vect_recv.data()[idx_vect], - global_matrix_interaction[1][i], - level, - &block_already_send); - } - int nb_block = (int)leaf_to_send[idx_status].second.size(); - leaf_to_send[idx_status].first = idx_proc; + global_matrix_interaction[1][i], + level); + nb_block = (int)block_to_send[idx_status].second.size(); + block_to_send[idx_status].first = i; // send the number of leaf conf.comm.isend( &nb_block, 1, MPI_INT, - idx_proc,1 + i,1 ); idx_vect += global_matrix_interaction[1][i]; idx_status += 1; @@ -1057,7 +828,6 @@ std::vector<MortonIndex> get_nb_block_from_leaf(GroupOctreeClass& tree, std::vector<MortonIndex> send_get_leaf_morton( std::vector<MortonIndex>& needed_leaf, std::vector<std::vector<std::size_t>>& global_matrix_interaction, - //std::vector<std::size_t>& global_matrix_interaction, std::size_t& nb_msg_recv, std::size_t& nb_leaf_recv, const inria::mpi_config& conf) @@ -1075,15 +845,15 @@ std::vector<MortonIndex> send_get_leaf_morton( // Posting every recv message 
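// Editorial sketch, not part of the patch: the exchange pattern implemented just
// below, reduced to plain MPI with illustrative names. Every receive is posted
// first, then the matching sends, and the requests are completed at the end. The
// patch itself waits only on its receive requests and follows with a barrier;
// waiting on both sides, as done here, is the simpler variant. Self exchanges are
// assumed to have a zero count, as they do in the patch.
#include <mpi.h>
#include <vector>

void exchange_morton_lists(std::vector<long long>& to_send,        // packed per destination rank
                           const std::vector<int>& send_count,     // entries per rank
                           std::vector<long long>& to_recv,        // pre-sized, packed per source rank
                           const std::vector<int>& recv_count,     // entries per rank
                           MPI_Comm comm) {
    int nb_proc = 0;
    MPI_Comm_size(comm, &nb_proc);
    std::vector<MPI_Request> requests;
    int send_offset = 0, recv_offset = 0;
    for (int p = 0; p < nb_proc; ++p)                               // 1) post every receive
        if (recv_count[p] > 0) {
            requests.emplace_back();
            MPI_Irecv(to_recv.data() + recv_offset, recv_count[p], MPI_LONG_LONG,
                      p, 1, comm, &requests.back());
            recv_offset += recv_count[p];
        }
    for (int p = 0; p < nb_proc; ++p)                               // 2) post every send
        if (send_count[p] > 0) {
            requests.emplace_back();
            MPI_Isend(to_send.data() + send_offset, send_count[p], MPI_LONG_LONG,
                      p, 1, comm, &requests.back());
            send_offset += send_count[p];
        }
    // 3) complete all pending requests before the buffers go out of scope
    MPI_Waitall(int(requests.size()), requests.data(), MPI_STATUSES_IGNORE);
}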
for(unsigned i = 0; i < global_matrix_interaction[1].size() ; ++i ){ if(global_matrix_interaction[1][i] != 0){ - irecv_splited( - conf, - &tab_mpi_status, - &idx_status, - &vect_recv, - &idx_vect, - global_matrix_interaction[1][i], - idx_proc,1 + std::size_t nb_leaf = global_matrix_interaction[1][i]; + tab_mpi_status[idx_status] = conf.comm.irecv( + &vect_recv[idx_vect], + int(nb_leaf*sizeof(MortonIndex)), + MPI_CHAR, + i,1 ); + idx_vect += (unsigned)nb_leaf; + idx_status+= 1; } idx_proc += 1; } @@ -1093,39 +863,47 @@ std::vector<MortonIndex> send_get_leaf_morton( idx_vect = 0; for(unsigned i = 0; i < global_matrix_interaction[0].size() ; ++i){ if(global_matrix_interaction[0][i] != 0){ - isend_splited( - conf, - &needed_leaf, - &idx_vect, - global_matrix_interaction[0][i], - idx_proc,1 + std::size_t nb_leaf = global_matrix_interaction[0][i]; + conf.comm.isend( + &needed_leaf[idx_vect], + int(nb_leaf*sizeof(MortonIndex)), + MPI_CHAR, + i,1 ); + idx_vect += (unsigned)nb_leaf; } idx_proc += 1; } if(nb_msg_recv != 0 ){ - inria::mpi::request::waitall(tab_mpi_status.size(),tab_mpi_status.data()); + inria::mpi::request::waitall(idx_status,tab_mpi_status.data()); } + conf.comm.barrier(); return{begin(vect_recv),end(vect_recv)}; } +struct particle_symbolic_block{ + int idx_global_block; + FSize nb_particles; + std::vector<FSize> nb_particle_per_leaf; + friend + std::ostream& operator<<(std::ostream& os, const particle_symbolic_block& n) { + return os << "---> nb particle " << n.nb_particles << "<--"; + } +}; -/** - * This struct is used to stock information who wille be send to other proc - */ -struct block_t{ - std::size_t n_block; +struct cell_symbolic_block{ + int idx_global_block; MortonIndex start_index; MortonIndex end_index; int nb_leaf_in_block; - // used to show the block + std::vector<MortonIndex> m_idx_in_block; + friend - std::ostream& operator<<(std::ostream& os, const block_t& n) { - return os << "--> n_block : " << n.n_block << " start : " << n.start_index << " end : " << n.end_index << " nb_leaf " << n.nb_leaf_in_block << "<--"; + std::ostream& operator<<(std::ostream& os, const cell_symbolic_block& n) { + return os << "--> n_block : " << n.idx_global_block << " start : " << n.start_index << " end : " << n.end_index << " nb_leaf " << n.nb_leaf_in_block << "<--"; } - }; /** @@ -1143,22 +921,39 @@ struct block_t{ * @param tree it's the GroupTree where block are stock * @param conf it's the MPI conf */ + template<class GroupOctreeClass> -std::vector<std::vector<block_t>> exchange_block( - std::vector<std::pair<int,int>> nb_block_to_receiv, - std::vector<std::pair<int,std::vector<MortonIndex>>> block_to_send, +std::pair<std::vector<cell_symbolic_block>,std::vector<particle_symbolic_block>> exchange_block( + std::vector<std::pair<int,int>>& nb_block_to_receiv, + std::vector<std::pair<int,std::vector<MortonIndex>>>& block_to_send, GroupOctreeClass& tree, int level, const inria::mpi_config& conf ) { + struct sending_cell_structure{ + int idx_global_block; + MortonIndex start_index; + MortonIndex end_index; + int nb_leaf_in_block; + }; + + int my_rank = conf.comm.rank(); + bool leaf_level = ( level == tree.getHeight() -1 ); + int block_size = tree.getNbElementsPerBlock(); // declaration of the array of MPI status for synchro - unsigned nb_message_recv = 0; + unsigned nb_message_recv = 0; + unsigned nb_block_to_recv = 0; for(unsigned i = 0 ; i < nb_block_to_receiv.size() ;++i ){ - if(nb_block_to_receiv[i].second != 0 ){ + if(nb_block_to_receiv[i].second != 0 && nb_block_to_receiv[i].first != 
my_rank){ + // computing of the number of message and the number of block to recv ++nb_message_recv; + nb_block_to_recv += nb_block_to_receiv[i].second; } } + if(leaf_level) + nb_message_recv = nb_message_recv+ (nb_message_recv*2); + // compute the total number of message std::vector<inria::mpi::request> tab_mpi_status(nb_message_recv); // Compute the total number of block i will send @@ -1167,69 +962,279 @@ std::vector<std::vector<block_t>> exchange_block( total_size += block_to_send[i].second.size(); } - // Declaration of the buffer of block - std::vector<block_t> data_to_send(total_size); + // Buffer to send the cell structure + std::vector<sending_cell_structure> cell_symb_to_send(total_size); + + // buffer to send the morton index + std::vector<size_t> morton_index_send(total_size*block_size,0); + // buffer to send particles block + std::vector<FSize> nb_particle_per_leaf(0,0); + std::vector<unsigned> particle_symb_to_send(0,0); + + if(leaf_level){ + nb_particle_per_leaf.resize(total_size*block_size,0); + particle_symb_to_send.resize(total_size); + } + + std::size_t idx_vect_to_send = 0; + std::size_t idx_m_idx = 0; // Filling the buffer of block for(unsigned i = 0 ; i < block_to_send.size(); ++i){ - for(unsigned j = 0 ; j < block_to_send[i].second.size() ; j++){ - auto* container = tree.getCellGroup(level ,((int)block_to_send[i].second[j])); - block_t block_to_add{ - (size_t)block_to_send[i].second[j], - container->getStartingIndex(), - container->getEndingIndex(), - container->getNumberOfCellsInBlock() - }; - data_to_send[idx_vect_to_send] = block_to_add; - ++idx_vect_to_send; + for(unsigned j = 0 ; j < block_to_send[i].second.size() ; ++j){ + if(block_to_send[i].first != my_rank){ + auto* container = tree.getCellGroup(level ,((int)block_to_send[i].second[j])); + sending_cell_structure block_to_add{ + container->getIdxGlobal(), + container->getStartingIndex(), + container->getEndingIndex(), + container->getNumberOfCellsInBlock() + }; + // Get all morton index of the block + for(int k = 0 ; k < container->getNumberOfCellsInBlock(); ++k ){ + morton_index_send[idx_m_idx+k] = container->getCellMortonIndex(k); + } + for(int k = container->getNumberOfCellsInBlock(); k < block_size; ++k ){ + morton_index_send[idx_m_idx+k] = container->getCellMortonIndex(container->getNumberOfCellsInBlock()-1); + } + // add the block to the vector + cell_symb_to_send[idx_vect_to_send] = block_to_add; + if(leaf_level){ + // get the particle container associated + auto* container_particle = tree.getParticleGroup(((int)block_to_send[i].second[j])); + particle_symb_to_send[idx_vect_to_send] = container_particle->getIdxGlobal(); + // iterate on every leaf + for(int k = 0 ; k < container_particle->getNumberOfLeavesInBlock(); ++k){ + // stock the number of particles in the leaf + nb_particle_per_leaf[idx_m_idx+k] = container_particle->getNbParticlesInLeaf(k); + } + } + idx_m_idx += block_size; + ++idx_vect_to_send; + } } } - // Posting recv - std::vector<std::vector<block_t>> block_t_recv(nb_block_to_receiv.size()); + // Now i have my vector(s) to send all of my blocks + + // the first vector will contain all of cell_block and the send all of + // particle block + std::vector<sending_cell_structure> symbolic_block_rcv(nb_block_to_recv); + + int size_of_vect = nb_block_to_recv*block_size; + std::vector<FSize> nb_part_leaf(0,0); + std::vector<unsigned> idx_global_particle_rcv(0,0); + if(leaf_level){ + nb_part_leaf.resize(size_of_vect,0); + idx_global_particle_rcv.resize(nb_block_to_recv,0); + } + + int idx_status = 
0; + unsigned offset_block = 0; + unsigned offset_m_idx = 0; + // Posting recv for(unsigned i = 0; i < nb_block_to_receiv.size(); ++i) { + if(nb_message_recv == 0) + break; // Setting parameter int source = nb_block_to_receiv[i].first; int nb_block = nb_block_to_receiv[i].second; - if(nb_block != 0){ - block_t_recv[i].resize(nb_block); + if(nb_block != 0 && source != my_rank){ // Posting reveiv message - unsigned idx = 0; - irecv_splited( - conf, - &tab_mpi_status, - &idx_status, - &block_t_recv.data()[i], - &idx, - nb_block, + tab_mpi_status[idx_status] = + conf.comm.irecv( + &symbolic_block_rcv[offset_block], + int(nb_block*sizeof(sending_cell_structure)), + MPI_CHAR, source,1 ); + idx_status += 1; + + // if it's the leaf level, i need to recv the particle block + if(leaf_level){ + tab_mpi_status[idx_status] = + conf.comm.irecv( + &nb_part_leaf[offset_m_idx], + int((nb_block*block_size*sizeof(FSize))), + MPI_CHAR, + source,3 + ); + idx_status += 1; + + tab_mpi_status[idx_status] = + conf.comm.irecv( + &idx_global_particle_rcv[offset_block], + nb_block, + MPI_UNSIGNED, + source,4 + ); + idx_status += 1; + + } + offset_block += nb_block; + offset_m_idx += (nb_block*block_size); } } + FAssertLF(idx_status == (int)nb_message_recv); // post sending message - unsigned offset_block = 0; + offset_block = 0; + offset_m_idx = 0; for(unsigned i = 0 ; i < block_to_send.size(); ++i){ - // Setting parameters - int destination = block_to_send[i].first; - size_t nb_block = (int)block_to_send[i].second.size(); // Posting send message - if(nb_block != 0){ - isend_splited( - conf, - &data_to_send, - &offset_block, - nb_block, + int nb_block = (int)block_to_send[i].second.size(); + int destination = block_to_send[i].first; + + if(nb_block != 0 && destination != my_rank){ + // Setting parameters + conf.comm.isend( + &cell_symb_to_send[offset_block], + int(nb_block*sizeof(sending_cell_structure)), + MPI_CHAR, destination,1 ); + + if(leaf_level){ + conf.comm.isend( + &nb_particle_per_leaf[offset_m_idx], + int(nb_block*block_size*sizeof(FSize)), + MPI_CHAR, + destination,3 + ); + conf.comm.isend( + &particle_symb_to_send[offset_block], + nb_block, + MPI_UNSIGNED, + destination,4 + ); + } + offset_block = (offset_block+nb_block); + offset_m_idx = (offset_m_idx + (nb_block*block_size)); } + } // Waiting for all request - if(nb_message_recv != 0){ - inria::mpi::request::waitall(tab_mpi_status.size(),tab_mpi_status.data()); + inria::mpi::request::waitall(idx_status,tab_mpi_status.data()); + // Sending morton idx + if(leaf_level) + nb_message_recv /= 2; + idx_status = 0; + std::vector<size_t> m_idx_to_recv(size_of_vect,0); + inria::mpi::request tab_status[nb_message_recv]; + offset_block = 0; + offset_m_idx = 0; + for(unsigned i = 0; i < nb_block_to_receiv.size(); ++i) + { + if(nb_message_recv == 0) + break; + // Setting parameter + int source = nb_block_to_receiv[i].first; + int nb_block = nb_block_to_receiv[i].second; + if(nb_block != 0 && source != my_rank){ + // Posting reveiv message + tab_status[idx_status] = + conf.comm.irecv( + &m_idx_to_recv.data()[offset_m_idx], + int(nb_block*block_size), + my_MPI_SIZE_T, + source,2 + ); + idx_status += 1; + offset_m_idx = (offset_m_idx + (nb_block*block_size)); + } + } + + offset_block = 0; + offset_m_idx = 0; + for(unsigned i = 0 ; i < block_to_send.size(); ++i){ + // Posting send message + int nb_block = (int)block_to_send[i].second.size(); + int destination = block_to_send[i].first; + + if(nb_block != 0 && destination != my_rank){ + conf.comm.isend( + 
&morton_index_send.data()[offset_m_idx], + int(nb_block*block_size), + my_MPI_SIZE_T, + destination,2 + ); + offset_m_idx = (offset_m_idx + (nb_block*block_size)); + } } - return{begin(block_t_recv),end(block_t_recv)}; + if(nb_message_recv > 0) + inria::mpi::request::waitall(idx_status,tab_status); + conf.comm.barrier(); + + if(nb_message_recv > 0){ + std::pair<std::vector<cell_symbolic_block>, + std::vector<particle_symbolic_block>> pair_return; + pair_return.first.resize(symbolic_block_rcv.size()); + + if(leaf_level) + pair_return.second.resize(symbolic_block_rcv.size()); + else + pair_return.second.resize(0); + + int nb_leaf_before_me = 0; + + for(unsigned i = 0 ; i < symbolic_block_rcv.size() ; ++i){ + // filling symbolique information + cell_symbolic_block new_block{ + symbolic_block_rcv[i].idx_global_block, + symbolic_block_rcv[i].start_index, + symbolic_block_rcv[i].end_index, + symbolic_block_rcv[i].nb_leaf_in_block + }; + // filling morton index vector + new_block.m_idx_in_block.clear(); + new_block.m_idx_in_block.insert( + new_block.m_idx_in_block.begin(), + m_idx_to_recv.begin()+nb_leaf_before_me, + m_idx_to_recv.begin()+(nb_leaf_before_me+new_block.nb_leaf_in_block)); + for (size_t nb = 0; nb < new_block.m_idx_in_block.size(); nb++) { + if(new_block.m_idx_in_block[nb] > symbolic_block_rcv[i].end_index || new_block.m_idx_in_block[nb] < symbolic_block_rcv[i].start_index){ + std::cout << "ERROR" << i << '\n'; + std::cout << new_block.m_idx_in_block[nb] << " " ; + std::cout << symbolic_block_rcv[i].end_index << " "; + std::cout << symbolic_block_rcv[i].start_index << '\n'; + std::cout << "nb_leaf_in_block "<< symbolic_block_rcv[i].nb_leaf_in_block << "\n"; + std::cout << m_idx_to_recv.size() << std::endl; + for(int idx = nb_leaf_before_me ; idx < nb_leaf_before_me+block_size; ++idx ){ + std::cout << " " << m_idx_to_recv[i] ; + } + std::cout << std::endl; + } + } + // adding to the vector + pair_return.first[i] = new_block; + if(leaf_level){ + particle_symbolic_block new_p_block; + new_p_block.idx_global_block =idx_global_particle_rcv[i]; + new_p_block.nb_particle_per_leaf.clear(); + new_p_block.nb_particle_per_leaf.insert( + new_p_block.nb_particle_per_leaf.begin(), + nb_part_leaf.begin()+nb_leaf_before_me, + nb_part_leaf.begin()+(nb_leaf_before_me+new_block.nb_leaf_in_block)); + + FSize nb_particles_in_block = 0; + + for(unsigned j = 0 ; j < new_p_block.nb_particle_per_leaf.size() ; ++j){ + nb_particles_in_block += new_p_block.nb_particle_per_leaf[j]; + } + + new_p_block.nb_particles = nb_particles_in_block; + pair_return.second[i] = new_p_block; + } + nb_leaf_before_me += block_size; + } + return {pair_return.first,pair_return.second}; + } + std::pair<std::vector<cell_symbolic_block>, + std::vector<particle_symbolic_block>> pair_return; + pair_return.first.resize(0); + pair_return.second.resize(0); + return {pair_return.first,pair_return.second}; } @@ -1253,7 +1258,7 @@ void compute_block_node_level( int level, GroupOctreeClass& tree ){ - FAssert(under_level.size() == current_level.size() ); + FAssertLF(under_level.size() == current_level.size() ); // Iterate on every interaction of the under level for(unsigned i = 0 ; i < under_level.size() ; ++i){ // Init variables for the search @@ -1356,7 +1361,7 @@ void compute_block_node_level( * @param conf MPI conf */ template<class GroupOctreeClass> -std::vector<std::vector<block_t>> send_get_symbolic_block_at_level( +std::pair<std::vector<cell_symbolic_block>,std::vector<particle_symbolic_block>> send_get_symbolic_block_at_level( 
std::vector<MortonIndex>& needed_leaf, std::vector<std::vector<size_t>>& matrix_interaction, GroupOctreeClass& tree, @@ -1381,6 +1386,7 @@ std::vector<std::vector<block_t>> send_get_symbolic_block_at_level( } } + //////////////////////////////////////////////////////////// /// FIRST STEP /// Getting the list of leaf needed by every proc @@ -1392,12 +1398,14 @@ std::vector<std::vector<block_t>> send_get_symbolic_block_at_level( nb_msg_recv, nb_leaf_recv, conf); + // free needed_leaf + std::vector<MortonIndex>().swap(needed_leaf); + //////////////////////////////////////////////////////////// // SECOND STEP // Compute the block to send to other proc // And send the number of block sended //////////////////////////////////////////////////////////// - // Init variable to stock std::vector<std::pair<int,int>> nb_block_to_receiv(nb_msg_send); std::vector<std::pair<int,std::vector<MortonIndex>>> @@ -1413,6 +1421,11 @@ std::vector<std::vector<block_t>> send_get_symbolic_block_at_level( level, conf); + std::vector<MortonIndex>().swap(vect_recv); + + + + //////////////////////////////////////////////////////////// /// THIRD STEP /// Getting the list of leaf needed by every proc @@ -1425,51 +1438,629 @@ std::vector<std::vector<block_t>> send_get_symbolic_block_at_level( conf); } + /** - * This function call a function of groupTree to create the block recev - * to create a LET group tree - * The principe is, we send to the group tree the vector and + * This algorithm compute the global index of every block in the local tree * @author benjamin.dufoyer@inria.fr - * @param tree local group tree - * @param let_block block to create the LET - * @param local_min_m_idx local Morton Index of my GroupTree + * @param tree [description] + * @param conf [description] + */ + template<class GroupOctreeClass> + int set_cell_group_global_index_at( + GroupOctreeClass& tree, + int level, + int nb_block_under_level, + const inria::mpi_config& conf, + bool particle = false + ){ + int nb_proc = conf.comm.size(); + int my_rank = conf.comm.rank(); + int nb_block_before_me = 0; + int my_nb_block; + if(!particle) { + my_nb_block = tree.getNbCellGroupAtLevel(level); + } else { + my_nb_block = tree.getNbParticleGroup(); + } + // get the number of block at the under level + if(my_rank == 0){ + conf.comm.recv( + &nb_block_before_me, + 1, + MPI_INT, + nb_proc-1,level); + } else if( my_rank == nb_proc-1){ + conf.comm.send( + &nb_block_under_level, + 1, + MPI_INT, + 0,level + ); + } + + if(nb_proc != 0){ + // get the number of block before me + if(my_rank != 0){ + conf.comm.recv( + &nb_block_before_me, + 1, + MPI_INT, + my_rank-1,level + ); + } + // send the number of block before me with my number + if(my_rank != (nb_proc-1) ){ + int nb_block_after_me = my_nb_block + nb_block_before_me; + conf.comm.send( + &nb_block_after_me, + 1, + MPI_INT, + my_rank+1,level + ); + } + } + // Now i have the total number of block before me, i will compute + // the idex of all of my block at this level + for(int idx_group = 0 ; idx_group < my_nb_block ;++idx_group){ + if(!particle){ + auto* container = tree.getCellGroup(level,idx_group); + container->setIdxGlobal(nb_block_before_me); + } else { + auto* container = tree.getParticleGroup(idx_group); + container->setIdxGlobal(int(nb_block_before_me)); + } + ++nb_block_before_me; + } + return nb_block_before_me; +} + +/** + * This function launch the computaition of the flobal index of every + * group at every level + * @author benjamin.dufoyer@inria.fr + * @param tree local group tree + * @param conf 
MPI conf + * @param level_min [OPTIONNAL] minimum level */ template<class GroupOctreeClass> -void add_let_leaf_block_to_tree( - GroupOctreeClass& tree, - std::vector<std::vector<block_t>>& let_block, - const MortonIndex& local_min_m_idx, - int level) -{ - // if we have no block to add - if(let_block.size() == 0) - return; - - // Compute the number of block for each level - int nb_block = 0; - for(unsigned i = 0 ; i < let_block.size(); ++i){ - for(unsigned j = 0 ; j < let_block[i].size() ; ++j){ - ++nb_block; +int set_cell_group_global_index( + GroupOctreeClass& tree, + const inria::mpi_config& conf, + int level_min = 1 +){ + int nb_proc = conf.comm.size(); + if(nb_proc > 1){ + // Can be a task + int nb_block_before_me = 0; + // set the idx global on the particle block + nb_block_before_me = set_cell_group_global_index_at(tree,0,nb_block_before_me,conf,true); + + for(int i = tree.getHeight()-1; i >= level_min ; --i){ + nb_block_before_me = set_cell_group_global_index_at(tree,i,nb_block_before_me,conf); + } + conf.comm.bcast( + &nb_block_before_me, + 1, + MPI_INT, + nb_proc-1 + ); + return nb_block_before_me; + } else { + int idx_global = 0; + for(int i = 0 ; i < tree.getNbParticleGroup(); ++i){ + tree.getParticleGroup(i)->setIdxGlobal(idx_global); + ++idx_global; } + for(int i = tree.getHeight()-1; i >= 1 ; --i){ + for(int j = 0; j < tree.getNbCellGroupAtLevel(i) ; ++j ){ + tree.getCellGroup(i,j)->setIdxGlobal(idx_global); + ++idx_global; + } + } + return idx_global; } +} - unsigned idx_vect = 0; - std::vector<block_t> leaf_block_to_add(nb_block); - for(unsigned i = 0 ; i < let_block.size() ;++i){ - for(unsigned j =0 ; j < let_block[i].size();j++){ - leaf_block_to_add[idx_vect]= let_block[i][j]; - ++idx_vect; + +/** + * This function add the blocks for the M2M operation + * 1) Compute the min and max morton index of my distribution at the level + * 2) Check if i have this morton index at the upper level + * 3) Share this information at my neighboor + * 4) Send the morton index needed and post recv of block + * 5) Send the block to my neihboor if needed + * 6) Add block to the tree + * + * [RESTRICTION] You need to add the LET to the tree BEFORE calling this + * function + * + * @author benjamin.dufoyer@inria.fr + * @param tree LET GroupTree + * @param conf MPI conf + * @param level Level to check + */ +template<class GroupOctreeClassWithLET> +void send_get_block_M2M_at_level( + GroupOctreeClassWithLET& tree, + const inria::mpi_config& conf, + int level +){ + // structure for sending message + struct sending_cell_structure_M2M{ + int idx_global_block; + MortonIndex start_index; + MortonIndex end_index; + int nb_leaf_in_block; + int idx_global_particle_block = 0; + }; + // boolean to know if we are at the leaf level + bool leaf_level = (tree.getHeight()-1 == level); + // get the block_size + int block_size = tree.getNbElementsPerBlock(); + // Prepare buffer for sending to other proc + sending_cell_structure_M2M block_needer_min{-1,0,0,0}; + sending_cell_structure_M2M block_needer_max{-1,0,0,0}; + std::vector<MortonIndex> m_idx_min(block_size,0); + std::vector<MortonIndex> m_idx_max(block_size,0); + std::vector<FSize> nb_particle_min(0,0); + std::vector<FSize> nb_particle_max(0,0); + // IDEA can be a task + // Compute the minimum morton index of my distribution + // iterate on every group + for(int idx_group = 0 ; idx_group < tree.getNbCellGroupAtLevel(level);++idx_group){ + auto* container = tree.getCellGroup(level,idx_group); + // if the block is mine + if(container->isMine()){ + // get the 
symbolic information of the block + block_needer_min.idx_global_block = container->getIdxGlobal(); + block_needer_min.start_index = container->getStartingIndex(); + block_needer_min.end_index = container->getEndingIndex(); + block_needer_min.nb_leaf_in_block = container->getNumberOfCellsInBlock(); + // get every morton index + for(int idx_cell = 0 ; idx_cell < container->getNumberOfCellsInBlock() ; ++idx_cell){ + m_idx_min[idx_cell] = container->getCellMortonIndex(idx_cell); + } + // if it's the leaf level we need to send the number of particule + // too + if(leaf_level){ + auto* container_particle = tree.getParticleGroup(idx_group); + block_needer_min.idx_global_particle_block = container_particle->getIdxGlobal(); + nb_particle_min.resize(block_size,0); + for(int idx_cell = 0 ; idx_cell < container_particle->getNumberOfLeavesInBlock() ; ++idx_cell){ + nb_particle_min[idx_cell] = container_particle->getNbParticlesInLeaf(idx_cell); + } + } + // break the loop + break; } } - // Now i have a vector with all leaf block - // Sorting block - std::sort(leaf_block_to_add.begin(),leaf_block_to_add.end(),[](block_t a, block_t b){ - return a.start_index < b.start_index; - }); - // Add the block to the tree - tree.add_LET_block(leaf_block_to_add,level,local_min_m_idx); + // IDEA can be a task + // compute the maximum morton index of my distribution + // iterate on every groups + for(int idx_group = tree.getNbCellGroupAtLevel(level)-1 ; idx_group >= 0; --idx_group){ + auto* container = tree.getCellGroup(level,idx_group); + // if the block is Mine + if(container->isMine()){ + // stock symbolic information + block_needer_max.idx_global_block = container->getIdxGlobal(); + block_needer_max.start_index = container->getStartingIndex(); + block_needer_max.end_index = container->getEndingIndex(); + block_needer_max.nb_leaf_in_block = container->getNumberOfCellsInBlock(); + // get every morton index + for(int idx_cell = 0 ; idx_cell < container->getNumberOfCellsInBlock() ; ++idx_cell){ + m_idx_max[idx_cell] = container->getCellMortonIndex(idx_cell); + } + // if it's the leaf level we need to send to number of particule + // per leaf + if(leaf_level){ + auto* container_particle = tree.getParticleGroup(idx_group); + block_needer_max.idx_global_particle_block = container_particle->getIdxGlobal(); + nb_particle_max.resize(block_size,0); + for(int idx_cell = 0 ; idx_cell < container_particle->getNumberOfLeavesInBlock() ; ++idx_cell){ + nb_particle_max[idx_cell] = container_particle->getNbParticlesInLeaf(idx_cell); + } + } + break; + } + } + // compute the max and the min morton Index att the upper level + + // Now we have our max and our min at the current level + // Now we want to check if we have the parents of our min and our max + bool flag_min = false; + bool flag_max = false; + + // MPI info + int nb_proc = conf.comm.size(); + int my_rank = conf.comm.rank(); + + // reception buffer + // Symbolic block buffer + sending_cell_structure_M2M buffer_right_neighbor{-1,0,0,0}; + sending_cell_structure_M2M buffer_left_neighbor{-1,0,0,0}; + // Morton index buffer + std::vector<MortonIndex> buffer_m_idx_right(block_size,0); + std::vector<MortonIndex> buffer_m_idx_left(block_size,0); + // number of particle buffer + std::vector<FSize> buffer_nb_particle_right(0,0); + std::vector<FSize> buffer_nb_particle_left(0,0); + + // flag for neighboot + bool flag_right_neighboor = false; + bool flag_left_neighboor = false; + + // array of request + int nb_message = 0; + inria::mpi::request tab_mpi_status[12]; + + // if i'm 0, i 
don't need a block from left + if(my_rank == 0){ + flag_min = true; + flag_left_neighboor = true; + } + // if i'm the last proc, i don't need block from right + if(my_rank == nb_proc-1){ + flag_max = true; + flag_right_neighboor = true; + } + + // Now we need to send to the neighboor if we need a block, and recv if + // he need block + + // First send and recv from left + + if(my_rank != 0){ + ///////////////////////////////////////////// + //// SENDING + ///////////////////////////////////////////// + // Send symbolic information of my min block + tab_mpi_status[nb_message] = + conf.comm.isend( + &block_needer_min, + sizeof(sending_cell_structure_M2M), + MPI_CHAR, + my_rank-1,1); + ++nb_message; + // send the morton index of the min block + tab_mpi_status[nb_message] = + conf.comm.isend( + m_idx_min.data(), + int(sizeof(MortonIndex)*block_size), + MPI_CHAR, + my_rank-1,2); + ++nb_message; + // if it's the leaf level + if(leaf_level){ + // send the number of particle of the particle block attached + tab_mpi_status[nb_message] = + conf.comm.isend( + nb_particle_min.data(), + int(sizeof(FSize)*block_size), + MPI_CHAR, + my_rank-1,3); + ++nb_message; + } + // recev the symbolic block from my left neighbor + tab_mpi_status[nb_message] = + conf.comm.irecv( + &buffer_left_neighbor, + sizeof(sending_cell_structure_M2M), + MPI_CHAR, + my_rank-1,1); + ++nb_message; + // recv the morton index of the block send by my left neighbor + tab_mpi_status[nb_message] = + conf.comm.irecv( + buffer_m_idx_left.data(), + int(sizeof(MortonIndex)*block_size), + MPI_CHAR, + my_rank-1,2); + ++nb_message; + // if it's the leaf level + if(leaf_level){ + // need to recev the number of particle of the particle attached + buffer_nb_particle_left.resize(block_size,0); + tab_mpi_status[nb_message] = + conf.comm.irecv( + buffer_nb_particle_left.data(), + int(sizeof(FSize)*block_size), + MPI_CHAR, + my_rank-1,3); + ++nb_message; + } + } + // Send and recv from right + if(my_rank != nb_proc-1){ + // send my block max + tab_mpi_status[nb_message] = + conf.comm.isend( + &block_needer_max, + sizeof(sending_cell_structure_M2M), + MPI_CHAR, + my_rank+1,1); + ++nb_message; + // send the morton index of the right block + tab_mpi_status[nb_message] = + conf.comm.isend( + m_idx_max.data(), + int(sizeof(MortonIndex)*block_size), + MPI_CHAR, + my_rank+1,2); + ++nb_message; + // if it's the leaf level + if(leaf_level){ + // send the number of particle of the particle block attached + tab_mpi_status[nb_message] = + conf.comm.isend( + nb_particle_max.data(), + int(sizeof(FSize)*block_size), + MPI_CHAR, + my_rank+1,3); + ++nb_message; + } + + // recv the block from the right + tab_mpi_status[nb_message] = + conf.comm.irecv( + &buffer_right_neighbor, + sizeof(sending_cell_structure_M2M), + MPI_CHAR, + my_rank+1,1); + ++nb_message; + // recv the morton index of the right + tab_mpi_status[nb_message] = + conf.comm.irecv( + buffer_m_idx_right.data(), + int(sizeof(MortonIndex)*block_size), + MPI_CHAR, + my_rank+1,2); + ++nb_message; + // if it's leaf level + if(leaf_level){ + // recv number of particle + buffer_nb_particle_right.resize(block_size,0); + tab_mpi_status[nb_message] = + conf.comm.irecv( + buffer_nb_particle_right.data(), + int(sizeof(FSize)*block_size), + MPI_CHAR, + my_rank+1,3); + ++nb_message; + } + + } + // Wait all request + inria::mpi::request::waitall(nb_message,tab_mpi_status); + + // Now we have the min and the max block at the level L + // But now we need to send the block needer of this block + // to insert task on him 
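// A minimal sketch of the message pattern used above, assuming only the inria::mpi isend
// signature already used in this file; the helper name post_block_send is hypothetical.
// Tag 1 carries the fixed-size symbolic header of a block, tag 2 its Morton indices, and
// tag 3 (leaf level only) the per-leaf particle counts; the matching irecv calls use the
// same three tags.
template<class header_t>
int post_block_send(const inria::mpi_config& conf, const int dest, const bool leaf_level,
                    header_t& header, std::vector<MortonIndex>& morton_idx,
                    std::vector<FSize>& nb_particles,
                    inria::mpi::request* requests, int nb_message){
    requests[nb_message++] = conf.comm.isend(&header, sizeof(header_t),
                                             MPI_CHAR, dest, 1);
    requests[nb_message++] = conf.comm.isend(morton_idx.data(),
                                             int(sizeof(MortonIndex)*morton_idx.size()),
                                             MPI_CHAR, dest, 2);
    if(leaf_level){
        requests[nb_message++] = conf.comm.isend(nb_particles.data(),
                                                 int(sizeof(FSize)*nb_particles.size()),
                                                 MPI_CHAR, dest, 3);
    }
    return nb_message;
}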
with starPU + + // buffer to recv blocks + sending_cell_structure_M2M block_from_left; + sending_cell_structure_M2M block_from_right; + + // buffer to recv morton index + std::vector<MortonIndex> m_idx_from_left(block_size,-1); + std::vector<MortonIndex> m_idx_from_right(block_size,-1); + + // buffer to send morton index + std::vector<MortonIndex> m_idx_to_send_right(block_size,-1); + std::vector<MortonIndex> m_idx_to_send_left(block_size,-1); + // buffer to send symbolic information + sending_cell_structure_M2M block_to_left{-1,0,0,0}; + sending_cell_structure_M2M block_to_right{-1,0,0,0}; + + + nb_message = 0; + // IDEA can be a task + // we post the recv for the left block + if(!flag_min){ + // posting reception of the block + tab_mpi_status[nb_message] = + conf.comm.irecv( + &block_from_left, + sizeof(sending_cell_structure_M2M), + MPI_CHAR, + my_rank-1,2); + ++nb_message; + tab_mpi_status[nb_message] = + conf.comm.irecv( + m_idx_from_left.data(), + int(sizeof(MortonIndex)*block_size), + MPI_CHAR, + my_rank-1,3); + ++nb_message; + } + // IDEA can be a task + // we post the recv for the right block + if(!flag_max){ + // posting the reception buffer + tab_mpi_status[nb_message] = + conf.comm.irecv( + &block_from_right, + sizeof(sending_cell_structure_M2M), + MPI_CHAR, + my_rank+1,2); + ++nb_message; + tab_mpi_status[nb_message] = + conf.comm.irecv( + m_idx_from_right.data(), + int(sizeof(MortonIndex)*block_size), + MPI_CHAR, + my_rank+1,3); + ++nb_message; + } + + // IDEA Can be a task + // if i need to send to the right + if(!flag_right_neighboor){ + bool flag = false; + // seeking the first block who is mine at the upper level + for(int i = (tree.getNbCellGroupAtLevel(level-1)-1) ; i >= 0 ; --i){ + auto* container = tree.getCellGroup(level-1,i); + // if the block is mine + if(container->isMine()){ + // stock symbolic information + block_to_right.idx_global_block = container->getIdxGlobal(); + block_to_right.start_index = container->getStartingIndex(); + block_to_right.end_index = container->getEndingIndex(); + block_to_right.nb_leaf_in_block = container->getNumberOfCellsInBlock(); + // stock the morton index of the block + for(int idx_cell = 0 ; idx_cell < container->getNumberOfCellsInBlock(); ++idx_cell ){ + m_idx_to_send_right[idx_cell] = container->getCellMortonIndex(idx_cell); + } + // put the flag on true + flag = true; + // send the 2 buffer + tab_mpi_status[nb_message] = + conf.comm.isend( + &block_to_right, + sizeof(sending_cell_structure_M2M), + MPI_CHAR, + my_rank+1,2); + ++nb_message; + tab_mpi_status[nb_message] = + conf.comm.isend( + &m_idx_to_send_right.data()[0], + int(sizeof(MortonIndex)*block_size), + MPI_CHAR, + my_rank+1,3); + ++nb_message; + break; + } + } + // we don't have block at the upper level + if(!flag){ + // send fake block + tab_mpi_status[nb_message] = + conf.comm.isend( + &block_to_right, + sizeof(sending_cell_structure_M2M), + MPI_CHAR, + my_rank+1,2); + ++nb_message; + tab_mpi_status[nb_message] = + conf.comm.isend( + m_idx_to_send_right.data(), + int(sizeof(MortonIndex)*block_size), + MPI_CHAR, + my_rank+1,3); + ++nb_message; + } + } + // IDEA Can be a task + + if(!flag_left_neighboor){ + bool flag = false; + // seek the first block who is mine + for(int i = 0 ; i < tree.getNbCellGroupAtLevel(level-1) ; ++i){ + auto* container = tree.getCellGroup(level-1,i); + // send the first left block who is mine + if(container->isMine()){ + // stock symbolic information + block_to_left.idx_global_block = container->getIdxGlobal(); + block_to_left.start_index = 
container->getStartingIndex(); + block_to_left.end_index = container->getEndingIndex(); + block_to_left.nb_leaf_in_block = container->getNumberOfCellsInBlock(); + // stock morton index + for(int idx_cell = 0 ; idx_cell < container->getNumberOfCellsInBlock(); ++idx_cell ){ + m_idx_to_send_left[idx_cell] = container->getCellMortonIndex(idx_cell); + } + // put the flag on true + flag = true; + // send block + tab_mpi_status[nb_message] = + conf.comm.isend( + &block_to_left, + sizeof(sending_cell_structure_M2M), + MPI_CHAR, + my_rank-1,2); + ++nb_message; + tab_mpi_status[nb_message] = + conf.comm.isend( + m_idx_to_send_left.data(), + int(sizeof(MortonIndex)*block_size), + MPI_CHAR, + my_rank-1,3); + ++nb_message; + break; + } + } + // we don't have block at the upper level + if(!flag){ + // send fake block + tab_mpi_status[nb_message] = + conf.comm.isend( + &block_to_left, + sizeof(sending_cell_structure_M2M), + MPI_CHAR, + my_rank-1,2); + ++nb_message; + tab_mpi_status[nb_message] = + conf.comm.isend( + m_idx_to_send_left.data(), + int(sizeof(MortonIndex)*block_size), + MPI_CHAR, + my_rank-1,3); + ++nb_message; + } + } + + + // Wait for the send/recv + if(nb_message > 0) + inria::mpi::request::waitall(nb_message,tab_mpi_status); + + // now i have the block needed for the M2M + // now we need to add this block + + // We add the block, if the idx_global_block is -1, the block + // is invalid so we don't need him, and we don't need to add him to the tree + if(!flag_min && block_from_left.idx_global_block != -1){ + tree.insert_block(block_from_left,m_idx_from_left,level-1); + } + if(!flag_max && block_from_right.idx_global_block != -1){ + tree.insert_block(block_from_right,m_idx_from_right,level-1); + } + if(!flag_right_neighboor && buffer_right_neighbor.idx_global_block != -1){ + tree.insert_block(buffer_right_neighbor,buffer_m_idx_right,level,&buffer_nb_particle_right); + } + if(!flag_left_neighboor && buffer_left_neighbor.idx_global_block != -1){ + tree.insert_block(buffer_left_neighbor,buffer_m_idx_left,level,&buffer_nb_particle_left); + } } +/** + * This function exchange blocks with neighbors proc + * The left proc have the first block + * The right proc have the last block + * + * The blocks send are the block who have the boolean "isMine" on 1 on the + * sender + * + * @author benjamin.dufoyer@inria.fr + * @param tree The group tree + * @param conf + * @param level_min [OPTIONNAL] minimum level to apply this function + */ +template<class GroupOctreeClass> +void send_get_block_M2M( + GroupOctreeClass& tree, + const inria::mpi_config& conf, + int level_min = 1 +){ + int nb_proc = conf.comm.size(); + // if we have less than 1 proc, we don't need to exchange block + if(nb_proc > 1){ + // get the M2M block at every level + for(int i = tree.getHeight()-1 ; i > level_min ; --i){ + send_get_block_M2M_at_level(tree,conf,i); + } + } +} + + } diff --git a/Src/GroupTree/Core/FDistributedLETGroupTreeValidator.hpp b/Src/GroupTree/Core/FDistributedLETGroupTreeValidator.hpp new file mode 100644 index 000000000..bfc954905 --- /dev/null +++ b/Src/GroupTree/Core/FDistributedLETGroupTreeValidator.hpp @@ -0,0 +1,260 @@ +// ==== CMAKE ==== +// @FUSE_MPI +// ================ +// + + +#ifndef _FDISTRIBUTED_LET_GROUPTREE_VALIDATOR_ +#define _FDISTRIBUTED_LET_GROUPTREE_VALIDATOR_ + + +#include "inria/algorithm/distributed/mpi.hpp" + + +namespace dstr_grp_tree_vldr{ + +/** + * This function check the level of the LetGroupTree to check if we forget + * a group + * The principe is simple. 
We compute every interaction of every cell at the + * level, we check if we have the morton index of the interaction in our tree + * If we don't have this index, we send a request to the proc who have this + * index to check if he exist, if he exist, it's a error + * @author benjamin.dufoyer@inria.fr + * @param tree localGroupTree + * @param level level to check + * @param conf conf MPI + * @return true if it's ok, false if we forget a group + */ +template<class GroupOctreeClass> +bool validate_group_tree_at_level( + GroupOctreeClass& tree, + int level, + const inria::mpi_config& conf +){ + // MPI information + const int nb_proc = conf.comm.size(); + const int my_rank = conf.comm.rank(); + + // Compute my min and my max morton index at the level + MortonIndex min_morton_index_at_level = 0; + MortonIndex max_morton_index_at_level = 0; + for(int i = 0 ; i < tree.getNbCellGroupAtLevel(level) ; ++i){ + auto* container = tree.getCellGroup(level,i); + if(container->isMine()){ + min_morton_index_at_level = container->getStartingIndex(); + break; + } + } + for(int i = tree.getNbCellGroupAtLevel(level)-1; i >= 0 ; --i){ + auto* container = tree.getCellGroup(level,i); + if(container->isMine()){ + max_morton_index_at_level = container->getEndingIndex(); + break; + } + } + + // Sharing my interval and getting interval from all proc + std::pair<MortonIndex,MortonIndex> my_interval(min_morton_index_at_level,max_morton_index_at_level); + std::vector<std::pair<MortonIndex,MortonIndex>> all_interval(nb_proc); + conf.comm.allgather(&my_interval, + sizeof(my_interval), + MPI_CHAR, + all_interval.data(), + sizeof(my_interval), + MPI_CHAR); + + // if i have 1 block or more + // Get all MortonIndex for interaction + std::vector<MortonIndex> morton_index_not_in_tree(0); + if(my_interval.second != 0){ + // vector to stock all MortonIndex + std::vector<MortonIndex> external_interaction(tree.getNbCellGroupAtLevel(level)*tree.getNbElementsPerBlock()*189,0); + unsigned idx_vector = 0; + // iterate on every group + for(int idx_group = 0 ; idx_group < tree.getNbCellGroupAtLevel(level) ; ++idx_group){ + // get the current group + auto* container = tree.getCellGroup(level,idx_group); + if(container->isMine()){ + // iterate on every cell + for(int cell_idx = 0; + cell_idx < container->getNumberOfCellsInBlock(); + ++cell_idx){ + // Getting the current morton index + MortonIndex curr_m_idx = container->getCellMortonIndex(cell_idx); + MortonIndex interactionsIndexes[189]; + int interactionsPosition[189]; + FTreeCoordinate coord(curr_m_idx); + // Getting neigbors of the father + int counter = coord.getInteractionNeighbors(level,interactionsIndexes,interactionsPosition); + for(int idx_neighbor = 0 ; idx_neighbor < counter ; ++idx_neighbor){ + MortonIndex tmp = interactionsIndexes[idx_neighbor]; + if(tmp >= min_morton_index_at_level && tmp < max_morton_index_at_level){ + // do nothing, it's my interval + } else { + //Stock the index + external_interaction[idx_vector] = tmp; + ++idx_vector; + } + } // end for neigbors + } // end for leaf + } // end for group + } + if(idx_vector > 0){ + FQuickSort<MortonIndex>::QsSequential(external_interaction.data(),idx_vector); + // vector to have all mortonIndex with no duplicate data + std::vector<MortonIndex> morton_needed(0); + MortonIndex last_morton_index = -1; + for(unsigned i = 0 ; i < idx_vector ; ++i){ + if(external_interaction[i] != last_morton_index){ + morton_needed.push_back(external_interaction[i]); + last_morton_index = external_interaction[i]; + } + } + // free the old vector 
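// Note on the factor 189 used above: it is the size of the classical FMM M2L interaction
// list in 3D. The 27 boxes surrounding (and including) the parent cell contain
// 27 * 8 = 216 children, from which the 27 near neighbours of the cell (including the
// cell itself) are removed, leaving 216 - 27 = 189 candidate interactions per cell.
// Hence the worst-case buffer of
//     getNbCellGroupAtLevel(level) * getNbElementsPerBlock() * 189
// Morton indices allocated before the filtering loop.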
+ std::vector<MortonIndex>().swap(external_interaction); + // vector to stock morton index who are not in the tree + + for(unsigned i = 0 ; i < morton_needed.size(); ++i ){ + bool flag = false; + MortonIndex current_morton_index = morton_needed[i]; + for(int j = 0 ; j < tree.getNbCellGroupAtLevel(level); ++j){ + auto* container = tree.getCellGroup(level,j); + if(!container->isMine()){ + if(container->isInside(current_morton_index) || container->getEndingIndex() == current_morton_index){ + flag =true; + break; + } + } + } + // if we are here, we don't have the interaction + if(!flag) + morton_index_not_in_tree.push_back(current_morton_index); + } + } + } + + // Now we have all morton index who is not in our tree + std::vector<unsigned> nb_message_to_send(nb_proc,0); + std::vector<unsigned> nb_message_to_recev(nb_proc,0); + for(unsigned i = 0 ; i < morton_index_not_in_tree.size() ;++i ){ + for(unsigned j = 0 ; j < all_interval.size() ; ++j){ + MortonIndex min = all_interval[j].first; + MortonIndex max = all_interval[j].second; + if(morton_index_not_in_tree[i] >= min && morton_index_not_in_tree[i] <= max ){ + nb_message_to_send[j] += 1; + break; + } + } + } + + // Send the number of morton index we will send + conf.comm.alltoall(nb_message_to_send.data(), + 1, + MPI_UNSIGNED, + nb_message_to_recev.data(), + 1, + MPI_UNSIGNED); + + // Compute the number of message and the number of morton index + int nb_morton_index = 0; + int nb_message =0; + for(unsigned i = 0 ; i < nb_message_to_recev.size() ; ++i){ + nb_morton_index += nb_message_to_recev[i]; + if(nb_message_to_recev[i] > 0){ + ++nb_message; + } + if(nb_message_to_send[i] > 0){ + ++nb_message; + } + } + + // declare the reception buffer + std::vector<MortonIndex> morton_recv(nb_morton_index,0); + // tab of MPI request to wait the completion + inria::mpi::request tab_mpi_status[nb_message]; + + int idx_message =0; + unsigned offset = 0; + // post all reception + for(unsigned i = 0 ; i < nb_message_to_recev.size(); ++i){ + if(nb_message_to_recev[i] > 0){ + unsigned nb_m_idx = nb_message_to_recev[i]; + tab_mpi_status[idx_message] = conf.comm.irecv(&morton_recv[offset], + int(nb_m_idx*sizeof(MortonIndex)), + MPI_CHAR, + i,1); + ++idx_message; + offset += nb_m_idx; + } + } + + offset = 0; + // post all send message + for(unsigned i = 0 ; i < nb_message_to_send.size(); ++i){ + if(nb_message_to_send[i] > 0){ + unsigned nb_m_idx = nb_message_to_send[i]; + tab_mpi_status[idx_message] = conf.comm.isend(&morton_index_not_in_tree[offset], + int(nb_m_idx*sizeof(MortonIndex)), + MPI_CHAR, + i,1); + ++idx_message; + offset += nb_m_idx; + } + } + + // Wait all request + inria::mpi::request::waitall(idx_message,tab_mpi_status); + + offset = 0 ; + bool flag = true; + for(unsigned i = 0 ; i < nb_message_to_recev.size() ; ++i ){ + unsigned nb_morton_index_2 = nb_message_to_recev[i]; + for(unsigned j = 0 ; j < nb_morton_index_2 ; ++j ){ + MortonIndex current_idx = morton_recv[j+offset]; + for(int k = 0; k < tree.getNbCellGroupAtLevel(level);++k){ + auto* container = tree.getCellGroup(level,k); + if(container->isMine()){ + if(container->isInside(current_idx)){ + std::cout << " [Error][level "<<level << "] " << current_idx << " on " << my_rank << " Not transfered to " << i << std::endl; + flag = false; + } + } + } + } + offset += nb_morton_index_2; + } + + // return the flag + return flag; + +} + +/** + * This function check every level of the LetGroupTree to know if we forget + * a group + * @author benjamin.dufoyer@inria.fr + * @param tree local group tree + 
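// The code above follows a two-phase sparse exchange: an alltoall of message counts, then
// one point-to-point message per non-empty pair of ranks. A condensed sketch of the same
// pattern, assuming the conf.comm.alltoall / irecv / isend and
// inria::mpi::request::waitall signatures already used in this file; the helper name
// sparse_exchange is hypothetical.
template<class T>
std::vector<T> sparse_exchange(const inria::mpi_config& conf,
                               std::vector<unsigned>& nb_to_send,  // one count per rank
                               std::vector<T>& send_buffer){       // payload, grouped by rank
    std::vector<unsigned> nb_to_recv(nb_to_send.size(), 0);
    // phase 1: every rank learns how many items it will receive from every other rank
    conf.comm.alltoall(nb_to_send.data(), 1, MPI_UNSIGNED,
                       nb_to_recv.data(), 1, MPI_UNSIGNED);
    unsigned total_recv = 0;
    for(unsigned n : nb_to_recv) total_recv += n;
    std::vector<T> recv_buffer(total_recv);
    std::vector<inria::mpi::request> requests;
    unsigned offset_recv = 0;
    unsigned offset_send = 0;
    // phase 2: post one irecv / isend per non-empty partner, raw bytes, common tag 1
    for(unsigned p = 0 ; p < nb_to_recv.size() ; ++p){
        if(nb_to_recv[p] > 0){
            requests.push_back(conf.comm.irecv(&recv_buffer[offset_recv],
                               int(nb_to_recv[p]*sizeof(T)), MPI_CHAR, int(p), 1));
            offset_recv += nb_to_recv[p];
        }
        if(nb_to_send[p] > 0){
            requests.push_back(conf.comm.isend(&send_buffer[offset_send],
                               int(nb_to_send[p]*sizeof(T)), MPI_CHAR, int(p), 1));
            offset_send += nb_to_send[p];
        }
    }
    inria::mpi::request::waitall(int(requests.size()), requests.data());
    return recv_buffer;
}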
let + * @param conf MPI cong + * @return true if the tree is ok + */ +template<class GroupOctreeClass> +bool validate_group_tree( + GroupOctreeClass& tree, + const inria::mpi_config& conf +){ + bool res = true; + // check every level + for(int i = tree.getHeight()-1 ; i > 0 ; --i){ + res = validate_group_tree_at_level(tree,i,conf); + // if the current level is not good + if(!res) + break; + } + return res; +} + + +} + +#endif diff --git a/Src/GroupTree/Core/FGroupLinearTree.hpp b/Src/GroupTree/Core/FGroupLinearTree.hpp index aed3a5983..c7a877fd7 100644 --- a/Src/GroupTree/Core/FGroupLinearTree.hpp +++ b/Src/GroupTree/Core/FGroupLinearTree.hpp @@ -3,7 +3,6 @@ #include <vector> #include "../../Utils/FLog.hpp" -#include "FDistributedGroupTreeBuilder.hpp" using FReal = double; @@ -14,14 +13,15 @@ class FGroupLinearTree { protected: - int block_size; - int nb_block; + int block_size; //< + int nb_block; //< - const inria::mpi_config& mpi_conf; + // Copy of the MPI conf + const inria::mpi_config mpi_conf; //< - std::vector<node_t>* linear_tree; - std::vector<std::pair<MortonIndex,MortonIndex>> index_particle_distribution; - bool unknow_index_particle_distribution = true; + std::vector<node_t>* linear_tree; //< + std::vector<std::pair<MortonIndex,MortonIndex>> index_particle_distribution; //< + bool unknow_index_particle_distribution = true; //< public: @@ -36,14 +36,19 @@ public: * @param in_linear_tree Linear tree * @param in_box_center Box Center of particle container * @param in_box_width Box Width of particle container + * @warning We copy the MPI comm because O3 compilation fail the utest */ - FGroupLinearTree(const inria::mpi_config& conf): + FGroupLinearTree(const inria::mpi_config conf): mpi_conf(conf), index_particle_distribution(conf.comm.size()) { linear_tree = new std::vector<node_t>[1]; } +//////////////////////////////////////////////////// +// Function of initialisation +//////////////////////////////////////////////////// + /** * This function create a blocked linear tree from the current distributed * linear tree @@ -187,6 +192,12 @@ public: return this->linear_tree->back().morton_index; } + + const inria::mpi_config get_mpi_conf() const{ + return mpi_conf; + } + + /** * This function print the information of this current class * @author benjamin.dufoyer@inria.fr @@ -215,35 +226,92 @@ public: * @param particle_container [description] */ template<class particle_t> - void set_index_particle_distribution( - std::vector<particle_t> particle_container) + void set_index_particle_distribution( std::vector<particle_t>& particle_container) { - unknow_index_particle_distribution = false; - dstr_grp_tree_builder::share_particle_division( - this->mpi_conf, - particle_container, - index_particle_distribution); + unknow_index_particle_distribution = false; + if(this->mpi_conf.comm.size() > 1){ + dstr_grp_tree_builder::share_particle_division( + this->mpi_conf, + particle_container, + this->index_particle_distribution); + } else { + this->index_particle_distribution.resize(1); + std::pair<MortonIndex,MortonIndex> my_distrib; + my_distrib.first = particle_container.front().morton_index; + my_distrib.second = particle_container.back().morton_index; + this->index_particle_distribution[0] = my_distrib; + } } + /**ad + * this function do a update of the current particle distribution + * and the pair is put in parameter + * @author benjamin.dufoyer@inria.fr + * @param new_distrib [description] + */ void update_index_particle_distribution(std::pair<MortonIndex,MortonIndex> new_distrib){ - 
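// A hedged sketch of the calling order implied by the restriction stated on
// send_get_block_M2M_at_level() above: the LET blocks must already be in the tree, the
// blocks must carry their global indices, and the validator can then be used as a debug
// check. The wrapper name finalize_and_check_let is hypothetical; it assumes the helpers
// above live in the dstr_grp_tree_builder / dstr_grp_tree_vldr namespaces of these headers.
template<class GroupOctreeClass>
void finalize_and_check_let(GroupOctreeClass& let_tree, const inria::mpi_config& conf){
    // number every particle/cell block globally (needed to address remote blocks)
    const int total_nb_blocks = dstr_grp_tree_builder::set_cell_group_global_index(let_tree, conf);
    // fetch the extra parent-level blocks required by the M2M/L2L sweeps
    dstr_grp_tree_builder::send_get_block_M2M(let_tree, conf);
    // verify that no group needed by the M2L lists is missing from the LET
    FAssert(dstr_grp_tree_vldr::validate_group_tree(let_tree, conf));
    (void) total_nb_blocks;
}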
dstr_grp_tree_builder::share_particle_division( - this->mpi_conf, - new_distrib, - index_particle_distribution); + unknow_index_particle_distribution = false; + if(this->mpi_conf.comm.size() > 1){ + dstr_grp_tree_builder::share_particle_division( + this->mpi_conf, + new_distrib, + this->index_particle_distribution); + } else { + this->index_particle_distribution.resize(1); + this->index_particle_distribution[0] = new_distrib; + } } - std::vector<std::pair<MortonIndex,MortonIndex>>* + /** + * this function return a pointer of the total particule repartition + * @author benjamin.dufoyer@inria.fr + */ + std::vector<std::pair<MortonIndex,MortonIndex>> get_index_particle_distribution(){ // TO get the particle repartition, you will compute it before FAssert(!unknow_index_particle_distribution); - return &this->index_particle_distribution; + return this->index_particle_distribution; } - std::pair<MortonIndex,MortonIndex> get_index_particle_distribution_at(unsigned i){ + + std::vector<MortonIndex> get_index_particle_distribution_implicit(){ + + std::vector<MortonIndex> distribution( (this->index_particle_distribution.size()*2) /*+2*/,-1); // Pouruoi +2 OC ? + if(this->mpi_conf.comm.size() == 0){ + for(unsigned i = 1; i < distribution.size() ; ++i ){ + distribution[i] = this->index_particle_distribution[0].second; + } + } + else { + // int idx_vect = 0 ; + // distribution[0] = ; + distribution[1] = this->index_particle_distribution[0].second ; + + for(unsigned i = 1 ; i < this->index_particle_distribution.size() ; ++i){ + distribution[2*i] = this->index_particle_distribution[i-1].second ; + distribution[2*i+1] = this->index_particle_distribution[i].second; + } +// int idx_vect = static_cast<int>(2*this->index_particle_distribution.size() ); +// ///////////// TO REMOVE ??? +// distribution[idx_vect] = this->index_particle_distribution[this->index_particle_distribution.size()-1].second; +// ++idx_vect; +// distribution[idx_vect] = this->index_particle_distribution[this->index_particle_distribution.size()-1].second; + } + return distribution; + } + + /** + * this function return the particle distribution for a rank of proc + * put in parameter + * @author benjamin.dufoyer@inria.fr + * @param proc_rank rank of the proc + * @return a pair of morton index + */ + std::pair<MortonIndex,MortonIndex> get_index_particle_distribution_at(unsigned proc_rank){ // TO get the particle repartition, you will compute it before FAssert(!unknow_index_particle_distribution); - FAssert(i < this->index_particle_distribution.size()); - return this->index_particle_distribution.data()[i]; + FAssert(proc_rank < this->index_particle_distribution.size()); + return this->index_particle_distribution.data()[proc_rank]; } /** @@ -259,127 +327,25 @@ public: int my_rank = this->mpi_conf.comm.rank(); MortonIndex left_limit = -1; if(my_rank != 0){ - left_limit = (MortonIndex )this->index_particle_distribution[my_rank-1].second; + left_limit = static_cast<MortonIndex>(this->index_particle_distribution[my_rank-1].second); } return left_limit; } + /** - * This function compute the leaf needed to build the LET part of the Group - * Tree. 
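// Layout of the vector returned by get_index_particle_distribution_implicit(), as far as
// the code above shows: two Morton indices per rank, where entry 2*i is the last index of
// rank i-1 (left open, -1, for rank 0) and entry 2*i+1 is the last index of rank i.
// For example, with three ranks owning leaves [0..9], [10..24] and [25..63]:
//     distribution = { -1, 9,  9, 24,  24, 63 }
// This matches the distributedMortonIndex[node_id*2] / [node_id*2+1] accesses made by
// FGroupTaskStarPUImplicitAlgorithm further down in this patch.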
- * After she send block needed by other proc and she recev block needed + * This function is used to show the FGroupLinearTee more easly * @author benjamin.dufoyer@inria.fr - * @param tree local group tree - * [Optionial] - * @param dim Dimension of coordinate of particle */ - template<class GroupTreeClass> - void create_let_group_tree_at_level( - GroupTreeClass& tree, - int level, - int dim = 3 - ){ - FAssert(index_particle_distribution.size() != 0 ); - FAssert(dim > 0); - bool leaf_level = (tree.getHeight()-1 == level); - // Compute min and max global morton index at the level needed - // This variable is used to put value in const - MortonIndex gmin = this->index_particle_distribution.front().first; - MortonIndex gmax = this->index_particle_distribution.back().second; - // update the morton index - if(!leaf_level){ - gmin = gmin >> 3; - gmax = gmax >> 3; - } - const MortonIndex global_min_m_idx = gmin; - const MortonIndex global_max_m_idx = gmax; - - // Compute min and max local morton index - const MortonIndex local_min_m_idx = - tree.getParticleGroup(0)->getStartingIndex() >>( (tree.getHeight()-1-level)*dim); - const MortonIndex local_max_m_idx = tree.getParticleGroup( - (tree.getNbParticleGroup()-1) )->getEndingIndex() >>( (tree.getHeight()-1-level)*dim); - - std::vector<MortonIndex> leaf_P2P; - if(leaf_level){ - // IDEA : can be a task - // This function compute the leaf needed by the P2P operation - // This function return a vector with all leaf needed - // get leaf P2P - leaf_P2P = dstr_grp_tree_builder::get_leaf_P2P_interaction( - tree, - global_min_m_idx, - global_max_m_idx, - local_min_m_idx, - local_max_m_idx); - } - - // IDEA can be a task - // This function compute the leaf needed by the M2L operation - // This function return a vector with all leaf needed - // get leaf M2L - std::vector<MortonIndex> leaf_M2L = - dstr_grp_tree_builder::get_leaf_M2L_interaction_at_level( - global_min_m_idx, - global_max_m_idx, - local_min_m_idx, - local_max_m_idx, - level, - tree, - dim); - - std::vector<MortonIndex> needed_leaf; - if(leaf_level){ - // this function return the concatenation of the leaf for the P2P and - // the leaf for the M2L - needed_leaf = dstr_grp_tree_builder::concat_M2L_P2P(leaf_P2P,leaf_M2L); - } else { - needed_leaf = leaf_M2L; - this->update_index_particle_distribution( - std::pair<MortonIndex,MortonIndex>(local_min_m_idx - ,local_max_m_idx) - ); - } - - std::vector<std::vector<size_t>> global_matrix_interaction = dstr_grp_tree_builder::get_matrix_interaction( - needed_leaf, - index_particle_distribution, - this->mpi_conf); - - // Send and get leaf - // Auto is used to get the block more easly - // it's a vector<vector<block_t>> - // block_t is a struct define on FDistributedGroupTreeBuilder.hpp - auto let_block = - dstr_grp_tree_builder::send_get_symbolic_block_at_level( - needed_leaf, - global_matrix_interaction, - tree, - level, - this->mpi_conf); - - // Add the block recev to the local group tree - dstr_grp_tree_builder::add_let_leaf_block_to_tree( - tree, - let_block, - local_min_m_idx, - level); - + friend + std::ostream& operator<<(std::ostream& os, const FGroupLinearTree& n) { + return os << "--> Number of leaf : " << n.get_nb_leaf() + << "\n first leaf : " << n.get_first_morton_index() + << "\n last leaf : " << n.get_last_morton_index() + << "\n block_size " << n.get_block_size() + << "\n number of block : " << n.get_nb_block(); } - /** - * This function is used to show the FGroupLinearTee more easly - * @author benjamin.dufoyer@inria.fr - */ - friend - 
std::ostream& operator<<(std::ostream& os, const FGroupLinearTree& n) { - return os << "--> Number of leaf : " << n.get_nb_leaf() - << "\n first leaf : " << n.get_first_morton_index() - << "\n last leaf : " << n.get_last_morton_index() - << "\n block_size " << n.get_block_size() - << "\n number of block : " << n.get_nb_block(); - } - }; diff --git a/Src/GroupTree/Core/FGroupOfCells.hpp b/Src/GroupTree/Core/FGroupOfCells.hpp index 9462df8ee..ed3e7c8d7 100644 --- a/Src/GroupTree/Core/FGroupOfCells.hpp +++ b/Src/GroupTree/Core/FGroupOfCells.hpp @@ -21,6 +21,8 @@ class FGroupOfCells { MortonIndex startingIndex; MortonIndex endingIndex; int numberOfCellsInBlock; + int idxGlobal; + bool isMine; }; protected: @@ -45,9 +47,9 @@ protected: bool deleteBuffer; public: - using multipole_t = PoleCellClass; + using multipole_t = PoleCellClass; using local_expansion_t = LocalCellClass; - using symbolic_data_t = SymbolCellClass; + using symbolic_data_t = SymbolCellClass; FGroupOfCells() : allocatedMemoryInByte(0), memoryBuffer(nullptr), @@ -156,6 +158,8 @@ public: blockHeader->startingIndex = inStartingIndex; blockHeader->endingIndex = inEndingIndex; blockHeader->numberOfCellsInBlock = inNumberOfCells; + blockHeader->idxGlobal = -1; + blockHeader->isMine = false; #ifndef SCALFMM_SIMGRID_NODATA cellMultipoles = (PoleCellClass*)FAlignedMemory::AllocateBytes<32>(inNumberOfCells*sizeof(PoleCellClass)); cellLocals = (LocalCellClass*)FAlignedMemory::AllocateBytes<32>(inNumberOfCells*sizeof(LocalCellClass)); @@ -263,6 +267,26 @@ public: return blockHeader->numberOfCellsInBlock; } + /** The index of the current block in the global range **/ + int getIdxGlobal() const{ + return blockHeader->idxGlobal; + } + + /* set the index of the current block */ + void setIdxGlobal(int idx){ + blockHeader->idxGlobal = idx; + } + + // Return true if the current block is owned by the local tree + bool isMine() const { + return blockHeader->isMine; + } + + // declare the block owned by the local tree + void declare_mine(){ + blockHeader->isMine = true; + } + /** The size of the interval endingIndex-startingIndex (set from the constructor) */ MortonIndex getSizeOfInterval() const { return MortonIndex(blockHeader->endingIndex-blockHeader->startingIndex); diff --git a/Src/GroupTree/Core/FGroupOfParticles.hpp b/Src/GroupTree/Core/FGroupOfParticles.hpp index 810bf307b..bf1a86d4d 100644 --- a/Src/GroupTree/Core/FGroupOfParticles.hpp +++ b/Src/GroupTree/Core/FGroupOfParticles.hpp @@ -24,7 +24,7 @@ class FGroupOfParticles { MortonIndex startingIndex; MortonIndex endingIndex; int numberOfLeavesInBlock; - + int idxGlobal; //< The real number of particles allocated FSize nbParticlesAllocatedInGroup; //< Starting point of position @@ -166,7 +166,7 @@ public: blockHeader->numberOfLeavesInBlock = inNumberOfLeaves; blockHeader->nbParticlesAllocatedInGroup = nbParticlesAllocatedInGroup; blockHeader->nbParticlesInGroup = inNbParticles; - + blockHeader->idxGlobal = -1; // Init particle pointers blockHeader->positionsLeadingDim = (sizeof(FReal) * nbParticlesAllocatedInGroup); particlePosition[0] = reinterpret_cast<FReal*>((reinterpret_cast<size_t>(leafHeader + inNumberOfLeaves) @@ -268,6 +268,14 @@ public: return blockHeader->startingIndex <= inIndex && inIndex < blockHeader->endingIndex; } + int getIdxGlobal() const{ + return blockHeader->idxGlobal; + } + + void setIdxGlobal(int idxGlobal){ + blockHeader->idxGlobal = idxGlobal; + } + /** Return the idx in array of the cell */ MortonIndex getLeafMortonIndex(const int id) const{ FAssertLF(id < 
blockHeader->numberOfLeavesInBlock); @@ -330,6 +338,10 @@ public: } } + // This function return the number of particle in the leaf who have the id + FSize getNbParticlesInLeaf(int id) const{ + return leafHeader[id].nbParticles; + } /** Return the address of the leaf if it exists (or NULL) */ template<class ParticlesAttachedClass> diff --git a/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp index 017dc3e8f..46c4f0287 100644 --- a/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp @@ -2,6 +2,15 @@ #ifndef FGROUPTASKSTARPUALGORITHM_HPP #define FGROUPTASKSTARPUALGORITHM_HPP +// @FUSE_STARPU +// @FUSE_MPI + + +#include <vector> +#include <list> +#include <memory> +#include <iostream> + #include "../../Utils/FGlobal.hpp" #include "../../Core/FCoreCommon.hpp" #include "../../Utils/FQuickSort.hpp" @@ -13,8 +22,7 @@ #include "FOutOfBlockInteraction.hpp" -#include <vector> -#include <memory> + #ifdef SCALFMM_USE_STARPU_EXTRACT #include <list> #endif @@ -52,9 +60,8 @@ #endif #include "Containers/FBoolArray.hpp" -#include <iostream> -#include <vector> -using namespace std; + +//using namespace std; //#define STARPU_USE_REDUX template <class OctreeClass, class CellContainerClass, class KernelClass, class ParticleGroupClass, class StarPUCpuWrapperClass @@ -68,2506 +75,2978 @@ template <class OctreeClass, class CellContainerClass, class KernelClass, class > class FGroupTaskStarPUImplicitAlgorithm : public FAbstractAlgorithm { protected: - typedef FGroupTaskStarPUImplicitAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass + typedef FGroupTaskStarPUImplicitAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass #ifdef SCALFMM_ENABLE_CUDA_KERNEL - , StarPUCudaWrapperClass + , StarPUCudaWrapperClass #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - , StarPUOpenClWrapperClass -#endif - > ThisClass; - - template <class OtherBlockClass> - struct BlockInteractions{ - OtherBlockClass* otherBlock; - int otherBlockId; - std::vector<OutOfBlockInteraction> interactions; - }; - - struct CellHandles{ - starpu_data_handle_t symb; - starpu_data_handle_t up; - starpu_data_handle_t down; - int intervalSize; - }; - - struct ParticleHandles{ - starpu_data_handle_t symb; - starpu_data_handle_t down; - int intervalSize; - }; - - std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel; + , StarPUOpenClWrapperClass +#endif + > ThisClass; + + template <class OtherBlockClass> + struct BlockInteractions{ + OtherBlockClass* otherBlock; //< + int otherBlockId; //< + std::vector<OutOfBlockInteraction> interactions; //< + }; + + struct CellHandles{ + starpu_data_handle_t symb; //< Symbolique part of a cell + starpu_data_handle_t up; //< Multipole expansion into a cell + starpu_data_handle_t down; //< Local expansion into a cell + int intervalSize; + int groupID ; //< Group Id + }; + + struct ParticleHandles{ + starpu_data_handle_t symb; //< Symbolique part of a leaf + starpu_data_handle_t down; //< Local expansion into a leaf ?????? 
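// A minimal sketch of the ownership / global-index API added to the blocks above
// (isMine(), declare_mine(), getIdxGlobal(), setIdxGlobal()); the helper name
// count_my_cell_blocks is hypothetical and uses only accessors shown in this patch.
template<class GroupOctreeClass>
int count_my_cell_blocks(GroupOctreeClass& tree, const int level){
    int nb_mine = 0;
    for(int idx_group = 0 ; idx_group < tree.getNbCellGroupAtLevel(level) ; ++idx_group){
        auto* group = tree.getCellGroup(level, idx_group);
        // isMine() is true only for blocks on which declare_mine() was called locally;
        // LET blocks received from neighbours keep the header defaults
        // (isMine == false, idxGlobal == -1 until a global index is assigned).
        if(group->isMine()){
            ++nb_mine;
        }
    }
    return nb_mine;
}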
+ int intervalSize; + }; + + std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel; #ifdef SCALFMM_USE_STARPU_EXTRACT - std::vector< std::vector< std::vector<std::vector<int>>>> externalInteractionsAllLevelInnerIndexes; - std::vector< std::vector< std::vector<std::vector<int>>>> externalInteractionsAllLevelOuterIndexes; + std::vector< std::vector< std::vector<std::vector<int>>>> externalInteractionsAllLevelInnerIndexes; + std::vector< std::vector< std::vector<std::vector<int>>>> externalInteractionsAllLevelOuterIndexes; #endif - std::vector< std::vector<BlockInteractions<ParticleGroupClass>>> externalInteractionsLeafLevel; + std::vector< std::vector<BlockInteractions<ParticleGroupClass>>> externalInteractionsLeafLevel; #ifdef SCALFMM_USE_STARPU_EXTRACT - std::vector< std::vector<std::vector<int>>> externalInteractionsLeafLevelOuter; - std::vector< std::vector<std::vector<int>>> externalInteractionsLeafLevelInner; + std::vector< std::vector<std::vector<int>>> externalInteractionsLeafLevelOuter; + std::vector< std::vector<std::vector<int>>> externalInteractionsLeafLevelInner; #endif - std::list<const std::vector<OutOfBlockInteraction>*> externalInteractionsLeafLevelOpposite; - - OctreeClass*const tree; //< The Tree - KernelClass*const originalCpuKernel; + std::list<const std::vector<OutOfBlockInteraction>*> externalInteractionsLeafLevelOpposite; - std::vector<CellHandles>* cellHandles; - std::vector<ParticleHandles> particleHandles; + OctreeClass*const tree; //< A pointer on the Tree either duplicated or the let tree + KernelClass*const originalCpuKernel; //< - starpu_codelet p2m_cl; - starpu_codelet m2m_cl; - starpu_codelet l2l_cl; - starpu_codelet l2l_cl_nocommute; - starpu_codelet l2p_cl; + std::vector<CellHandles>* cellHandles; //< Pointer on the vector of cell handle + std::vector<ParticleHandles> particleHandles; //< + int nb_block; //< Number of block ?? 
+ starpu_codelet p2m_cl; + starpu_codelet m2m_cl; + starpu_codelet l2l_cl; + starpu_codelet l2l_cl_nocommute; + starpu_codelet l2p_cl; - starpu_codelet m2l_cl_in; - starpu_codelet m2l_cl_inout; - starpu_codelet m2l_cl_inout_mpi; + starpu_codelet m2l_cl_in; + starpu_codelet m2l_cl_inout; + starpu_codelet m2l_cl_inout_mpi; - starpu_codelet p2p_cl_in; - starpu_codelet p2p_cl_inout; - starpu_codelet p2p_cl_inout_mpi; + starpu_codelet p2p_cl_in; + starpu_codelet p2p_cl_inout; + starpu_codelet p2p_cl_inout_mpi; #ifdef STARPU_USE_REDUX - starpu_codelet p2p_redux_init; - starpu_codelet p2p_redux_perform; - starpu_codelet p2p_redux_read; + starpu_codelet p2p_redux_init; + starpu_codelet p2p_redux_perform; + starpu_codelet p2p_redux_read; #endif - const bool noCommuteAtLastLevel; - const bool noCommuteBetweenLevel; + const bool noCommuteAtLastLevel; + const bool noCommuteBetweenLevel; #ifdef STARPU_USE_CPU - StarPUCpuWrapperClass cpuWrapper; + StarPUCpuWrapperClass cpuWrapper; #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - StarPUCudaWrapperClass cudaWrapper; + StarPUCudaWrapperClass cudaWrapper; #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - StarPUOpenClWrapperClass openclWrapper; + StarPUOpenClWrapperClass openclWrapper; #endif - FStarPUPtrInterface wrappers; - FStarPUPtrInterface* wrapperptr; + FStarPUPtrInterface wrappers; + FStarPUPtrInterface* wrapperptr; #ifdef STARPU_SUPPORT_ARBITER - starpu_arbiter_t arbiterGlobal; + starpu_arbiter_t arbiterGlobal; #endif #ifdef STARPU_USE_TASK_NAME #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - std::vector<std::unique_ptr<char[]>> m2mTaskNames; - std::vector<std::unique_ptr<char[]>> m2lTaskNames; - std::vector<std::unique_ptr<char[]>> m2lOuterTaskNames; - std::vector<std::unique_ptr<char[]>> l2lTaskNames; - std::unique_ptr<char[]> p2mTaskNames; - std::unique_ptr<char[]> l2pTaskNames; - std::unique_ptr<char[]> p2pTaskNames; - std::unique_ptr<char[]> p2pOuterTaskNames; + std::vector<std::unique_ptr<char[]>> m2mTaskNames; + std::vector<std::unique_ptr<char[]>> m2lTaskNames; + std::vector<std::unique_ptr<char[]>> m2lOuterTaskNames; + std::vector<std::unique_ptr<char[]>> l2lTaskNames; + std::unique_ptr<char[]> p2mTaskNames; + std::unique_ptr<char[]> l2pTaskNames; + std::unique_ptr<char[]> p2pTaskNames; + std::unique_ptr<char[]> p2pOuterTaskNames; #else - FStarPUTaskNameParams* taskNames = nullptr; + FStarPUTaskNameParams* taskNames = nullptr; #endif #endif #ifdef SCALFMM_STARPU_USE_PRIO - typedef FStarPUFmmPrioritiesV2 PrioClass;// FStarPUFmmPriorities + typedef FStarPUFmmPrioritiesV2 PrioClass;// FStarPUFmmPriorities #endif - int mpi_rank, nproc; - std::vector<std::vector<std::vector<MortonIndex>>> nodeRepartition; - + // + // Parallel data + int mpi_rank ; //< rank of the current processus + int nproc; //< number of mpi processes + std::vector< std::vector< std::vector<MortonIndex>>> _nodeRepartition; //< The morton index distributions level by level #ifdef SCALFMM_USE_STARPU_EXTRACT - struct ParticleExtractedHandles{ - starpu_data_handle_t symb; - size_t size; - std::unique_ptr<unsigned char[]> data; - std::vector<int> leavesToExtract; - }; + struct ParticleExtractedHandles{ + starpu_data_handle_t symb; + size_t size; + std::unique_ptr<unsigned char[]> data; + std::vector<int> leavesToExtract; + }; - std::list<ParticleExtractedHandles> extractedParticlesBuffer; + std::list<ParticleExtractedHandles> extractedParticlesBuffer; - struct DuplicatedParticlesHandle{ - starpu_data_handle_t symb; - size_t size; - unsigned char* data; // Never delete it, we reuse already allocate 
memory here - }; + struct DuplicatedParticlesHandle{ + starpu_data_handle_t symb; + size_t size; + unsigned char* data; // Never delete it, we reuse already allocate memory here + }; - std::list<DuplicatedParticlesHandle> duplicatedParticlesBuffer; + std::list<DuplicatedParticlesHandle> duplicatedParticlesBuffer; - starpu_codelet p2p_extract; - starpu_codelet p2p_insert; - starpu_codelet p2p_insert_bis; + starpu_codelet p2p_extract; + starpu_codelet p2p_insert; + starpu_codelet p2p_insert_bis; - struct CellExtractedHandles{ - starpu_data_handle_t all; - size_t size; - std::unique_ptr<unsigned char[]> data; - std::vector<int> cellsToExtract; - }; + struct CellExtractedHandles{ + starpu_data_handle_t all; + size_t size; + std::unique_ptr<unsigned char[]> data; + std::vector<int> cellsToExtract; + }; - std::list<CellExtractedHandles> extractedCellBuffer; + std::list<CellExtractedHandles> extractedCellBuffer; - struct DuplicatedCellHandle{ - starpu_data_handle_t symb; - size_t sizeSymb; - unsigned char* dataSymb; // Never delete it, we reuse already allocate memory here - starpu_data_handle_t other; - size_t sizeOther; - unsigned char* dataOther; // Never delete it, we reuse already allocate memory here + struct DuplicatedCellHandle{ + starpu_data_handle_t symb; + size_t sizeSymb; + unsigned char* dataSymb; // Never delete it, we reuse already allocate memory here + starpu_data_handle_t other; + size_t sizeOther; + unsigned char* dataOther; // Never delete it, we reuse already allocate memory here - std::unique_ptr<unsigned char[]> dataSymbPtr; - std::unique_ptr<unsigned char[]> dataOtherPtr; - }; + std::unique_ptr<unsigned char[]> dataSymbPtr; + std::unique_ptr<unsigned char[]> dataOtherPtr; + }; - std::list<DuplicatedCellHandle> duplicatedCellBuffer; + std::list<DuplicatedCellHandle> duplicatedCellBuffer; - starpu_codelet cell_extract_up; - starpu_codelet cell_insert_up; - starpu_codelet cell_insert_up_bis; + starpu_codelet cell_extract_up; + starpu_codelet cell_insert_up; + starpu_codelet cell_insert_up_bis; #endif public: - FGroupTaskStarPUImplicitAlgorithm(OctreeClass*const inTree, KernelClass* inKernels, std::vector<MortonIndex>& distributedMortonIndex) - : tree(inTree), originalCpuKernel(inKernels), - cellHandles(nullptr), - noCommuteAtLastLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_LAST_L2L", true)), - noCommuteBetweenLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_M2L_L2L", false)), - #ifdef STARPU_USE_CPU - cpuWrapper(tree->getHeight()), - #endif - #ifdef SCALFMM_ENABLE_CUDA_KERNEL - cudaWrapper(tree->getHeight()), - #endif - #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - openclWrapper(tree->getHeight()), - #endif - wrapperptr(&wrappers){ - FAssertLF(tree, "tree cannot be null"); - FAssertLF(inKernels, "kernels cannot be null"); - - FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); - struct starpu_conf conf; - FAssertLF(starpu_conf_init(&conf) == 0); + FGroupTaskStarPUImplicitAlgorithm(OctreeClass*const inTree, KernelClass* inKernels, + std::vector<MortonIndex>& distributedMortonIndex, + const int nb_block_in = -1) + : tree(inTree), nb_block(nb_block_in), originalCpuKernel(inKernels), + cellHandles(nullptr), + noCommuteAtLastLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_LAST_L2L", true)), + noCommuteBetweenLevel(FEnv::GetBool("SCALFMM_NO_COMMUTE_M2L_L2L", false)), + #ifdef STARPU_USE_CPU + cpuWrapper(tree->getHeight()), + #endif + #ifdef SCALFMM_ENABLE_CUDA_KERNEL + cudaWrapper(tree->getHeight()), + #endif + #ifdef SCALFMM_ENABLE_OPENCL_KERNEL + openclWrapper(tree->getHeight()), + #endif + 
wrapperptr(&wrappers){ + FAssertLF(tree, "tree cannot be null"); + FAssertLF(inKernels, "kernels cannot be null"); + + FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); + struct starpu_conf conf; + FAssertLF(starpu_conf_init(&conf) == 0); #ifdef SCALFMM_STARPU_USE_PRIO - PrioClass::Controller().init(&conf, tree->getHeight(), inKernels); + PrioClass::Controller().init(&conf, tree->getHeight(), inKernels); #endif - FAssertLF(starpu_init(&conf) == 0); - FAssertLF(starpu_mpi_init ( 0, 0, 0 ) == 0); - MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); - MPI_Comm_size(MPI_COMM_WORLD,&nproc); + FAssertLF(starpu_init(&conf) == 0); + FAssertLF(starpu_mpi_init ( 0, 0, 0 ) == 0); + MPI_Comm_rank(MPI_COMM_WORLD,&mpi_rank); + MPI_Comm_size(MPI_COMM_WORLD,&nproc); #ifdef STARPU_USE_TASK_NAME #ifdef SCALFMM_SIMGRID_TASKNAMEPARAMS - taskNames = new FStarPUTaskNameParams(mpi_rank, nproc); + taskNames = new FStarPUTaskNameParams(mpi_rank, nproc); #endif #endif - starpu_malloc_set_align(32); + starpu_malloc_set_align(32); - starpu_pthread_mutex_t initMutex; - starpu_pthread_mutex_init(&initMutex, NULL); + starpu_pthread_mutex_t initMutex; + starpu_pthread_mutex_init(&initMutex, NULL); #ifdef STARPU_USE_CPU - FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){ - starpu_pthread_mutex_lock(&initMutex); - cpuWrapper.initKernel(starpu_worker_get_id(), inKernels); - starpu_pthread_mutex_unlock(&initMutex); - }); - wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper); + FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){ + starpu_pthread_mutex_lock(&initMutex); + cpuWrapper.initKernel(starpu_worker_get_id(), inKernels); + starpu_pthread_mutex_unlock(&initMutex); + }); + wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper); #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){ - starpu_pthread_mutex_lock(&initMutex); - cudaWrapper.initKernel(starpu_worker_get_id(), inKernels); - starpu_pthread_mutex_unlock(&initMutex); - }); - wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper); + FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){ + starpu_pthread_mutex_lock(&initMutex); + cudaWrapper.initKernel(starpu_worker_get_id(), inKernels); + starpu_pthread_mutex_unlock(&initMutex); + }); + wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper); #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){ - starpu_pthread_mutex_lock(&initMutex); - openclWrapper.initKernel(starpu_worker_get_id(), inKernels); - starpu_pthread_mutex_unlock(&initMutex); - }); - wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper); + FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){ + starpu_pthread_mutex_lock(&initMutex); + openclWrapper.initKernel(starpu_worker_get_id(), inKernels); + starpu_pthread_mutex_unlock(&initMutex); + }); + wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper); #endif - starpu_pthread_mutex_destroy(&initMutex); + starpu_pthread_mutex_destroy(&initMutex); - starpu_pause(); + starpu_pause(); - cellHandles = new std::vector<CellHandles>[tree->getHeight()]; + cellHandles = new std::vector<CellHandles>[tree->getHeight()]; #ifdef STARPU_SUPPORT_ARBITER - arbiterGlobal = starpu_arbiter_create(); + arbiterGlobal = starpu_arbiter_create(); #endif - initCodelet(); - initCodeletMpi(); - createMachinChose(distributedMortonIndex); - rebuildInteractions(); + initCodelet(); + initCodeletMpi(); + // + std::cout << " ------------------ inside the constructor of the Algorithm ------------------" + << distributedMortonIndex.size() <<std::endl; + setMortonDistribution(distributedMortonIndex); + this->printDataDistributionInfo(); 
+ + this->rebuildInteractions(); - FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n"); + FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Worker " << starpu_worker_get_count() << ")\n"); #ifdef STARPU_USE_CPU - FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n"); + FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CPU " << starpu_cpu_worker_get_count() << ")\n"); #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n"); + FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max OpenCL " << starpu_opencl_worker_get_count() << ")\n"); #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CUDA " << starpu_cuda_worker_get_count() << ")\n"); + FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max CUDA " << starpu_cuda_worker_get_count() << ")\n"); #endif - FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_LAST_L2L " << noCommuteAtLastLevel << "\n"); - FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_M2L_L2L " << noCommuteBetweenLevel << "\n"); + FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_LAST_L2L " << noCommuteAtLastLevel << "\n"); + FLOG(FLog::Controller << "SCALFMM_NO_COMMUTE_M2L_L2L " << noCommuteBetweenLevel << "\n"); - buildTaskNames(); - } + this->buildTaskNames(); + } - void buildTaskNames(){ + void buildTaskNames(){ #ifdef STARPU_USE_TASK_NAME #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - const int namesLength = 128; - m2mTaskNames.resize(tree->getHeight()); - m2lTaskNames.resize(tree->getHeight()); - m2lOuterTaskNames.resize(tree->getHeight()); - l2lTaskNames.resize(tree->getHeight()); - for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){ - m2mTaskNames[idxLevel].reset(new char[namesLength]); - snprintf(m2mTaskNames[idxLevel].get(), namesLength, "M2M-level-%d", idxLevel); - m2lTaskNames[idxLevel].reset(new char[namesLength]); - snprintf(m2lTaskNames[idxLevel].get(), namesLength, "M2L-level-%d", idxLevel); - m2lOuterTaskNames[idxLevel].reset(new char[namesLength]); - snprintf(m2lOuterTaskNames[idxLevel].get(), namesLength, "M2L-out-level-%d", idxLevel); - l2lTaskNames[idxLevel].reset(new char[namesLength]); - snprintf(l2lTaskNames[idxLevel].get(), namesLength, "L2L-level-%d", idxLevel); - } - - p2mTaskNames.reset(new char[namesLength]); - snprintf(p2mTaskNames.get(), namesLength, "P2M"); - l2pTaskNames.reset(new char[namesLength]); - snprintf(l2pTaskNames.get(), namesLength, "L2P"); - p2pTaskNames.reset(new char[namesLength]); - snprintf(p2pTaskNames.get(), namesLength, "P2P"); - p2pOuterTaskNames.reset(new char[namesLength]); - snprintf(p2pOuterTaskNames.get(), namesLength, "P2P-out"); -#endif -#endif - } - - void syncData(){ - for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){ - for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){ - if(isDataOwnedBerenger(tree->getCellGroup(idxLevel, idxHandle)->getStartingIndex(), idxLevel)) {//Clean only our data handle - starpu_data_acquire(cellHandles[idxLevel][idxHandle].symb, STARPU_R); - starpu_data_release(cellHandles[idxLevel][idxHandle].symb); - starpu_data_acquire(cellHandles[idxLevel][idxHandle].up, STARPU_R); - starpu_data_release(cellHandles[idxLevel][idxHandle].up); - starpu_data_acquire(cellHandles[idxLevel][idxHandle].down, STARPU_R); - starpu_data_release(cellHandles[idxLevel][idxHandle].down); - } - } - } - { - 
for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){ - if(isDataOwnedBerenger(tree->getCellGroup(tree->getHeight()-1, idxHandle)->getStartingIndex(), tree->getHeight()-1)) {//Clean only our data handle - starpu_data_acquire(particleHandles[idxHandle].symb, STARPU_R); - starpu_data_release(particleHandles[idxHandle].symb); - starpu_data_acquire(particleHandles[idxHandle].down, STARPU_R); - starpu_data_release(particleHandles[idxHandle].down); - } + const int namesLength = 128; + m2mTaskNames.resize(tree->getHeight()); + m2lTaskNames.resize(tree->getHeight()); + m2lOuterTaskNames.resize(tree->getHeight()); + l2lTaskNames.resize(tree->getHeight()); + for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){ + m2mTaskNames[idxLevel].reset(new char[namesLength]); + snprintf(m2mTaskNames[idxLevel].get(), namesLength, "M2M-level-%d", idxLevel); + m2lTaskNames[idxLevel].reset(new char[namesLength]); + snprintf(m2lTaskNames[idxLevel].get(), namesLength, "M2L-level-%d", idxLevel); + m2lOuterTaskNames[idxLevel].reset(new char[namesLength]); + snprintf(m2lOuterTaskNames[idxLevel].get(), namesLength, "M2L-out-level-%d", idxLevel); + l2lTaskNames[idxLevel].reset(new char[namesLength]); + snprintf(l2lTaskNames[idxLevel].get(), namesLength, "L2L-level-%d", idxLevel); + } + + p2mTaskNames.reset(new char[namesLength]); + snprintf(p2mTaskNames.get(), namesLength, "P2M"); + l2pTaskNames.reset(new char[namesLength]); + snprintf(l2pTaskNames.get(), namesLength, "L2P"); + p2pTaskNames.reset(new char[namesLength]); + snprintf(p2pTaskNames.get(), namesLength, "P2P"); + p2pOuterTaskNames.reset(new char[namesLength]); + snprintf(p2pOuterTaskNames.get(), namesLength, "P2P-out"); +#endif +#endif + } + + void syncData(){ + for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){ + for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){ + if(isDataOwnedBerenger(tree->getCellGroup(idxLevel, idxHandle)->getStartingIndex(), idxLevel)) {//Clean only our data handle + starpu_data_acquire(cellHandles[idxLevel][idxHandle].symb, STARPU_R); + starpu_data_release(cellHandles[idxLevel][idxHandle].symb); + starpu_data_acquire(cellHandles[idxLevel][idxHandle].up, STARPU_R); + starpu_data_release(cellHandles[idxLevel][idxHandle].up); + starpu_data_acquire(cellHandles[idxLevel][idxHandle].down, STARPU_R); + starpu_data_release(cellHandles[idxLevel][idxHandle].down); + } + } + } + { + for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){ + if(isDataOwnedBerenger(tree->getCellGroup(tree->getHeight()-1, idxHandle)->getStartingIndex(), tree->getHeight()-1)) {//Clean only our data handle + starpu_data_acquire(particleHandles[idxHandle].symb, STARPU_R); + starpu_data_release(particleHandles[idxHandle].symb); + starpu_data_acquire(particleHandles[idxHandle].down, STARPU_R); + starpu_data_release(particleHandles[idxHandle].down); } } } + } - ~FGroupTaskStarPUImplicitAlgorithm(){ - starpu_resume(); + ~FGroupTaskStarPUImplicitAlgorithm(){ + starpu_resume(); - cleanHandle(); - delete[] cellHandles; + cleanHandle(); + delete[] cellHandles; - starpu_pthread_mutex_t releaseMutex; - starpu_pthread_mutex_init(&releaseMutex, NULL); + starpu_pthread_mutex_t releaseMutex; + starpu_pthread_mutex_init(&releaseMutex, NULL); #ifdef STARPU_USE_CPU - FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){ - starpu_pthread_mutex_lock(&releaseMutex); - cpuWrapper.releaseKernel(starpu_worker_get_id()); - starpu_pthread_mutex_unlock(&releaseMutex); - }); - 
wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper); + FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){ + starpu_pthread_mutex_lock(&releaseMutex); + cpuWrapper.releaseKernel(starpu_worker_get_id()); + starpu_pthread_mutex_unlock(&releaseMutex); + }); + wrappers.set(FSTARPU_CPU_IDX, &cpuWrapper); #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){ - starpu_pthread_mutex_lock(&releaseMutex); - cudaWrapper.releaseKernel(starpu_worker_get_id()); - starpu_pthread_mutex_unlock(&releaseMutex); - }); - wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper); + FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){ + starpu_pthread_mutex_lock(&releaseMutex); + cudaWrapper.releaseKernel(starpu_worker_get_id()); + starpu_pthread_mutex_unlock(&releaseMutex); + }); + wrappers.set(FSTARPU_CUDA_IDX, &cudaWrapper); #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){ - starpu_pthread_mutex_lock(&releaseMutex); - openclWrapper.releaseKernel(starpu_worker_get_id()); - starpu_pthread_mutex_unlock(&releaseMutex); - }); - wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper); + FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){ + starpu_pthread_mutex_lock(&releaseMutex); + openclWrapper.releaseKernel(starpu_worker_get_id()); + starpu_pthread_mutex_unlock(&releaseMutex); + }); + wrappers.set(FSTARPU_OPENCL_IDX, &openclWrapper); #endif - starpu_pthread_mutex_destroy(&releaseMutex); + starpu_pthread_mutex_destroy(&releaseMutex); #ifdef STARPU_SUPPORT_ARBITER - starpu_arbiter_destroy(arbiterGlobal); + starpu_arbiter_destroy(arbiterGlobal); #endif - for(auto externalInteraction : externalInteractionsLeafLevelOpposite) - delete externalInteraction; + for(auto externalInteraction : externalInteractionsLeafLevelOpposite) + delete externalInteraction; - starpu_mpi_shutdown(); - starpu_shutdown(); - } + starpu_mpi_shutdown(); + starpu_shutdown(); + } - void rebuildInteractions(){ - FAssertLF(getenv("OMP_WAIT_POLICY") == nullptr - || strcmp(getenv("OMP_WAIT_POLICY"), "PASSIVE") == 0 - || strcmp(getenv("OMP_WAIT_POLICY"), "passive") == 0); + void rebuildInteractions(){ + std::cout << " begin rebuildInteractions " << std::endl; + FAssertLF(getenv("OMP_WAIT_POLICY") == nullptr + || strcmp(getenv("OMP_WAIT_POLICY"), "PASSIVE") == 0 + || strcmp(getenv("OMP_WAIT_POLICY"), "passive") == 0); #pragma omp parallel #pragma omp single - buildExternalInteractionVecs(); - + buildExternalInteractionVecs(); + if(this->nb_block <= 0){ + std::cout << " buildHandles " <<std::endl; buildHandles(); - } + } + else { + std::cout << " buildDistributedHandles " <<std::endl; + buildDistributedHandles(); + } + std::cout << " end rebuildInteractions " << std::endl; + this->printCellHandels (); + } #ifdef STARPU_USE_CPU - void forEachCpuWorker(std::function<void(void)> func){ - starpu_resume(); - FStarPUUtils::ExecOnWorkers(STARPU_CPU, func); - starpu_pause(); - } - - void forEachCpuWorker(std::function<void(KernelClass*)> func){ - starpu_resume(); - FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){ - func(cpuWrapper.getKernel(starpu_worker_get_id())); - }); - starpu_pause(); - } + void forEachCpuWorker(std::function<void(void)> func){ + starpu_resume(); + FStarPUUtils::ExecOnWorkers(STARPU_CPU, func); + starpu_pause(); + } + + void forEachCpuWorker(std::function<void(KernelClass*)> func){ + starpu_resume(); + FStarPUUtils::ExecOnWorkers(STARPU_CPU, [&](){ + func(cpuWrapper.getKernel(starpu_worker_get_id())); + }); + starpu_pause(); + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - void 
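// The nb_block constructor argument introduced above selects how the StarPU handles are
// registered in rebuildInteractions(): nb_block <= 0 keeps the historical duplicated-tree
// path (buildHandles), while a positive value takes the distributed/LET path
// (buildDistributedHandles). A hedged usage sketch, assuming the total block count is the
// value produced by dstr_grp_tree_builder::set_cell_group_global_index() earlier in this
// patch and that the algorithm is run through the usual execute() entry point:
//
//     const int total_nb_blocks =
//         dstr_grp_tree_builder::set_cell_group_global_index(letTree, mpi_conf);
//     FGroupTaskStarPUImplicitAlgorithm<OctreeClass, CellContainerClass, KernelClass,
//                                       ParticleGroupClass, StarPUCpuWrapperClass>
//         algo(&letTree, &kernel, mortonDistribution, total_nb_blocks);
//     algo.execute();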
forEachCudaWorker(std::function<void(void)> func){ - starpu_resume(); - FStarPUUtils::ExecOnWorkers(STARPU_CUDA, func); - starpu_pause(); - } - void forEachCudaWorker(std::function<void(void*)> func){ - starpu_resume(); - FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){ - func(cudaWrapper.getKernel(starpu_worker_get_id())); - }); - starpu_pause(); - } + void forEachCudaWorker(std::function<void(void)> func){ + starpu_resume(); + FStarPUUtils::ExecOnWorkers(STARPU_CUDA, func); + starpu_pause(); + } + void forEachCudaWorker(std::function<void(void*)> func){ + starpu_resume(); + FStarPUUtils::ExecOnWorkers(STARPU_CUDA, [&](){ + func(cudaWrapper.getKernel(starpu_worker_get_id())); + }); + starpu_pause(); + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - void forEachOpenCLWorker(std::function<void(void)> func){ - starpu_resume(); - FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, func); - starpu_pause(); - } - void forEachOpenCLWorker(std::function<void(void*)> func){ - starpu_resume(); - FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){ - func(openclWrapper.getKernel(starpu_worker_get_id())); - }); - starpu_pause(); - } -#endif - - int getRank(void) const { - return mpi_rank; - } - int getNProc(void) const { - return nproc; - } - bool isDataOwnedBerenger(MortonIndex const idx, int const idxLevel) const { - return dataMappingBerenger(idx, idxLevel) == mpi_rank; - } - void createMachinChose(std::vector<MortonIndex> distributedMortonIndex) { - nodeRepartition.resize(tree->getHeight(), std::vector<std::vector<MortonIndex>>(nproc, std::vector<MortonIndex>(2))); - for(int node_id = 0; node_id < nproc; ++node_id){ - nodeRepartition[tree->getHeight()-1][node_id][0] = distributedMortonIndex[node_id*2]; - nodeRepartition[tree->getHeight()-1][node_id][1] = distributedMortonIndex[node_id*2+1]; - } - for(int idxLevel = tree->getHeight() - 2; idxLevel >= 0 ; --idxLevel){ - nodeRepartition[idxLevel][0][0] = nodeRepartition[idxLevel+1][0][0] >> 3; - nodeRepartition[idxLevel][0][1] = nodeRepartition[idxLevel+1][0][1] >> 3; - for(int node_id = 1; node_id < nproc; ++node_id){ - nodeRepartition[idxLevel][node_id][0] = FMath::Max(nodeRepartition[idxLevel+1][node_id][0] >> 3, nodeRepartition[idxLevel][node_id-1][0]+1); //Berenger phd :) - nodeRepartition[idxLevel][node_id][1] = nodeRepartition[idxLevel+1][node_id][1] >> 3; - } - } - } - int getOppositeInterIndex(const int index) const { - // ((( (xdiff+3) * 7) + (ydiff+3))) * 7 + zdiff + 3 - return 343-index-1; - } + void forEachOpenCLWorker(std::function<void(void)> func){ + starpu_resume(); + FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, func); + starpu_pause(); + } + void forEachOpenCLWorker(std::function<void(void*)> func){ + starpu_resume(); + FStarPUUtils::ExecOnWorkers(STARPU_OPENCL, [&](){ + func(openclWrapper.getKernel(starpu_worker_get_id())); + }); + starpu_pause(); + } +#endif + + int getRank(void) const { + return mpi_rank; + } + int getNProc(void) const { + return nproc; + } + + // \brief Tell whether the group starting at the given Morton index is owned by the current MPI process + // + // @param[in] idx Morton index of the first cell of the group + // @param[in] idxLevel level of the group in the tree + // + // @return true if the group is mapped onto mpi_rank + // + bool isDataOwnedBerenger(MortonIndex const idx, int const idxLevel) const { + return dataMappingBerenger(idx, idxLevel) == mpi_rank; + } + + // \brief construct the distribution of the cells and the leaves + // + // @param[in] distributedMortonIndex the Morton distribution at the leaf level + void setMortonDistribution(const std::vector<MortonIndex> &distributedMortonIndex) { + // + std::cout << "setMortonDistribution: " <<std::endl + << " input distribution: " << distributedMortonIndex.size() << std::endl << " "; +
for (auto v : distributedMortonIndex) { + std::cout << " " << v; + } + std::cout << std::endl ; + _nodeRepartition.resize(tree->getHeight(), + std::vector<std::vector<MortonIndex>>(nproc, + std::vector<MortonIndex>(2))); + for(int node_id = 0; node_id < nproc; ++node_id){ + _nodeRepartition[tree->getHeight()-1][node_id][0] = distributedMortonIndex[node_id*2]; + _nodeRepartition[tree->getHeight()-1][node_id][1] = distributedMortonIndex[node_id*2+1]; + } + for(int idxLevel = tree->getHeight() - 2; idxLevel >= 0 ; --idxLevel){ + _nodeRepartition[idxLevel][0][0] = _nodeRepartition[idxLevel+1][0][0] >> 3; + _nodeRepartition[idxLevel][0][1] = _nodeRepartition[idxLevel+1][0][1] >> 3; + for(int node_id = 1; node_id < nproc; ++node_id){ + _nodeRepartition[idxLevel][node_id][0] = FMath::Max(_nodeRepartition[idxLevel+1][node_id][0] >> 3, _nodeRepartition[idxLevel][node_id-1][0]+1); //Berenger phd :) + _nodeRepartition[idxLevel][node_id][1] = _nodeRepartition[idxLevel+1][node_id][1] >> 3; + } + } + this->printDataDistributionInfo(); + } + // + // \brief print the local morton index distribution on the current node + void printDataDistributionInfo(){ + std::cout << "Group Tree information on node "<< mpi_rank << "\n"; + std::cout << "\t Group Size = " << -1 << "\n"; + std::cout << "\t Tree height = " << tree->getHeight() << "\n"; + for(int idxLevel = 1 ; idxLevel < tree->getHeight() ; ++idxLevel){ + std::cout << "Level "<< idxLevel <<std::endl; + std::cout << "\t Starting Index = " << _nodeRepartition[idxLevel][mpi_rank][0] + << "\t Ending Index = " << _nodeRepartition[idxLevel][mpi_rank][1] + << " Number of group "<<-1 << std::endl; + } + } + // + // + // + int getOppositeInterIndex(const int index) const { + // ((( (xdiff+3) * 7) + (ydiff+3))) * 7 + zdiff + 3 + return 343-index-1; + } protected: - /** + /** * Runs the complete algorithm. 
*/ - void executeCore(const unsigned operationsToProceed) override { - FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" ); - const bool directOnly = (tree->getHeight() <= 2); + void executeCore(const unsigned operationsToProceed) override { + FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" ); + const bool directOnly = (tree->getHeight() <= 2); + std::cout << " Algo executeCore " <<std::endl + << " upperWorkingLevel: " << FAbstractAlgorithm::upperWorkingLevel + << " lowerWorkingLevel: " << FAbstractAlgorithm::lowerWorkingLevel <<std::endl + << " Operation to proceed: "<< FFmmOperations_string(operationsToProceed) << std::endl; #ifdef STARPU_USE_CPU - FTIME_TASKS(cpuWrapper.taskTimeRecorder.start()); -#endif - starpu_resume(); - FLOG( FTic timerSoumission; ); - - if( operationsToProceed & FFmmP2P ) directPass(); - - if(operationsToProceed & FFmmP2M && !directOnly) bottomPass(); - - if(operationsToProceed & FFmmM2M && !directOnly) upwardPass(); - - if(operationsToProceed & FFmmM2L && !directOnly) transferPass(FAbstractAlgorithm::upperWorkingLevel, FAbstractAlgorithm::lowerWorkingLevel-1 , true, true); - - if(operationsToProceed & FFmmL2L && !directOnly) downardPass(); - - if(operationsToProceed & FFmmM2L && !directOnly) transferPass(FAbstractAlgorithm::lowerWorkingLevel-1, FAbstractAlgorithm::lowerWorkingLevel, true, true); - - if( operationsToProceed & FFmmL2P && !directOnly) mergePass(); + FTIME_TASKS(cpuWrapper.taskTimeRecorder.start()); +#endif + starpu_resume(); + FLOG( FTic timerSoumission; ); + + if( operationsToProceed & FFmmP2P ) { + this->directPass(); } + + if(operationsToProceed & FFmmP2M && !directOnly) this->bottomPass(); + + if(operationsToProceed & FFmmM2M && !directOnly){ + if(this->nb_block > 0 && nproc > 1 ){ + std::cout << " upwardPassNoDuplicate"<<std::endl; + upwardPassNoDuplicate(); + } else { + std::cout << " upwardPassDuplicate" <<std::endl; + upwardPassDuplicate(); + } + } + // + if(operationsToProceed & FFmmM2L && !directOnly) { + transferPass(FAbstractAlgorithm::upperWorkingLevel, + FAbstractAlgorithm::lowerWorkingLevel-1 , true, true); + } + + if(operationsToProceed & FFmmL2L && !directOnly){ + if(this->nb_block > 0 && nproc > 1){ + std::cout << " downardPassNoDuplicate " <<std::endl; + this->downardPassNoDuplicate(); + } else { + std::cout << " downardPassDuplicate " <<std::endl; + //downardPass() ; + this->downardPassDuplicate(); + } + } + //if(operationsToProceed & FFmmL2L && !directOnly) this->downardPassDuplicate(); + + if(operationsToProceed & FFmmM2L && !directOnly) transferPass(FAbstractAlgorithm::lowerWorkingLevel-1, FAbstractAlgorithm::lowerWorkingLevel, true, true); + + if( operationsToProceed & FFmmL2P && !directOnly) mergePass(); #ifdef STARPU_USE_REDUX - if( operationsToProceed & FFmmL2P && !directOnly) readParticle(); + if( operationsToProceed & FFmmL2P && !directOnly) readParticle(); #endif - FLOG( FLog::Controller << "\t\t Submitting the tasks took " << timerSoumission.tacAndElapsed() << "s\n" ); + FLOG( FLog::Controller << "\t\t Submitting the tasks took " << timerSoumission.tacAndElapsed() << "s\n" ); - starpu_task_wait_for_all(); + starpu_task_wait_for_all(); - FLOG( FTic timerSync; ); - syncData(); - FLOG( FLog::Controller << "\t\t Moving data to the host took " << timerSync.tacAndElapsed() << "s\n" ); + FLOG( FTic timerSync; ); + syncData(); + FLOG( FLog::Controller << "\t\t Moving data to the host took " << timerSync.tacAndElapsed() << "s\n" ); - starpu_pause(); + starpu_pause(); #ifdef STARPU_USE_CPU - 
FTIME_TASKS(cpuWrapper.taskTimeRecorder.end()); - FTIME_TASKS(cpuWrapper.taskTimeRecorder.saveToDisk("/tmp/taskstime-FGroupTaskStarPUAlgorithm.txt")); + FTIME_TASKS(cpuWrapper.taskTimeRecorder.end()); + FTIME_TASKS(cpuWrapper.taskTimeRecorder.saveToDisk("/tmp/taskstime-FGroupTaskStarPUAlgorithm.txt")); #endif - } + } - void initCodelet(){ - memset(&p2m_cl, 0, sizeof(p2m_cl)); + void initCodelet(){ + memset(&p2m_cl, 0, sizeof(p2m_cl)); #ifdef STARPU_USE_CPU - if(originalCpuKernel->supportP2M(FSTARPU_CPU_IDX)){ - p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback; - p2m_cl.where |= STARPU_CPU; - } + if(originalCpuKernel->supportP2M(FSTARPU_CPU_IDX)){ + p2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::bottomPassCallback; + p2m_cl.where |= STARPU_CPU; + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - if(originalCpuKernel->supportP2M(FSTARPU_CUDA_IDX)){ - p2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::bottomPassCallback; - p2m_cl.where |= STARPU_CUDA; - } + if(originalCpuKernel->supportP2M(FSTARPU_CUDA_IDX)){ + p2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::bottomPassCallback; + p2m_cl.where |= STARPU_CUDA; + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - if(originalCpuKernel->supportP2M(FSTARPU_OPENCL_IDX)){ - p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback; - p2m_cl.where |= STARPU_OPENCL; - } -#endif - p2m_cl.nbuffers = 3; - p2m_cl.modes[0] = STARPU_R; - p2m_cl.modes[1] = STARPU_RW; - p2m_cl.modes[2] = STARPU_R; - p2m_cl.name = "p2m_cl"; - - memset(&m2m_cl, 0, sizeof(m2m_cl)); + if(originalCpuKernel->supportP2M(FSTARPU_OPENCL_IDX)){ + p2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::bottomPassCallback; + p2m_cl.where |= STARPU_OPENCL; + } +#endif + p2m_cl.nbuffers = 3; + p2m_cl.modes[0] = STARPU_R; + p2m_cl.modes[1] = STARPU_RW; + p2m_cl.modes[2] = STARPU_R; + p2m_cl.name = "p2m_cl"; + + memset(&m2m_cl, 0, sizeof(m2m_cl)); #ifdef STARPU_USE_CPU - if(originalCpuKernel->supportM2M(FSTARPU_CPU_IDX)){ - m2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback; - m2m_cl.where |= STARPU_CPU; - } + if(originalCpuKernel->supportM2M(FSTARPU_CPU_IDX)){ + m2m_cl.cpu_funcs[0] = StarPUCpuWrapperClass::upwardPassCallback; + m2m_cl.where |= STARPU_CPU; + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - if(originalCpuKernel->supportM2M(FSTARPU_CUDA_IDX)){ - m2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::upwardPassCallback; - m2m_cl.where |= STARPU_CUDA; - } + if(originalCpuKernel->supportM2M(FSTARPU_CUDA_IDX)){ + m2m_cl.cuda_funcs[0] = StarPUCudaWrapperClass::upwardPassCallback; + m2m_cl.where |= STARPU_CUDA; + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - if(originalCpuKernel->supportM2M(FSTARPU_OPENCL_IDX)){ - m2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback; - m2m_cl.where |= STARPU_OPENCL; - } -#endif - m2m_cl.nbuffers = 4; - m2m_cl.dyn_modes = (starpu_data_access_mode*)malloc(m2m_cl.nbuffers*sizeof(starpu_data_access_mode)); - m2m_cl.dyn_modes[0] = STARPU_R; - m2m_cl.dyn_modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); - m2m_cl.name = "m2m_cl"; - m2m_cl.dyn_modes[2] = STARPU_R; - m2m_cl.dyn_modes[3] = STARPU_R; - - memset(&l2l_cl, 0, sizeof(l2l_cl)); + if(originalCpuKernel->supportM2M(FSTARPU_OPENCL_IDX)){ + m2m_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::upwardPassCallback; + m2m_cl.where |= STARPU_OPENCL; + } +#endif + m2m_cl.nbuffers = 4; + m2m_cl.dyn_modes = (starpu_data_access_mode*)malloc(m2m_cl.nbuffers*sizeof(starpu_data_access_mode)); + m2m_cl.dyn_modes[0] = STARPU_R; + m2m_cl.dyn_modes[1] = 
starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); + m2m_cl.name = "m2m_cl"; + m2m_cl.dyn_modes[2] = STARPU_R; + m2m_cl.dyn_modes[3] = STARPU_R; + + memset(&l2l_cl, 0, sizeof(l2l_cl)); #ifdef STARPU_USE_CPU - if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){ - l2l_cl.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback; - l2l_cl.where |= STARPU_CPU; - } + if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){ + l2l_cl.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback; + l2l_cl.where |= STARPU_CPU; + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){ - l2l_cl.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback; - l2l_cl.where |= STARPU_CUDA; - } + if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){ + l2l_cl.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback; + l2l_cl.where |= STARPU_CUDA; + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){ - l2l_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback; - l2l_cl.where |= STARPU_OPENCL; - } -#endif - l2l_cl.nbuffers = 4; - l2l_cl.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl.nbuffers*sizeof(starpu_data_access_mode)); - l2l_cl.dyn_modes[0] = STARPU_R; - l2l_cl.dyn_modes[1] = STARPU_R; - l2l_cl.name = "l2l_cl"; - l2l_cl.dyn_modes[2] = STARPU_R; - l2l_cl.dyn_modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); - - memset(&l2l_cl_nocommute, 0, sizeof(l2l_cl_nocommute)); + if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){ + l2l_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback; + l2l_cl.where |= STARPU_OPENCL; + } +#endif + l2l_cl.nbuffers = 4; + l2l_cl.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl.nbuffers*sizeof(starpu_data_access_mode)); + l2l_cl.dyn_modes[0] = STARPU_R; + l2l_cl.dyn_modes[1] = STARPU_R; + l2l_cl.name = "l2l_cl"; + l2l_cl.dyn_modes[2] = STARPU_R; + l2l_cl.dyn_modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); + + memset(&l2l_cl_nocommute, 0, sizeof(l2l_cl_nocommute)); #ifdef STARPU_USE_CPU - if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){ - l2l_cl_nocommute.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback; - l2l_cl_nocommute.where |= STARPU_CPU; - } + if(originalCpuKernel->supportL2L(FSTARPU_CPU_IDX)){ + l2l_cl_nocommute.cpu_funcs[0] = StarPUCpuWrapperClass::downardPassCallback; + l2l_cl_nocommute.where |= STARPU_CPU; + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){ - l2l_cl_nocommute.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback; - l2l_cl_nocommute.where |= STARPU_CUDA; - } + if(originalCpuKernel->supportL2L(FSTARPU_CUDA_IDX)){ + l2l_cl_nocommute.cuda_funcs[0] = StarPUCudaWrapperClass::downardPassCallback; + l2l_cl_nocommute.where |= STARPU_CUDA; + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){ - l2l_cl_nocommute.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback; - l2l_cl_nocommute.where |= STARPU_OPENCL; - } -#endif - l2l_cl_nocommute.nbuffers = 4; - l2l_cl_nocommute.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl_nocommute.nbuffers*sizeof(starpu_data_access_mode)); - l2l_cl_nocommute.dyn_modes[0] = STARPU_R; - l2l_cl_nocommute.dyn_modes[1] = STARPU_R; - l2l_cl_nocommute.name = "l2l_cl"; - l2l_cl_nocommute.dyn_modes[2] = STARPU_R; - l2l_cl_nocommute.dyn_modes[3] = STARPU_RW; - - memset(&l2p_cl, 0, sizeof(l2p_cl)); + 
if(originalCpuKernel->supportL2L(FSTARPU_OPENCL_IDX)){ + l2l_cl_nocommute.opencl_funcs[0] = StarPUOpenClWrapperClass::downardPassCallback; + l2l_cl_nocommute.where |= STARPU_OPENCL; + } +#endif + l2l_cl_nocommute.nbuffers = 4; + l2l_cl_nocommute.dyn_modes = (starpu_data_access_mode*)malloc(l2l_cl_nocommute.nbuffers*sizeof(starpu_data_access_mode)); + l2l_cl_nocommute.dyn_modes[0] = STARPU_R; + l2l_cl_nocommute.dyn_modes[1] = STARPU_R; + l2l_cl_nocommute.name = "l2l_cl"; + l2l_cl_nocommute.dyn_modes[2] = STARPU_R; + l2l_cl_nocommute.dyn_modes[3] = STARPU_RW; + + memset(&l2p_cl, 0, sizeof(l2p_cl)); #ifdef STARPU_USE_CPU - if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){ - l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback; - l2p_cl.where |= STARPU_CPU; - } + if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){ + l2p_cl.cpu_funcs[0] = StarPUCpuWrapperClass::mergePassCallback; + l2p_cl.where |= STARPU_CPU; + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){ - l2p_cl.cuda_funcs[0] = StarPUCudaWrapperClass::mergePassCallback; - l2p_cl.where |= STARPU_CUDA; - } + if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){ + l2p_cl.cuda_funcs[0] = StarPUCudaWrapperClass::mergePassCallback; + l2p_cl.where |= STARPU_CUDA; + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){ - l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback; - l2p_cl.where |= STARPU_OPENCL; - } -#endif - l2p_cl.nbuffers = 4; - l2p_cl.modes[0] = STARPU_R; - l2p_cl.modes[1] = STARPU_R; - l2p_cl.modes[2] = STARPU_R; + if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){ + l2p_cl.opencl_funcs[0] = StarPUOpenClWrapperClass::mergePassCallback; + l2p_cl.where |= STARPU_OPENCL; + } +#endif + l2p_cl.nbuffers = 4; + l2p_cl.modes[0] = STARPU_R; + l2p_cl.modes[1] = STARPU_R; + l2p_cl.modes[2] = STARPU_R; #ifdef STARPU_USE_REDUX - l2p_cl.modes[3] = STARPU_REDUX; + l2p_cl.modes[3] = STARPU_REDUX; #else - l2p_cl.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); + l2p_cl.modes[3] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); #endif - l2p_cl.name = "l2p_cl"; + l2p_cl.name = "l2p_cl"; - memset(&p2p_cl_in, 0, sizeof(p2p_cl_in)); + memset(&p2p_cl_in, 0, sizeof(p2p_cl_in)); #ifdef STARPU_USE_CPU - if(originalCpuKernel->supportP2P(FSTARPU_CPU_IDX)){ - p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback; - p2p_cl_in.where |= STARPU_CPU; - } + if(originalCpuKernel->supportP2P(FSTARPU_CPU_IDX)){ + p2p_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::directInPassCallback; + p2p_cl_in.where |= STARPU_CPU; + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - if(originalCpuKernel->supportP2P(FSTARPU_CUDA_IDX)){ - p2p_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::directInPassCallback; - p2p_cl_in.where |= STARPU_CUDA; - } + if(originalCpuKernel->supportP2P(FSTARPU_CUDA_IDX)){ + p2p_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::directInPassCallback; + p2p_cl_in.where |= STARPU_CUDA; + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){ - p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback; - p2p_cl_in.where |= STARPU_OPENCL; - } + if(originalCpuKernel->supportP2P(FSTARPU_OPENCL_IDX)){ + p2p_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::directInPassCallback; + p2p_cl_in.where |= STARPU_OPENCL; + } #endif - p2p_cl_in.nbuffers = 2; - p2p_cl_in.modes[0] = STARPU_R; + p2p_cl_in.nbuffers = 2; + p2p_cl_in.modes[0] = 
STARPU_R; #ifdef STARPU_USE_REDUX - p2p_cl_in.modes[1] = STARPU_REDUX; + p2p_cl_in.modes[1] = STARPU_REDUX; #else - p2p_cl_in.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); + p2p_cl_in.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); #endif - p2p_cl_in.name = "p2p_cl_in"; - memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout)); + p2p_cl_in.name = "p2p_cl_in"; + memset(&p2p_cl_inout, 0, sizeof(p2p_cl_inout)); #ifdef STARPU_USE_CPU - if(originalCpuKernel->supportP2PExtern(FSTARPU_CPU_IDX)){ - p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback; - p2p_cl_inout.where |= STARPU_CPU; - } + if(originalCpuKernel->supportP2PExtern(FSTARPU_CPU_IDX)){ + p2p_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallback; + p2p_cl_inout.where |= STARPU_CPU; + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - if(originalCpuKernel->supportP2PExtern(FSTARPU_CUDA_IDX)){ - p2p_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallback; - p2p_cl_inout.where |= STARPU_CUDA; - } + if(originalCpuKernel->supportP2PExtern(FSTARPU_CUDA_IDX)){ + p2p_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallback; + p2p_cl_inout.where |= STARPU_CUDA; + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - if(originalCpuKernel->supportP2PExtern(FSTARPU_OPENCL_IDX)){ - p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback; - p2p_cl_inout.where |= STARPU_OPENCL; - } + if(originalCpuKernel->supportP2PExtern(FSTARPU_OPENCL_IDX)){ + p2p_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallback; + p2p_cl_inout.where |= STARPU_OPENCL; + } #endif - p2p_cl_inout.nbuffers = 4; - p2p_cl_inout.modes[0] = STARPU_R; + p2p_cl_inout.nbuffers = 4; + p2p_cl_inout.modes[0] = STARPU_R; #ifdef STARPU_USE_REDUX - p2p_cl_inout.modes[1] = STARPU_REDUX; + p2p_cl_inout.modes[1] = STARPU_REDUX; #else - p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED); + p2p_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED); #endif - p2p_cl_inout.modes[2] = STARPU_R; + p2p_cl_inout.modes[2] = STARPU_R; #ifdef STARPU_USE_REDUX - p2p_cl_inout.modes[3] = STARPU_REDUX; + p2p_cl_inout.modes[3] = STARPU_REDUX; #else - p2p_cl_inout.modes[3] = starpu_data_access_mode(STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED); + p2p_cl_inout.modes[3] = starpu_data_access_mode(STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED); #endif - p2p_cl_inout.name = "p2p_cl_inout"; + p2p_cl_inout.name = "p2p_cl_inout"; - memset(&m2l_cl_in, 0, sizeof(m2l_cl_in)); + memset(&m2l_cl_in, 0, sizeof(m2l_cl_in)); #ifdef STARPU_USE_CPU - if(originalCpuKernel->supportM2L(FSTARPU_CPU_IDX)){ - m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback; - m2l_cl_in.where |= STARPU_CPU; - } + if(originalCpuKernel->supportM2L(FSTARPU_CPU_IDX)){ + m2l_cl_in.cpu_funcs[0] = StarPUCpuWrapperClass::transferInPassCallback; + m2l_cl_in.where |= STARPU_CPU; + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - if(originalCpuKernel->supportM2L(FSTARPU_CUDA_IDX)){ - m2l_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::transferInPassCallback; - m2l_cl_in.where |= STARPU_CUDA; - } + if(originalCpuKernel->supportM2L(FSTARPU_CUDA_IDX)){ + m2l_cl_in.cuda_funcs[0] = StarPUCudaWrapperClass::transferInPassCallback; + m2l_cl_in.where |= STARPU_CUDA; + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){ - m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback; - 
m2l_cl_in.where |= STARPU_OPENCL; - } -#endif - m2l_cl_in.nbuffers = 3; - m2l_cl_in.modes[0] = STARPU_R; - m2l_cl_in.modes[1] = STARPU_R; - m2l_cl_in.modes[2] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); - m2l_cl_in.name = "m2l_cl_in"; - - memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout)); + if(originalCpuKernel->supportM2L(FSTARPU_OPENCL_IDX)){ + m2l_cl_in.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInPassCallback; + m2l_cl_in.where |= STARPU_OPENCL; + } +#endif + m2l_cl_in.nbuffers = 3; + m2l_cl_in.modes[0] = STARPU_R; + m2l_cl_in.modes[1] = STARPU_R; + m2l_cl_in.modes[2] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); + m2l_cl_in.name = "m2l_cl_in"; + + memset(&m2l_cl_inout, 0, sizeof(m2l_cl_inout)); #ifdef STARPU_USE_CPU - if(originalCpuKernel->supportM2LExtern(FSTARPU_CPU_IDX)){ - m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback; - m2l_cl_inout.where |= STARPU_CPU; - } + if(originalCpuKernel->supportM2LExtern(FSTARPU_CPU_IDX)){ + m2l_cl_inout.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallback; + m2l_cl_inout.where |= STARPU_CPU; + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - if(originalCpuKernel->supportM2LExtern(FSTARPU_CUDA_IDX)){ - m2l_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallback; - m2l_cl_inout.where |= STARPU_CUDA; - } + if(originalCpuKernel->supportM2LExtern(FSTARPU_CUDA_IDX)){ //M2L method between two blocks + m2l_cl_inout.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallback; + m2l_cl_inout.where |= STARPU_CUDA; + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - if(originalCpuKernel->supportM2LExtern(FSTARPU_OPENCL_IDX)){ - m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback; - m2l_cl_inout.where |= STARPU_OPENCL; - } -#endif - m2l_cl_inout.nbuffers = 4; - m2l_cl_inout.modes[0] = STARPU_R; - m2l_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); - m2l_cl_inout.modes[2] = STARPU_R; - m2l_cl_inout.modes[3] = STARPU_R; - m2l_cl_inout.name = "m2l_cl_inout"; + if(originalCpuKernel->supportM2LExtern(FSTARPU_OPENCL_IDX)){ + m2l_cl_inout.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallback; + m2l_cl_inout.where |= STARPU_OPENCL; + } +#endif + m2l_cl_inout.nbuffers = 4; + m2l_cl_inout.modes[0] = STARPU_R; + m2l_cl_inout.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); + m2l_cl_inout.modes[2] = STARPU_R; + m2l_cl_inout.modes[3] = STARPU_R; + m2l_cl_inout.name = "m2l_cl_inout"; #ifdef STARPU_USE_REDUX - memset(&p2p_redux_init, 0, sizeof(p2p_redux_init)); + memset(&p2p_redux_init, 0, sizeof(p2p_redux_init)); #ifdef STARPU_USE_CPU - p2p_redux_init.cpu_funcs[0] = FStarPUReduxCpu::InitData<typename ParticleGroupClass::ParticleDataType>; - p2p_redux_init.where |= STARPU_CPU; + p2p_redux_init.cpu_funcs[0] = FStarPUReduxCpu::InitData<typename ParticleGroupClass::ParticleDataType>; + p2p_redux_init.where |= STARPU_CPU; #endif - p2p_redux_init.nbuffers = 1; - p2p_redux_init.modes[0] = STARPU_RW; - p2p_redux_init.name = "p2p_redux_init"; + p2p_redux_init.nbuffers = 1; + p2p_redux_init.modes[0] = STARPU_RW; + p2p_redux_init.name = "p2p_redux_init"; - memset(&p2p_redux_perform, 0, sizeof(p2p_redux_perform)); + memset(&p2p_redux_perform, 0, sizeof(p2p_redux_perform)); #ifdef STARPU_USE_CPU - p2p_redux_perform.cpu_funcs[0] = FStarPUReduxCpu::ReduceData<typename ParticleGroupClass::ParticleDataType>; - p2p_redux_perform.where |= STARPU_CPU; + p2p_redux_perform.cpu_funcs[0] = 
FStarPUReduxCpu::ReduceData<typename ParticleGroupClass::ParticleDataType>; + p2p_redux_perform.where |= STARPU_CPU; #endif - p2p_redux_perform.nbuffers = 2; - p2p_redux_perform.modes[0] = STARPU_RW; - p2p_redux_perform.modes[1] = STARPU_R; - p2p_redux_perform.name = "p2p_redux_perform"; + p2p_redux_perform.nbuffers = 2; + p2p_redux_perform.modes[0] = STARPU_RW; + p2p_redux_perform.modes[1] = STARPU_R; + p2p_redux_perform.name = "p2p_redux_perform"; - memset(&p2p_redux_read, 0, sizeof(p2p_redux_read)); + memset(&p2p_redux_read, 0, sizeof(p2p_redux_read)); #ifdef STARPU_USE_CPU - if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){ - p2p_redux_read.cpu_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>; - p2p_redux_read.where |= STARPU_CPU; - } + if(originalCpuKernel->supportL2P(FSTARPU_CPU_IDX)){ + p2p_redux_read.cpu_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>; + p2p_redux_read.where |= STARPU_CPU; + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){ - p2p_redux_read.cuda_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>; - p2p_redux_read.where |= STARPU_CUDA; - } + if(originalCpuKernel->supportL2P(FSTARPU_CUDA_IDX)){ + p2p_redux_read.cuda_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>; + p2p_redux_read.where |= STARPU_CUDA; + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){ - p2p_redux_read.opencl_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>; - p2p_redux_read.where |= STARPU_OPENCL; - } + if(originalCpuKernel->supportL2P(FSTARPU_OPENCL_IDX)){ + p2p_redux_read.opencl_funcs[0] = FStarPUReduxCpu::EmptyCodelet<typename ParticleGroupClass::ParticleDataType>; + p2p_redux_read.where |= STARPU_OPENCL; + } #endif - p2p_redux_read.nbuffers = 1; - p2p_redux_read.modes[0] = STARPU_R; - p2p_redux_read.name = "p2p_redux_read"; + p2p_redux_read.nbuffers = 1; + p2p_redux_read.modes[0] = STARPU_R; + p2p_redux_read.name = "p2p_redux_read"; #endif #ifdef SCALFMM_USE_STARPU_EXTRACT - memset(&p2p_extract, 0, sizeof(p2p_extract)); - p2p_extract.nbuffers = 2; - p2p_extract.modes[0] = STARPU_R; - p2p_extract.modes[1] = STARPU_RW; - p2p_extract.name = "p2p_extract"; - p2p_extract.cpu_funcs[0] = ThisClass::ExtractP2P; - p2p_extract.where |= STARPU_CPU; - - memset(&p2p_insert, 0, sizeof(p2p_insert)); - p2p_insert.nbuffers = 2; - p2p_insert.modes[0] = STARPU_R; - p2p_insert.modes[1] = STARPU_RW; - p2p_insert.name = "p2p_insert"; - p2p_insert.cpu_funcs[0] = ThisClass::InsertP2P; - p2p_insert.where |= STARPU_CPU; - - memset(&p2p_insert_bis, 0, sizeof(p2p_insert_bis)); - p2p_insert_bis.nbuffers = 2; - p2p_insert_bis.modes[0] = STARPU_R; - p2p_insert_bis.modes[1] = STARPU_RW; - p2p_insert_bis.name = "p2p_insert_bis"; - p2p_insert_bis.cpu_funcs[0] = ThisClass::InsertP2PBis; - p2p_insert_bis.where |= STARPU_CPU; - - memset(&cell_extract_up, 0, sizeof(cell_extract_up)); - cell_extract_up.nbuffers = 3; - cell_extract_up.modes[0] = STARPU_R; - cell_extract_up.modes[1] = STARPU_R; - cell_extract_up.modes[2] = STARPU_RW; - cell_extract_up.name = "cell_extract_up"; - cell_extract_up.cpu_funcs[0] = ThisClass::ExtractCellUp; - cell_extract_up.where |= STARPU_CPU; - - memset(&cell_insert_up, 0, sizeof(cell_insert_up)); - cell_insert_up.nbuffers = 3; - cell_insert_up.modes[0] = STARPU_R; - cell_insert_up.modes[1] = STARPU_RW; - 
cell_insert_up.modes[2] = STARPU_RW; - cell_insert_up.name = "cell_insert_up"; - cell_insert_up.cpu_funcs[0] = ThisClass::InsertCellUp; - cell_insert_up.where |= STARPU_CPU; - - - memset(&cell_insert_up_bis, 0, sizeof(cell_insert_up_bis)); - cell_insert_up_bis.nbuffers = 3; - cell_insert_up_bis.modes[0] = STARPU_R; - cell_insert_up_bis.modes[1] = STARPU_RW; - cell_insert_up_bis.modes[2] = STARPU_RW; - cell_insert_up_bis.name = "cell_insert_up_bis"; - cell_insert_up_bis.cpu_funcs[0] = ThisClass::InsertCellUpBis; - cell_insert_up_bis.where |= STARPU_CPU; -#endif - } + memset(&p2p_extract, 0, sizeof(p2p_extract)); + p2p_extract.nbuffers = 2; + p2p_extract.modes[0] = STARPU_R; + p2p_extract.modes[1] = STARPU_RW; + p2p_extract.name = "p2p_extract"; + p2p_extract.cpu_funcs[0] = ThisClass::ExtractP2P; + p2p_extract.where |= STARPU_CPU; + + memset(&p2p_insert, 0, sizeof(p2p_insert)); + p2p_insert.nbuffers = 2; + p2p_insert.modes[0] = STARPU_R; + p2p_insert.modes[1] = STARPU_RW; + p2p_insert.name = "p2p_insert"; + p2p_insert.cpu_funcs[0] = ThisClass::InsertP2P; + p2p_insert.where |= STARPU_CPU; + + memset(&p2p_insert_bis, 0, sizeof(p2p_insert_bis)); + p2p_insert_bis.nbuffers = 2; + p2p_insert_bis.modes[0] = STARPU_R; + p2p_insert_bis.modes[1] = STARPU_RW; + p2p_insert_bis.name = "p2p_insert_bis"; + p2p_insert_bis.cpu_funcs[0] = ThisClass::InsertP2PBis; + p2p_insert_bis.where |= STARPU_CPU; + + memset(&cell_extract_up, 0, sizeof(cell_extract_up)); + cell_extract_up.nbuffers = 3; + cell_extract_up.modes[0] = STARPU_R; + cell_extract_up.modes[1] = STARPU_R; + cell_extract_up.modes[2] = STARPU_RW; + cell_extract_up.name = "cell_extract_up"; + cell_extract_up.cpu_funcs[0] = ThisClass::ExtractCellUp; + cell_extract_up.where |= STARPU_CPU; + + memset(&cell_insert_up, 0, sizeof(cell_insert_up)); + cell_insert_up.nbuffers = 3; + cell_insert_up.modes[0] = STARPU_R; + cell_insert_up.modes[1] = STARPU_RW; + cell_insert_up.modes[2] = STARPU_RW; + cell_insert_up.name = "cell_insert_up"; + cell_insert_up.cpu_funcs[0] = ThisClass::InsertCellUp; + cell_insert_up.where |= STARPU_CPU; + + + memset(&cell_insert_up_bis, 0, sizeof(cell_insert_up_bis)); + cell_insert_up_bis.nbuffers = 3; + cell_insert_up_bis.modes[0] = STARPU_R; + cell_insert_up_bis.modes[1] = STARPU_RW; + cell_insert_up_bis.modes[2] = STARPU_RW; + cell_insert_up_bis.name = "cell_insert_up_bis"; + cell_insert_up_bis.cpu_funcs[0] = ThisClass::InsertCellUpBis; + cell_insert_up_bis.where |= STARPU_CPU; +#endif + } #ifdef SCALFMM_USE_STARPU_EXTRACT - static void InsertP2P(void *buffers[], void *cl_arg){ - ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), - STARPU_VECTOR_GET_NX(buffers[1]), - nullptr); + static void InsertP2P(void *buffers[], void *cl_arg){ + ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), + STARPU_VECTOR_GET_NX(buffers[1]), + nullptr); - ParticleExtractedHandles* interactionBufferPtr; - starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr); + ParticleExtractedHandles* interactionBufferPtr; + starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr); - containers.restoreData(interactionBufferPtr->leavesToExtract, - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0])); - } + containers.restoreData(interactionBufferPtr->leavesToExtract, + (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), + STARPU_VECTOR_GET_NX(buffers[0])); + } - static void InsertP2PBis(void *buffers[], void *cl_arg){ - ParticleExtractedHandles* interactionBufferPtr; - const 
unsigned char* dataPtr; - size_t datasize; - starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr, &dataPtr, &datasize); + static void InsertP2PBis(void *buffers[], void *cl_arg){ + ParticleExtractedHandles* interactionBufferPtr; + const unsigned char* dataPtr; + size_t datasize; + starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr, &dataPtr, &datasize); - memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), dataPtr, datasize); + memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), dataPtr, datasize); - ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), - STARPU_VECTOR_GET_NX(buffers[1]), - nullptr); + ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), + STARPU_VECTOR_GET_NX(buffers[1]), + nullptr); - containers.restoreData(interactionBufferPtr->leavesToExtract, - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0])); - } + containers.restoreData(interactionBufferPtr->leavesToExtract, + (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), + STARPU_VECTOR_GET_NX(buffers[0])); + } - static void ExtractP2P(void *buffers[], void *cl_arg){ - ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0]), - nullptr); + static void ExtractP2P(void *buffers[], void *cl_arg){ + ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), + STARPU_VECTOR_GET_NX(buffers[0]), + nullptr); - ParticleExtractedHandles* interactionBufferPtr; - starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr); + ParticleExtractedHandles* interactionBufferPtr; + starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr); - containers.extractData(interactionBufferPtr->leavesToExtract, - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), - STARPU_VECTOR_GET_NX(buffers[1])); - } + containers.extractData(interactionBufferPtr->leavesToExtract, + (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), + STARPU_VECTOR_GET_NX(buffers[1])); + } - static void InsertCellUp(void *buffers[], void *cl_arg){ - CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), - STARPU_VECTOR_GET_NX(buffers[1]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), - nullptr); + static void InsertCellUp(void *buffers[], void *cl_arg){ + CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), + STARPU_VECTOR_GET_NX(buffers[1]), + (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), + nullptr); - CellExtractedHandles* interactionBufferPtr; - starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr); + CellExtractedHandles* interactionBufferPtr; + starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr); - currentCells.restoreDataUp(interactionBufferPtr->cellsToExtract, - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0])); - } + currentCells.restoreDataUp(interactionBufferPtr->cellsToExtract, + (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), + STARPU_VECTOR_GET_NX(buffers[0])); + } - static void InsertCellUpBis(void *buffers[], void *cl_arg){ - unsigned char* ptr1; - size_t size1; - unsigned char* ptr2; - size_t size2; - CellExtractedHandles* interactionBufferPtr; - starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr, &ptr1, &size1, &ptr2, &size2); + static void InsertCellUpBis(void *buffers[], void *cl_arg){ + unsigned char* ptr1; + size_t size1; + unsigned char* ptr2; + size_t size2; + CellExtractedHandles* interactionBufferPtr; + starpu_codelet_unpack_args(cl_arg, 
&interactionBufferPtr, &ptr1, &size1, &ptr2, &size2); - memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), ptr1, size1); - memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), ptr2, size2); + memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), ptr1, size1); + memcpy((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), ptr2, size2); - CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), - STARPU_VECTOR_GET_NX(buffers[1]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), - nullptr); + CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), + STARPU_VECTOR_GET_NX(buffers[1]), + (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), + nullptr); - currentCells.restoreDataUp(interactionBufferPtr->cellsToExtract, - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0])); - } + currentCells.restoreDataUp(interactionBufferPtr->cellsToExtract, + (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), + STARPU_VECTOR_GET_NX(buffers[0])); + } - static void ExtractCellUp(void *buffers[], void *cl_arg){ - CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), - nullptr); + static void ExtractCellUp(void *buffers[], void *cl_arg){ + CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), + STARPU_VECTOR_GET_NX(buffers[0]), + (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), + nullptr); - CellExtractedHandles* interactionBufferPtr; - starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr); + CellExtractedHandles* interactionBufferPtr; + starpu_codelet_unpack_args(cl_arg, &interactionBufferPtr); - currentCells.extractDataUp(interactionBufferPtr->cellsToExtract, - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), - STARPU_VECTOR_GET_NX(buffers[2])); - } + currentCells.extractDataUp(interactionBufferPtr->cellsToExtract, + (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), + STARPU_VECTOR_GET_NX(buffers[2])); + } #endif - void initCodeletMpi(){ - memset(&p2p_cl_inout_mpi, 0, sizeof(p2p_cl_inout_mpi)); + void initCodeletMpi(){ + memset(&p2p_cl_inout_mpi, 0, sizeof(p2p_cl_inout_mpi)); #ifdef STARPU_USE_CPU - if(originalCpuKernel->supportP2PMpi(FSTARPU_CPU_IDX)){ - p2p_cl_inout_mpi.where |= STARPU_CPU; - p2p_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallbackMpi; - } + if(originalCpuKernel->supportP2PMpi(FSTARPU_CPU_IDX)){ + p2p_cl_inout_mpi.where |= STARPU_CPU; + p2p_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::directInoutPassCallbackMpi; + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - if(originalCpuKernel->supportP2PMpi(FSTARPU_CUDA_IDX)){ - p2p_cl_inout_mpi.where |= STARPU_CUDA; - p2p_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallbackMpi; - } + if(originalCpuKernel->supportP2PMpi(FSTARPU_CUDA_IDX)){ + p2p_cl_inout_mpi.where |= STARPU_CUDA; + p2p_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::directInoutPassCallbackMpi; + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - if(originalCpuKernel->supportP2PMpi(FSTARPU_OPENCL_IDX)){ - p2p_cl_inout_mpi.where |= STARPU_OPENCL; - p2p_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallbackMpi; - } -#endif - p2p_cl_inout_mpi.nbuffers = 3; - p2p_cl_inout_mpi.modes[0] = STARPU_R; - p2p_cl_inout_mpi.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); - p2p_cl_inout_mpi.modes[2] = STARPU_R; - p2p_cl_inout_mpi.name = 
"p2p_cl_inout_mpi"; - - memset(&m2l_cl_inout_mpi, 0, sizeof(m2l_cl_inout_mpi)); + if(originalCpuKernel->supportP2PMpi(FSTARPU_OPENCL_IDX)){ + p2p_cl_inout_mpi.where |= STARPU_OPENCL; + p2p_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::directInoutPassCallbackMpi; + } +#endif + p2p_cl_inout_mpi.nbuffers = 3; + p2p_cl_inout_mpi.modes[0] = STARPU_R; + p2p_cl_inout_mpi.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); + p2p_cl_inout_mpi.modes[2] = STARPU_R; + p2p_cl_inout_mpi.name = "p2p_cl_inout_mpi"; + + memset(&m2l_cl_inout_mpi, 0, sizeof(m2l_cl_inout_mpi)); #ifdef STARPU_USE_CPU - if(originalCpuKernel->supportM2LMpi(FSTARPU_CPU_IDX)){ - m2l_cl_inout_mpi.where |= STARPU_CPU; - m2l_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallbackMpi; - } + if(originalCpuKernel->supportM2LMpi(FSTARPU_CPU_IDX)){ + m2l_cl_inout_mpi.where |= STARPU_CPU; + m2l_cl_inout_mpi.cpu_funcs[0] = StarPUCpuWrapperClass::transferInoutPassCallbackMpi; + } #endif #ifdef SCALFMM_ENABLE_CUDA_KERNEL - if(originalCpuKernel->supportM2LMpi(FSTARPU_CUDA_IDX)){ - m2l_cl_inout_mpi.where |= STARPU_CUDA; - m2l_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallbackMpi; - } + if(originalCpuKernel->supportM2LMpi(FSTARPU_CUDA_IDX)){ + m2l_cl_inout_mpi.where |= STARPU_CUDA; + m2l_cl_inout_mpi.cuda_funcs[0] = StarPUCudaWrapperClass::transferInoutPassCallbackMpi; + } #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL - if(originalCpuKernel->supportM2LMpi(FSTARPU_OPENCL_IDX)){ - m2l_cl_inout_mpi.where |= STARPU_OPENCL; - m2l_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallbackMpi; - } -#endif - m2l_cl_inout_mpi.nbuffers = 4; - m2l_cl_inout_mpi.modes[0] = STARPU_R; - m2l_cl_inout_mpi.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); - m2l_cl_inout_mpi.modes[2] = STARPU_R; - m2l_cl_inout_mpi.modes[3] = STARPU_R; - m2l_cl_inout_mpi.name = "m2l_cl_inout_mpi"; - } - - /** dealloc in a starpu way all the defined handles */ - void cleanHandle(){ - for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){ - for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){ - if(isDataOwnedBerenger(tree->getCellGroup(idxLevel, idxHandle)->getStartingIndex(), idxLevel))//Clean only our data handle - { - starpu_data_unregister(cellHandles[idxLevel][idxHandle].symb); - starpu_data_unregister(cellHandles[idxLevel][idxHandle].up); - starpu_data_unregister(cellHandles[idxLevel][idxHandle].down); - } + if(originalCpuKernel->supportM2LMpi(FSTARPU_OPENCL_IDX)){ + m2l_cl_inout_mpi.where |= STARPU_OPENCL; + m2l_cl_inout_mpi.opencl_funcs[0] = StarPUOpenClWrapperClass::transferInoutPassCallbackMpi; + } +#endif + m2l_cl_inout_mpi.nbuffers = 4; + m2l_cl_inout_mpi.modes[0] = STARPU_R; + m2l_cl_inout_mpi.modes[1] = starpu_data_access_mode(STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED); + m2l_cl_inout_mpi.modes[2] = STARPU_R; + m2l_cl_inout_mpi.modes[3] = STARPU_R; + m2l_cl_inout_mpi.name = "m2l_cl_inout_mpi"; + } + + /** dealloc in a starpu way all the defined handles */ + void cleanHandle(){ + for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){ + for(int idxHandle = 0 ; idxHandle < int(cellHandles[idxLevel].size()) ; ++idxHandle){ + if(isDataOwnedBerenger(tree->getCellGroup(idxLevel, idxHandle)->getStartingIndex(), idxLevel))//Clean only our data handle + { + starpu_data_unregister(cellHandles[idxLevel][idxHandle].symb); + starpu_data_unregister(cellHandles[idxLevel][idxHandle].up); + 
starpu_data_unregister(cellHandles[idxLevel][idxHandle].down); + } + } + cellHandles[idxLevel].clear(); + } + { + for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){ + if(isDataOwnedBerenger(tree->getCellGroup(tree->getHeight()-1, idxHandle)->getStartingIndex(), tree->getHeight()-1))//Clean only our data handle + { + starpu_data_unregister(particleHandles[idxHandle].symb); + starpu_data_unregister(particleHandles[idxHandle].down); } - cellHandles[idxLevel].clear(); - } - { - for(int idxHandle = 0 ; idxHandle < int(particleHandles.size()) ; ++idxHandle){ - if(isDataOwnedBerenger(tree->getCellGroup(tree->getHeight()-1, idxHandle)->getStartingIndex(), tree->getHeight()-1))//Clean only our data handle - { - starpu_data_unregister(particleHandles[idxHandle].symb); - starpu_data_unregister(particleHandles[idxHandle].down); - } - } - particleHandles.clear(); - } -#ifdef SCALFMM_USE_STARPU_EXTRACT - for(auto& iter : extractedParticlesBuffer){ - starpu_data_unregister(iter.symb); - } - for(auto& iter : duplicatedParticlesBuffer){ - starpu_data_unregister(iter.symb); - } - for(auto& iter : extractedCellBuffer){ - starpu_data_unregister(iter.all); - } - for(auto& iter : duplicatedCellBuffer){ - starpu_data_unregister(iter.symb); } -#endif + particleHandles.clear(); } - - /** Reset the handles array and create new ones to define +#ifdef SCALFMM_USE_STARPU_EXTRACT + for(auto& iter : extractedParticlesBuffer){ + starpu_data_unregister(iter.symb); + } + for(auto& iter : duplicatedParticlesBuffer){ + starpu_data_unregister(iter.symb); + } + for(auto& iter : extractedCellBuffer){ + starpu_data_unregister(iter.all); + } + for(auto& iter : duplicatedCellBuffer){ + starpu_data_unregister(iter.symb); + } +#endif + } + + /** Reset the handles array and create new ones to define * in a starpu way each block of data */ - int tag; - void buildHandles(){ - cleanHandle(); - tag = 0; - int where; - for(int idxLevel = 2 ; idxLevel < tree->getHeight() ; ++idxLevel){ - cellHandles[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel)); - for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ - const CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup); - int registeringNode = dataMappingBerenger(currentCells->getStartingIndex(), idxLevel); - - where = (registeringNode == mpi_rank) ? 
STARPU_MAIN_RAM : -1; - starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].symb, where, - (uintptr_t)currentCells->getRawBuffer(), currentCells->getBufferSizeInByte()); - starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].up, where, - (uintptr_t)currentCells->getRawMultipoleBuffer(), currentCells->getMultipoleBufferSizeInByte()); - starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, where, - (uintptr_t)currentCells->getRawLocalBuffer(), currentCells->getLocalBufferSizeInByte()); - - starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].symb, tag++, registeringNode); - starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].up, tag++, registeringNode); - starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].down, tag++, registeringNode); - cellHandles[idxLevel][idxGroup].intervalSize = int(currentCells->getNumberOfCellsInBlock()); + void printCellHandels (){ + std::cout << " print Cell Handles() " << std::endl; + for(int idxLevel = 2 ; idxLevel < tree->getHeight() ; ++idxLevel){ + std::cout << " Level " << idxLevel << " nb handles: " << cellHandles[idxLevel].size() <<std::endl; + for(std::size_t idxGroup = 0 ; idxGroup < cellHandles[idxLevel].size() ; ++idxGroup){ + std::cout << "idxG " << idxGroup << " sym " << cellHandles[idxLevel][idxGroup].symb + << " mult "<< cellHandles[idxLevel][idxGroup].up + << " loc " << cellHandles[idxLevel][idxGroup].down + << " size "<< cellHandles[idxLevel][idxGroup].intervalSize + << " grouID " << cellHandles[idxLevel][idxGroup].groupID <<std::endl; + } + } + } + void buildHandles(){ + std::cout << " BEGIN buildHandles() " << std::endl; + cleanHandle(); + int tag = 0; + int where; + for(int idxLevel = 2 ; idxLevel < tree->getHeight() ; ++idxLevel){ + std::cout << " Level " << idxLevel << std::endl; + cellHandles[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel)); + for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ + const CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup); + int registeringNode = dataMappingBerenger(currentCells->getStartingIndex(), idxLevel); + int idx_global = currentCells->getIdxGlobal(); + where = (registeringNode == mpi_rank) ? 
STARPU_MAIN_RAM : -1; + std::cout << " idxGroup " << idxGroup << " registeringNode " << registeringNode + << " idx_global "<< idx_global << " where "<< where << std::endl<<std::flush; + starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].symb, where, + (uintptr_t)currentCells->getRawBuffer(), currentCells->getBufferSizeInByte()); + starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].up, where, + (uintptr_t)currentCells->getRawMultipoleBuffer(), currentCells->getMultipoleBufferSizeInByte()); + starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, where, + (uintptr_t)currentCells->getRawLocalBuffer(), currentCells->getLocalBufferSizeInByte()); + + starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].symb, tag++, registeringNode); + starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].up, tag++, registeringNode); + starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].down, tag++, registeringNode); + cellHandles[idxLevel][idxGroup].intervalSize = int(currentCells->getNumberOfCellsInBlock()); + + cellHandles[idxLevel][idxGroup].groupID = idxGroup; #ifdef STARPU_SUPPORT_ARBITER - starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].up, arbiterGlobal); - starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].down, arbiterGlobal); -#endif - } - } - { - particleHandles.resize(tree->getNbParticleGroup()); - for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ - int registeringNode = dataMappingBerenger(tree->getCellGroup(tree->getHeight()-1, idxGroup)->getStartingIndex(), tree->getHeight()-1); - where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1; - ParticleGroupClass* containers = tree->getParticleGroup(idxGroup); - starpu_variable_data_register(&particleHandles[idxGroup].symb, where, - (uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte()); - starpu_variable_data_register(&particleHandles[idxGroup].down, where, - (uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte()); - - starpu_mpi_data_register(particleHandles[idxGroup].symb, tag++, registeringNode); - starpu_mpi_data_register(particleHandles[idxGroup].down, tag++, registeringNode); + starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].up, arbiterGlobal); + starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].down, arbiterGlobal); +#endif + } + } + { + particleHandles.resize(tree->getNbParticleGroup()); + for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ + int registeringNode = dataMappingBerenger(tree->getCellGroup(tree->getHeight()-1, idxGroup)->getStartingIndex(), tree->getHeight()-1); + where = (registeringNode == mpi_rank) ? 
STARPU_MAIN_RAM : -1; + ParticleGroupClass* containers = tree->getParticleGroup(idxGroup); + starpu_variable_data_register(&particleHandles[idxGroup].symb, where, + (uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte()); + starpu_variable_data_register(&particleHandles[idxGroup].down, where, + (uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte()); + + starpu_mpi_data_register(particleHandles[idxGroup].symb, tag++, registeringNode); + starpu_mpi_data_register(particleHandles[idxGroup].down, tag++, registeringNode); #ifdef STARPU_USE_REDUX - starpu_data_set_reduction_methods(particleHandles[idxGroup].down, &p2p_redux_perform, - &p2p_redux_init); + starpu_data_set_reduction_methods(particleHandles[idxGroup].down, &p2p_redux_perform, + &p2p_redux_init); #else #ifdef STARPU_SUPPORT_ARBITER - starpu_data_assign_arbiter(particleHandles[idxGroup].down, arbiterGlobal); + starpu_data_assign_arbiter(particleHandles[idxGroup].down, arbiterGlobal); #endif // STARPU_SUPPORT_ARBITER #endif // STARPU_USE_REDUX - particleHandles[idxGroup].intervalSize = int(containers->getNumberOfLeavesInBlock()); - } + particleHandles[idxGroup].intervalSize = int(containers->getNumberOfLeavesInBlock()); } } - int dataMappingBerenger(MortonIndex const idx, int const idxLevel) const { - for(int i = 0; i < nproc; ++i) - if(nodeRepartition[idxLevel][i][0] <= nodeRepartition[idxLevel][i][1] && idx >= nodeRepartition[idxLevel][i][0] && idx <= nodeRepartition[idxLevel][i][1]) - return i; - if(mpi_rank == 0) - cout << "[scalfmm][map error] idx " << idx << " on level " << idxLevel << " isn't mapped on any proccess. (Default set to 0)." << endl; - return nproc-1; + std::cout << " END buildHandles() " << std::endl; + } + /* + * This function registers the data handles of the blocks when the tree is distributed (LET); + * the global index of every block is used as the StarPU-MPI data tag. + */ + void buildDistributedHandles(){ + std::cout << " BEGIN buildDistributedHandles() " << std::endl; + cleanHandle(); + int where; + for(int idxLevel = 2 ; idxLevel < tree->getHeight() ; ++idxLevel){ + std::cout << " Level " << idxLevel << std::endl; + + cellHandles[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel)); + for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ + const CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup); + int registeringNode = dataMappingBerenger(currentCells->getStartingIndex(), idxLevel); + int idx_global = currentCells->getIdxGlobal(); + where = (registeringNode == mpi_rank) ?
STARPU_MAIN_RAM : -1; + std::cout << " idxGroup " << idxGroup << " registeringNode " << registeringNode + << " idx_global "<< idx_global << " where "<< where << std::endl<<std::flush; + starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].symb, where, + (uintptr_t)currentCells->getRawBuffer(), + currentCells->getBufferSizeInByte()); + // std::cout <<" 1 " <<std::endl<<std::flush; + starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].up, where, + (uintptr_t)currentCells->getRawMultipoleBuffer(), + currentCells->getMultipoleBufferSizeInByte()); + // std::cout <<" 2 " <<std::endl<<std::flush; + starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, where, + (uintptr_t)currentCells->getRawLocalBuffer(), + currentCells->getLocalBufferSizeInByte()); + // std::cout <<" 3 " <<std::endl<<std::flush; + + starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].symb, idx_global, registeringNode); + // std::cout <<" 4 " <<std::endl<<std::flush; + starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].up, idx_global+this->nb_block, registeringNode); + // std::cout <<" 5 " <<std::endl<<std::flush; + starpu_mpi_data_register(cellHandles[idxLevel][idxGroup].down, idx_global+this->nb_block*2, registeringNode); + // std::cout <<" 6 " <<std::endl<<std::flush; + cellHandles[idxLevel][idxGroup].intervalSize = static_cast<int>(currentCells->getNumberOfCellsInBlock()); +#ifdef STARPU_SUPPORT_ARBITER + starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].up, arbiterGlobal); + starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].down, arbiterGlobal); +#endif + } + } + { + particleHandles.resize(tree->getNbParticleGroup()); + for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ + int registeringNode = dataMappingBerenger(tree->getCellGroup(tree->getHeight()-1, idxGroup)->getStartingIndex(), tree->getHeight()-1); + where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1; + ParticleGroupClass* containers = tree->getParticleGroup(idxGroup); + int idx_global = containers->getIdxGlobal(); + + starpu_variable_data_register(&particleHandles[idxGroup].symb, where, + (uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte()); + starpu_variable_data_register(&particleHandles[idxGroup].down, where, + (uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte()); + + starpu_mpi_data_register(particleHandles[idxGroup].symb, idx_global, registeringNode); + starpu_mpi_data_register(particleHandles[idxGroup].down, idx_global+this->nb_block, registeringNode); +#ifdef STARPU_USE_REDUX + starpu_data_set_reduction_methods(particleHandles[idxGroup].down, &p2p_redux_perform, + &p2p_redux_init); +#else +#ifdef STARPU_SUPPORT_ARBITER + starpu_data_assign_arbiter(particleHandles[idxGroup].down, arbiterGlobal); +#endif // STARPU_SUPPORT_ARBITER +#endif // STARPU_USE_REDUX + particleHandles[idxGroup].intervalSize = int(containers->getNumberOfLeavesInBlock()); + } } - /** + std::cout << " END buildDistributedHandles() " << std::endl; + + } + + int dataMappingBerenger(MortonIndex const idx, int const idxLevel) const { + for(int i = 0; i < nproc; ++i) + if(_nodeRepartition[idxLevel][i][0] <= _nodeRepartition[idxLevel][i][1] && idx >= _nodeRepartition[idxLevel][i][0] && idx <= _nodeRepartition[idxLevel][i][1]) + return i; + if(mpi_rank == 0) + std::cout << "[scalfmm][map error] idx " << idx << " on level " << idxLevel << " isn't mapped on any process. (Default set to the last process)."
<< std::endl; + return nproc-1; + } + ///////////////////////////////////////////////////////////////////////////////////////////// + /** * This function is creating the interactions vector between blocks. * It fills externalInteractionsAllLevel and externalInteractionsLeafLevel. * Warning, the omp task for now are using the class attributes! * */ - void buildExternalInteractionVecs(){ - FLOG( FTic timer; FTic leafTimer; FTic cellTimer; ); - // Reset interactions - externalInteractionsAllLevel.clear(); + void buildExternalInteractionVecs(){ + FLOG( FTic timer; FTic leafTimer; FTic cellTimer; ); + // Reset interactions + externalInteractionsAllLevel.clear(); #ifdef SCALFMM_USE_STARPU_EXTRACT - externalInteractionsAllLevelInnerIndexes.clear(); - externalInteractionsAllLevelOuterIndexes.clear(); + externalInteractionsAllLevelInnerIndexes.clear(); + externalInteractionsAllLevelOuterIndexes.clear(); #endif - externalInteractionsLeafLevel.clear(); + externalInteractionsLeafLevel.clear(); #ifdef SCALFMM_USE_STARPU_EXTRACT - externalInteractionsLeafLevelOuter.clear(); - externalInteractionsLeafLevelInner.clear(); + externalInteractionsLeafLevelOuter.clear(); + externalInteractionsLeafLevelInner.clear(); #endif - // One per level + leaf level - externalInteractionsAllLevel.resize(tree->getHeight()); + // One per level + leaf level + externalInteractionsAllLevel.resize(tree->getHeight()); #ifdef SCALFMM_USE_STARPU_EXTRACT - externalInteractionsAllLevelInnerIndexes.resize(tree->getHeight()); - externalInteractionsAllLevelOuterIndexes.resize(tree->getHeight()); + externalInteractionsAllLevelInnerIndexes.resize(tree->getHeight()); + externalInteractionsAllLevelOuterIndexes.resize(tree->getHeight()); #endif - // First leaf level - { - // We create one big vector per block - externalInteractionsLeafLevel.resize(tree->getNbParticleGroup()); + // First leaf level + { + // We create one big vector per block + externalInteractionsLeafLevel.resize(tree->getNbParticleGroup()); #ifdef SCALFMM_USE_STARPU_EXTRACT - externalInteractionsLeafLevelOuter.resize(tree->getNbParticleGroup()); - externalInteractionsLeafLevelInner.resize(tree->getNbParticleGroup()); + externalInteractionsLeafLevelOuter.resize(tree->getNbParticleGroup()); + externalInteractionsLeafLevelInner.resize(tree->getNbParticleGroup()); #endif - for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ - // Create the vector - ParticleGroupClass* containers = tree->getParticleGroup(idxGroup); + for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ + // Create the vector + ParticleGroupClass* containers = tree->getParticleGroup(idxGroup); - std::vector<BlockInteractions<ParticleGroupClass>>* externalInteractions = &externalInteractionsLeafLevel[idxGroup]; + std::vector<BlockInteractions<ParticleGroupClass>>* externalInteractions = &externalInteractionsLeafLevel[idxGroup]; #ifdef SCALFMM_USE_STARPU_EXTRACT - std::vector<std::vector<int>>* externalInteractionsOuter = &externalInteractionsLeafLevelOuter[idxGroup]; - std::vector<std::vector<int>>* externalInteractionsInner = &externalInteractionsLeafLevelInner[idxGroup]; + std::vector<std::vector<int>>* externalInteractionsOuter = &externalInteractionsLeafLevelOuter[idxGroup]; + std::vector<std::vector<int>>* externalInteractionsInner = &externalInteractionsLeafLevelInner[idxGroup]; #endif #ifdef SCALFMM_USE_STARPU_EXTRACT #pragma omp task default(none) firstprivate(idxGroup, containers, externalInteractions, externalInteractionsOuter, externalInteractionsInner) 
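A minimal sketch, assuming simplified types, of the two conventions used by the buildDistributedHandles() and dataMappingBerenger() code above: a block is owned by the rank whose per-level [first, last] Morton interval contains the block's starting index (falling back to the last rank when nothing matches), and the three StarPU-MPI tags of one block are derived from its global index as idx_global, idx_global + nb_block and idx_global + 2*nb_block, so every process computes the same unique tags without a shared counter. The names OwnershipTable, ownerOfBlock and tagsForBlock are illustrative only, not ScalFMM identifiers.

    #include <array>
    #include <iostream>
    #include <vector>

    using MortonIndex = long long;

    // Hypothetical ownership table: table[level][rank] = {first, last} Morton index.
    using OwnershipTable = std::vector<std::vector<std::array<MortonIndex, 2>>>;

    // Same rule as dataMappingBerenger(): return the rank whose interval at this
    // level contains idx; if no interval matches, fall back to the last rank.
    int ownerOfBlock(const OwnershipTable& table, int level, MortonIndex idx, int nproc) {
        for(int rank = 0; rank < nproc; ++rank) {
            const auto& interval = table[level][rank];
            if(interval[0] <= interval[1] && idx >= interval[0] && idx <= interval[1]) {
                return rank;
            }
        }
        return nproc - 1;
    }

    // Tag scheme used when one block registers its symb/up/down handles.
    struct BlockTags { int symb; int up; int down; };
    BlockTags tagsForBlock(int idx_global, int nb_block_total) {
        return { idx_global, idx_global + nb_block_total, idx_global + 2 * nb_block_total };
    }

    int main() {
        OwnershipTable table(3);
        table[2] = {{0, 7}, {8, 15}};                       // level 2: rank 0 owns [0,7], rank 1 owns [8,15]
        std::cout << ownerOfBlock(table, 2, 9, 2) << '\n';  // prints 1
        const BlockTags t = tagsForBlock(4, 10);            // block with global index 4 out of 10 blocks
        std::cout << t.symb << ' ' << t.up << ' ' << t.down << '\n';  // prints "4 14 24"
    }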
#else - #pragma omp task default(none) firstprivate(idxGroup, containers, externalInteractions) -#endif - { // Can be a task(inout:iterCells) - std::vector<OutOfBlockInteraction> outsideInteractions; - const MortonIndex blockStartIdx = containers->getStartingIndex(); - const MortonIndex blockEndIdx = containers->getEndingIndex(); - - for(int leafIdx = 0 ; leafIdx < containers->getNumberOfLeavesInBlock() ; ++leafIdx){ - const MortonIndex mindex = containers->getLeafMortonIndex(leafIdx); - // ParticleContainerClass particles = containers->template getLeaf<ParticleContainerClass>(leafIdx); - - MortonIndex interactionsIndexes[26]; - int interactionsPosition[26]; - FTreeCoordinate coord(mindex); - int counter = coord.getNeighborsIndexes(tree->getHeight(),interactionsIndexes,interactionsPosition); - - for(int idxInter = 0 ; idxInter < counter ; ++idxInter){ - if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){ - // Inside block interaction, do nothing - } - else if(interactionsIndexes[idxInter] < mindex){ - OutOfBlockInteraction property; - property.insideIndex = mindex; - property.outIndex = interactionsIndexes[idxInter]; - property.relativeOutPosition = interactionsPosition[idxInter]; - property.insideIdxInBlock = leafIdx; - property.outsideIdxInBlock = -1; - outsideInteractions.push_back(property); - } - } - } - - // Sort to match external order - FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size())); - - int currentOutInteraction = 0; - for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){ - ParticleGroupClass* leftContainers = tree->getParticleGroup(idxLeftGroup); - const MortonIndex blockStartIdxOther = leftContainers->getStartingIndex(); - const MortonIndex blockEndIdxOther = leftContainers->getEndingIndex(); - - while(currentOutInteraction < int(outsideInteractions.size()) - && (outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther - || leftContainers->getLeafIndex(outsideInteractions[currentOutInteraction].outIndex) == -1) - && outsideInteractions[currentOutInteraction].outIndex < blockEndIdxOther){ - currentOutInteraction += 1; - } - - int lastOutInteraction = currentOutInteraction; - int copyExistingInteraction = currentOutInteraction; - while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){ - const int leafPos = leftContainers->getLeafIndex(outsideInteractions[lastOutInteraction].outIndex); - if(leafPos != -1){ - if(copyExistingInteraction != lastOutInteraction){ - outsideInteractions[copyExistingInteraction] = outsideInteractions[lastOutInteraction]; - } - outsideInteractions[copyExistingInteraction].outsideIdxInBlock = leafPos; - copyExistingInteraction += 1; - } - lastOutInteraction += 1; - } - - const int nbInteractionsBetweenBlocks = (copyExistingInteraction-currentOutInteraction); - if(nbInteractionsBetweenBlocks){ - externalInteractions->emplace_back(); - BlockInteractions<ParticleGroupClass>* interactions = &externalInteractions->back(); - interactions->otherBlock = leftContainers; - interactions->otherBlockId = idxLeftGroup; - interactions->interactions.resize(nbInteractionsBetweenBlocks); - std::copy(outsideInteractions.begin() + currentOutInteraction, - outsideInteractions.begin() + copyExistingInteraction, - interactions->interactions.begin()); +#pragma omp task default(none) firstprivate(idxGroup, 
containers, externalInteractions) +#endif + { // Can be a task(inout:iterCells) + std::vector<OutOfBlockInteraction> outsideInteractions; + const MortonIndex blockStartIdx = containers->getStartingIndex(); + const MortonIndex blockEndIdx = containers->getEndingIndex(); + + for(int leafIdx = 0 ; leafIdx < containers->getNumberOfLeavesInBlock() ; ++leafIdx){ + const MortonIndex mindex = containers->getLeafMortonIndex(leafIdx); + // ParticleContainerClass particles = containers->template getLeaf<ParticleContainerClass>(leafIdx); + + MortonIndex interactionsIndexes[26]; + int interactionsPosition[26]; + FTreeCoordinate coord(mindex); + int counter = coord.getNeighborsIndexes(tree->getHeight(),interactionsIndexes,interactionsPosition); + + for(int idxInter = 0 ; idxInter < counter ; ++idxInter){ + if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){ + // Inside block interaction, do nothing + } + else if(interactionsIndexes[idxInter] < mindex){ + OutOfBlockInteraction property; + property.insideIndex = mindex; + property.outIndex = interactionsIndexes[idxInter]; + property.relativeOutPosition = interactionsPosition[idxInter]; + property.insideIdxInBlock = leafIdx; + property.outsideIdxInBlock = -1; + outsideInteractions.push_back(property); + } + } + } + + // Sort to match external order + FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size())); + + int currentOutInteraction = 0; + for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){ + ParticleGroupClass* leftContainers = tree->getParticleGroup(idxLeftGroup); + const MortonIndex blockStartIdxOther = leftContainers->getStartingIndex(); + const MortonIndex blockEndIdxOther = leftContainers->getEndingIndex(); + + while(currentOutInteraction < int(outsideInteractions.size()) + && (outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther + || leftContainers->getLeafIndex(outsideInteractions[currentOutInteraction].outIndex) == -1) + && outsideInteractions[currentOutInteraction].outIndex < blockEndIdxOther){ + currentOutInteraction += 1; + } + + int lastOutInteraction = currentOutInteraction; + int copyExistingInteraction = currentOutInteraction; + while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){ + const int leafPos = leftContainers->getLeafIndex(outsideInteractions[lastOutInteraction].outIndex); + if(leafPos != -1){ + if(copyExistingInteraction != lastOutInteraction){ + outsideInteractions[copyExistingInteraction] = outsideInteractions[lastOutInteraction]; + } + outsideInteractions[copyExistingInteraction].outsideIdxInBlock = leafPos; + copyExistingInteraction += 1; + } + lastOutInteraction += 1; + } + + const int nbInteractionsBetweenBlocks = (copyExistingInteraction-currentOutInteraction); + if(nbInteractionsBetweenBlocks){ + externalInteractions->emplace_back(); + BlockInteractions<ParticleGroupClass>* interactions = &externalInteractions->back(); + interactions->otherBlock = leftContainers; + interactions->otherBlockId = idxLeftGroup; + interactions->interactions.resize(nbInteractionsBetweenBlocks); + std::copy(outsideInteractions.begin() + currentOutInteraction, + outsideInteractions.begin() + copyExistingInteraction, + interactions->interactions.begin()); #ifdef SCALFMM_USE_STARPU_EXTRACT - externalInteractionsOuter->emplace_back(); - 
externalInteractionsInner->emplace_back(); + externalInteractionsOuter->emplace_back(); + externalInteractionsInner->emplace_back(); - std::vector<int>* interactionsOuter = &externalInteractionsOuter->back(); - std::vector<int>* interactionsInner = &externalInteractionsInner->back(); + std::vector<int>* interactionsOuter = &externalInteractionsOuter->back(); + std::vector<int>* interactionsInner = &externalInteractionsInner->back(); - for(int idxUnique = 0 ; idxUnique < interactions->interactions.size() ; ++idxUnique){ - interactionsOuter->push_back(interactions->interactions[idxUnique].outsideIdxInBlock); - interactionsInner->push_back(interactions->interactions[idxUnique].insideIdxInBlock); - } - FQuickSort<int, int>::QsSequential(interactionsOuter->data(),int(interactionsOuter->size())); - FQuickSort<int, int>::QsSequential(interactionsInner->data(),int(interactionsInner->size())); + for(std::size_t idxUnique = 0 ; idxUnique < interactions->interactions.size() ; ++idxUnique){ + interactionsOuter->push_back(interactions->interactions[idxUnique].outsideIdxInBlock); + interactionsInner->push_back(interactions->interactions[idxUnique].insideIdxInBlock); + } + FQuickSort<int, int>::QsSequential(interactionsOuter->data(),int(interactionsOuter->size())); + FQuickSort<int, int>::QsSequential(interactionsInner->data(),int(interactionsInner->size())); - interactionsOuter->erase(std::unique(interactionsOuter->begin(), interactionsOuter->end()), interactionsOuter->end()); - interactionsInner->erase(std::unique(interactionsInner->begin(), interactionsInner->end()), interactionsInner->end()); + interactionsOuter->erase(std::unique(interactionsOuter->begin(), interactionsOuter->end()), interactionsOuter->end()); + interactionsInner->erase(std::unique(interactionsInner->begin(), interactionsInner->end()), interactionsInner->end()); #endif - } + } - currentOutInteraction = lastOutInteraction; - } - } - } + currentOutInteraction = lastOutInteraction; + } + } } - FLOG( leafTimer.tac(); ); - FLOG( cellTimer.tic(); ); - { - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ - externalInteractionsAllLevel[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel)); + } + FLOG( leafTimer.tac(); ); + FLOG( cellTimer.tic(); ); + { + for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + externalInteractionsAllLevel[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel)); #ifdef SCALFMM_USE_STARPU_EXTRACT - externalInteractionsAllLevelInnerIndexes[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel)); - externalInteractionsAllLevelOuterIndexes[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel)); + externalInteractionsAllLevelInnerIndexes[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel)); + externalInteractionsAllLevelOuterIndexes[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel)); #endif - for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ - CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup); + for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ + CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup); - std::vector<BlockInteractions<CellContainerClass>>* externalInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup]; + std::vector<BlockInteractions<CellContainerClass>>* externalInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup]; #ifdef SCALFMM_USE_STARPU_EXTRACT - std::vector<std::vector<int>>* externalInteractionsInner = 
&externalInteractionsAllLevelInnerIndexes[idxLevel][idxGroup]; - std::vector<std::vector<int>>* externalInteractionsOuter = &externalInteractionsAllLevelOuterIndexes[idxLevel][idxGroup]; + std::vector<std::vector<int>>* externalInteractionsInner = &externalInteractionsAllLevelInnerIndexes[idxLevel][idxGroup]; + std::vector<std::vector<int>>* externalInteractionsOuter = &externalInteractionsAllLevelOuterIndexes[idxLevel][idxGroup]; #endif #ifdef SCALFMM_USE_STARPU_EXTRACT #pragma omp task default(none) firstprivate(idxGroup, currentCells, idxLevel, externalInteractions, externalInteractionsInner, externalInteractionsOuter) #else - #pragma omp task default(none) firstprivate(idxGroup, currentCells, idxLevel, externalInteractions) -#endif - { - std::vector<OutOfBlockInteraction> outsideInteractions; - const MortonIndex blockStartIdx = currentCells->getStartingIndex(); - const MortonIndex blockEndIdx = currentCells->getEndingIndex(); - - for(int cellIdx = 0 ; cellIdx < currentCells->getNumberOfCellsInBlock() ; ++cellIdx){ - const MortonIndex mindex = currentCells->getCellMortonIndex(cellIdx); - - MortonIndex interactionsIndexes[189]; - int interactionsPosition[189]; - const FTreeCoordinate coord(mindex); - int counter = coord.getInteractionNeighbors(idxLevel,interactionsIndexes,interactionsPosition); - - for(int idxInter = 0 ; idxInter < counter ; ++idxInter){ - if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){ - // Nothing to do - } - else if(interactionsIndexes[idxInter] < mindex){ - OutOfBlockInteraction property; - property.insideIndex = mindex; - property.outIndex = interactionsIndexes[idxInter]; - property.relativeOutPosition = interactionsPosition[idxInter]; - property.insideIdxInBlock = cellIdx; - property.outsideIdxInBlock = -1; - outsideInteractions.push_back(property); - } - } - } - - // Manage outofblock interaction - FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size())); - - int currentOutInteraction = 0; - for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){ - CellContainerClass* leftCells = tree->getCellGroup(idxLevel, idxLeftGroup); - const MortonIndex blockStartIdxOther = leftCells->getStartingIndex(); - const MortonIndex blockEndIdxOther = leftCells->getEndingIndex(); - - while(currentOutInteraction < int(outsideInteractions.size()) - && (outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther - || leftCells->getCellIndex(outsideInteractions[currentOutInteraction].outIndex) == -1) - && outsideInteractions[currentOutInteraction].outIndex < blockEndIdxOther){ - currentOutInteraction += 1; - } - - int lastOutInteraction = currentOutInteraction; - int copyExistingInteraction = currentOutInteraction; - while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){ - const int cellPos = leftCells->getCellIndex(outsideInteractions[lastOutInteraction].outIndex); - if(cellPos != -1){ - if(copyExistingInteraction != lastOutInteraction){ - outsideInteractions[copyExistingInteraction] = outsideInteractions[lastOutInteraction]; - } - outsideInteractions[copyExistingInteraction].outsideIdxInBlock = cellPos; - copyExistingInteraction += 1; - } - lastOutInteraction += 1; - } - - // Create interactions - const int nbInteractionsBetweenBlocks = (copyExistingInteraction-currentOutInteraction); - if(nbInteractionsBetweenBlocks){ 
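A minimal sketch, under simplified assumptions, of the filtering rule that buildExternalInteractionVecs() applies in this hunk: for each leaf the neighbour Morton indices are enumerated, neighbours that fall inside the current block's [startingIndex, endingIndex) range are resolved internally, and only partners with a smaller Morton index are recorded, so each block pair is handled exactly once; sorting the result then lets the earlier blocks be swept in order and the list be cut into per-block interaction vectors. The helper names collectOutsideInteractions and neighboursOf are illustrative, not ScalFMM identifiers.

    #include <algorithm>
    #include <functional>
    #include <iostream>
    #include <vector>

    using MortonIndex = long long;

    // Pair recorded when a leaf interacts with a leaf stored in another block.
    struct OutOfBlockInteractionSketch {
        MortonIndex insideIndex;   // leaf of the current block
        MortonIndex outIndex;      // partner outside [blockStartIdx, blockEndIdx)
    };

    std::vector<OutOfBlockInteractionSketch> collectOutsideInteractions(
            const std::vector<MortonIndex>& blockLeaves,
            MortonIndex blockStartIdx, MortonIndex blockEndIdx,
            const std::function<std::vector<MortonIndex>(MortonIndex)>& neighboursOf) {
        std::vector<OutOfBlockInteractionSketch> outside;
        for(const MortonIndex mindex : blockLeaves) {
            for(const MortonIndex neighbour : neighboursOf(mindex)) {
                const bool insideBlock = (blockStartIdx <= neighbour && neighbour < blockEndIdx);
                if(!insideBlock && neighbour < mindex) {   // keep each pair only once
                    outside.push_back({mindex, neighbour});
                }
            }
        }
        // Sorted by partner index so the caller can walk the previous blocks in
        // order and split the list into one interaction vector per partner block.
        std::sort(outside.begin(), outside.end(),
                  [](const OutOfBlockInteractionSketch& a, const OutOfBlockInteractionSketch& b) {
                      return a.outIndex < b.outIndex;
                  });
        return outside;
    }

    int main() {
        // Toy 1-D "tree": each leaf interacts with index-1 and index+1.
        const auto neighbours = [](MortonIndex m) { return std::vector<MortonIndex>{m - 1, m + 1}; };
        // The block owns leaves [8, 12): only leaf 8 has an outside partner with a smaller index.
        for(const auto& it : collectOutsideInteractions({8, 9, 10, 11}, 8, 12, neighbours)) {
            std::cout << it.insideIndex << " <-> " << it.outIndex << '\n';   // prints "8 <-> 7"
        }
    }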
- externalInteractions->emplace_back(); - BlockInteractions<CellContainerClass>* interactions = &externalInteractions->back(); - interactions->otherBlock = leftCells; - interactions->otherBlockId = idxLeftGroup; - interactions->interactions.resize(nbInteractionsBetweenBlocks); - std::copy(outsideInteractions.begin() + currentOutInteraction, - outsideInteractions.begin() + copyExistingInteraction, - interactions->interactions.begin()); +#pragma omp task default(none) firstprivate(idxGroup, currentCells, idxLevel, externalInteractions) +#endif + { + std::vector<OutOfBlockInteraction> outsideInteractions; + const MortonIndex blockStartIdx = currentCells->getStartingIndex(); + const MortonIndex blockEndIdx = currentCells->getEndingIndex(); + + for(int cellIdx = 0 ; cellIdx < currentCells->getNumberOfCellsInBlock() ; ++cellIdx){ + const MortonIndex mindex = currentCells->getCellMortonIndex(cellIdx); + + MortonIndex interactionsIndexes[189]; + int interactionsPosition[189]; + const FTreeCoordinate coord(mindex); + int counter = coord.getInteractionNeighbors(idxLevel,interactionsIndexes,interactionsPosition); + + for(int idxInter = 0 ; idxInter < counter ; ++idxInter){ + if( blockStartIdx <= interactionsIndexes[idxInter] && interactionsIndexes[idxInter] < blockEndIdx ){ + // Nothing to do + } + else if(interactionsIndexes[idxInter] < mindex){ + OutOfBlockInteraction property; + property.insideIndex = mindex; + property.outIndex = interactionsIndexes[idxInter]; + property.relativeOutPosition = interactionsPosition[idxInter]; + property.insideIdxInBlock = cellIdx; + property.outsideIdxInBlock = -1; + outsideInteractions.push_back(property); + } + } + } + + // Manage outofblock interaction + FQuickSort<OutOfBlockInteraction, int>::QsSequential(outsideInteractions.data(),int(outsideInteractions.size())); + + int currentOutInteraction = 0; + for(int idxLeftGroup = 0 ; idxLeftGroup < idxGroup && currentOutInteraction < int(outsideInteractions.size()) ; ++idxLeftGroup){ + CellContainerClass* leftCells = tree->getCellGroup(idxLevel, idxLeftGroup); + const MortonIndex blockStartIdxOther = leftCells->getStartingIndex(); + const MortonIndex blockEndIdxOther = leftCells->getEndingIndex(); + + while(currentOutInteraction < int(outsideInteractions.size()) + && (outsideInteractions[currentOutInteraction].outIndex < blockStartIdxOther + || leftCells->getCellIndex(outsideInteractions[currentOutInteraction].outIndex) == -1) + && outsideInteractions[currentOutInteraction].outIndex < blockEndIdxOther){ + currentOutInteraction += 1; + } + + int lastOutInteraction = currentOutInteraction; + int copyExistingInteraction = currentOutInteraction; + while(lastOutInteraction < int(outsideInteractions.size()) && outsideInteractions[lastOutInteraction].outIndex < blockEndIdxOther){ + const int cellPos = leftCells->getCellIndex(outsideInteractions[lastOutInteraction].outIndex); + if(cellPos != -1){ + if(copyExistingInteraction != lastOutInteraction){ + outsideInteractions[copyExistingInteraction] = outsideInteractions[lastOutInteraction]; + } + outsideInteractions[copyExistingInteraction].outsideIdxInBlock = cellPos; + copyExistingInteraction += 1; + } + lastOutInteraction += 1; + } + + // Create interactions + const int nbInteractionsBetweenBlocks = (copyExistingInteraction-currentOutInteraction); + if(nbInteractionsBetweenBlocks){ + externalInteractions->emplace_back(); + BlockInteractions<CellContainerClass>* interactions = &externalInteractions->back(); + interactions->otherBlock = leftCells; + interactions->otherBlockId = 
idxLeftGroup; + interactions->interactions.resize(nbInteractionsBetweenBlocks); + std::copy(outsideInteractions.begin() + currentOutInteraction, + outsideInteractions.begin() + copyExistingInteraction, + interactions->interactions.begin()); #ifdef SCALFMM_USE_STARPU_EXTRACT - externalInteractionsInner->emplace_back(); - std::vector<int>* interactionsInnerIndexes = &externalInteractionsInner->back(); - externalInteractionsOuter->emplace_back(); - std::vector<int>* interactionsOuterIndexes = &externalInteractionsOuter->back(); - - for(int idxUnique = 0 ; idxUnique < interactions->interactions.size() ; ++idxUnique){ - interactionsOuterIndexes->push_back(interactions->interactions[idxUnique].outsideIdxInBlock); - interactionsInnerIndexes->push_back(interactions->interactions[idxUnique].insideIdxInBlock); - } - - FQuickSort<int, int>::QsSequential(interactionsOuterIndexes->data(),int(interactionsOuterIndexes->size())); - interactionsOuterIndexes->erase(std::unique(interactionsOuterIndexes->begin(), interactionsOuterIndexes->end()), - interactionsOuterIndexes->end()); - FQuickSort<int, int>::QsSequential(interactionsInnerIndexes->data(),int(interactionsInnerIndexes->size())); - interactionsInnerIndexes->erase(std::unique(interactionsInnerIndexes->begin(), interactionsInnerIndexes->end()), - interactionsInnerIndexes->end()); -#endif - } - - currentOutInteraction = lastOutInteraction; - } - } - } + externalInteractionsInner->emplace_back(); + std::vector<int>* interactionsInnerIndexes = &externalInteractionsInner->back(); + externalInteractionsOuter->emplace_back(); + std::vector<int>* interactionsOuterIndexes = &externalInteractionsOuter->back(); + + for(std::size_t idxUnique = 0 ; idxUnique < interactions->interactions.size() ; ++idxUnique){ + interactionsOuterIndexes->push_back(interactions->interactions[idxUnique].outsideIdxInBlock); + interactionsInnerIndexes->push_back(interactions->interactions[idxUnique].insideIdxInBlock); + } + + FQuickSort<int, int>::QsSequential(interactionsOuterIndexes->data(),int(interactionsOuterIndexes->size())); + interactionsOuterIndexes->erase(std::unique(interactionsOuterIndexes->begin(), interactionsOuterIndexes->end()), + interactionsOuterIndexes->end()); + FQuickSort<int, int>::QsSequential(interactionsInnerIndexes->data(),int(interactionsInnerIndexes->size())); + interactionsInnerIndexes->erase(std::unique(interactionsInnerIndexes->begin(), interactionsInnerIndexes->end()), + interactionsInnerIndexes->end()); +#endif + } + + currentOutInteraction = lastOutInteraction; + } + } } } - FLOG( cellTimer.tac(); ); - -#pragma omp taskwait - - FLOG( FLog::Controller << "\t\t Prepare in " << timer.tacAndElapsed() << "s\n" ); - FLOG( FLog::Controller << "\t\t\t Prepare at leaf level in " << leafTimer.elapsed() << "s\n" ); - FLOG( FLog::Controller << "\t\t\t Prepare at other levels in " << cellTimer.elapsed() << "s\n" ); } + FLOG( cellTimer.tac(); ); - ///////////////////////////////////////////////////////////////////////////////////// - /// Bottom Pass - ///////////////////////////////////////////////////////////////////////////////////// - - void bottomPass(){ - FLOG( FTic timer; ); - - FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size()); - - for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ - starpu_mpi_insert_task(MPI_COMM_WORLD, - &p2m_cl, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int), - #ifdef SCALFMM_STARPU_USE_PRIO - 
STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2M(), - #endif - STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb, - STARPU_RW, cellHandles[tree->getHeight()-1][idxGroup].up, - STARPU_R, particleHandles[idxGroup].symb, - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, p2mTaskNames.get(), - #else - //"P2M-nb_i_p" - STARPU_NAME, taskNames->print("P2M", "%d, %lld, %lld, %lld, %lld, %d\n", - tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(tree->getHeight()-1,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(), - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(cellHandles[tree->getHeight()-1][idxGroup].up)), - #endif - #endif - 0); - } - - FLOG( FLog::Controller << "\t\t bottomPass in " << timer.tacAndElapsed() << "s\n" ); - } - - ///////////////////////////////////////////////////////////////////////////////////// - /// Upward Pass - ///////////////////////////////////////////////////////////////////////////////////// - - void upwardPass(){ - FLOG( FTic timer; ); - for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ - int idxSubGroup = 0; - - for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ - CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup); - - // Skip current group if needed - if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){ - ++idxSubGroup; - FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) ); - FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() ); - } +#pragma omp taskwait - // Copy at max 8 groups - { - starpu_mpi_insert_task(MPI_COMM_WORLD, - &m2m_cl, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &idxLevel, sizeof(idxLevel), - STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + FLOG( FLog::Controller << "\t\t Prepare in " << timer.tacAndElapsed() << "s\n" ); + FLOG( FLog::Controller << "\t\t\t Prepare at leaf level in " << leafTimer.elapsed() << "s\n" ); + FLOG( FLog::Controller << "\t\t\t Prepare at other levels in " << cellTimer.elapsed() << "s\n" ); + } + + ///////////////////////////////////////////////////////////////////////////////////// + /// Bottom Pass + ///////////////////////////////////////////////////////////////////////////////////// + + void bottomPass(){ + FLOG( FTic timer; ); + + FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size()); + + for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ + starpu_mpi_insert_task(MPI_COMM_WORLD, + &p2m_cl, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2M(), + #endif + STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb, + STARPU_RW, cellHandles[tree->getHeight()-1][idxGroup].up, + STARPU_R, particleHandles[idxGroup].symb, + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, p2mTaskNames.get(), + #else + //"P2M-nb_i_p" + STARPU_NAME, taskNames->print("P2M", "%d, %lld, 
%lld, %lld, %lld, %d\n", + tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(tree->getHeight()-1,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(), + tree->getParticleGroup(idxGroup)->getStartingIndex(), + tree->getParticleGroup(idxGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[tree->getHeight()-1][idxGroup].up)), + #endif + #endif + 0); + } + + FLOG( FLog::Controller << "\t\t bottomPass in " << timer.tacAndElapsed() << "s\n" ); + } + + ///////////////////////////////////////////////////////////////////////////////////// + /// Upward Pass + ///////////////////////////////////////////////////////////////////////////////////// + + void upwardPassDuplicate(){ + FLOG( FTic timer; ); + for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ + int idxSubGroup = 0; + + for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ + CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup); + + // Skip current group if needed + if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){ + ++idxSubGroup; + FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) ); + FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() ); + } + + // Copy at max 8 groups + { + starpu_mpi_insert_task(MPI_COMM_WORLD, + &m2m_cl, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2M(idxLevel), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2M(idxLevel), #endif - STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly - (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].up, //The remaining, read/write - STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly - STARPU_R, cellHandles[idxLevel+1][idxSubGroup].up, //level d'avant readonly - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, m2mTaskNames[idxLevel].get(), - #else - //"M2M-l_nb_i_nbc_ic_s" - STARPU_NAME, taskNames->print("M2M", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), - FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- - FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].up)), - #endif - #endif - 0); + STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly + 
(STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].up, //The remaining, read/write + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].up, //level d'avant readonly + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, m2mTaskNames[idxLevel].get(), + #else + //"M2M-l_nb_i_nbc_ic_s" + STARPU_NAME, taskNames->print("M2M", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), + FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- + FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].up)), + #endif + #endif + 0); - } + } - while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7) - && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1) - && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){ - idxSubGroup += 1; + while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7) + && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1) + && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){ + idxSubGroup += 1; - starpu_mpi_insert_task(MPI_COMM_WORLD, - &m2m_cl, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &idxLevel, sizeof(idxLevel), - STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + starpu_mpi_insert_task(MPI_COMM_WORLD, + &m2m_cl, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2M(idxLevel), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2M(idxLevel), #endif - STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly - (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].up, //The remaining, read/write - STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly - STARPU_R, cellHandles[idxLevel+1][idxSubGroup].up, //level d'avant readonly - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, m2mTaskNames[idxLevel].get(), - #else - //M2M-l_nb_i_nbc_ic_s - STARPU_NAME, taskNames->print("M2M", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), - 
tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), - FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- - FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].up)), - #endif - #endif - 0); - } - + STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].up, //The remaining, read/write + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].up, //level d'avant readonly + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, m2mTaskNames[idxLevel].get(), + #else + //M2M-l_nb_i_nbc_ic_s + STARPU_NAME, taskNames->print("M2M", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), + FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- + FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].up)), + #endif + #endif + 0); + } + + } + } + FLOG( FLog::Controller << "\t\t upwardPass in " << timer.tacAndElapsed() << "s\n" ); + } + + void upwardPassNoDuplicate(){ + FLOG( FTic timer; ); + // iterate on every working level from lower to upper + for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= + FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ + // index of subgroup + // iterate on every group of cells at current the level + for(int idxGroup = 0 ; + idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; + ++idxGroup) + { + int idxSubGroup = 0; + // get the current group of cell + CellContainerClass*const currentCells = + tree->getCellGroup(idxLevel, idxGroup); + + // Skip current group if needed + // Check if the subGroup is the child of the current group + while( + (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex() << 3)) ) + //&& (!tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() && !currentCells->isMine())) + { + ++idxSubGroup; + // if we have no more subGroup + if(idxSubGroup == tree->getNbCellGroupAtLevel(idxLevel+1)) + break; + } + // if we have no more subGroup + if(idxSubGroup == tree->getNbCellGroupAtLevel(idxLevel+1)) + break; + // if the current block have a morton index too small + 
//if(tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex() > currentCells->getEndingIndex() << 3 ){ + // break; + //} + FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) ); + // Copy at max 8 groups + { + // if(!currentCells->isMine() && tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() || !tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() && currentCells->isMine() ) + //std::cout<<currentCells->getIdxGlobal()<<" "<<currentCells->isMine() << " ----|> " << tree->getCellGroup(idxLevel+1,idxSubGroup)->getIdxGlobal() << " "<< tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() <<std::endl; + + starpu_mpi_insert_task( + MPI_COMM_WORLD, + &m2m_cl, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2M(idxLevel), + #endif + STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].up, //The remaining, read/write + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].up, //level d'avant readonly + #ifdef STARPU_USE_TASK_NAME + STARPU_NAME, m2mTaskNames[idxLevel].get(), + #endif + 0 + ); } - } - FLOG( FLog::Controller << "\t\t upwardPass in " << timer.tacAndElapsed() << "s\n" ); - } - ///////////////////////////////////////////////////////////////////////////////////// - /// Transfer Pass - ///////////////////////////////////////////////////////////////////////////////////// - - void transferPass(const int fromLevel, const int toLevel, const bool inner, const bool outer){ - FLOG( FTic timer; ); - FLOG( FTic timerInBlock; FTic timerOutBlock; ); - for(int idxLevel = fromLevel ; idxLevel < toLevel ; ++idxLevel){ - if(inner){ - FLOG( timerInBlock.tic() ); - for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ - starpu_mpi_insert_task(MPI_COMM_WORLD, - &m2l_cl_in, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &idxLevel, sizeof(idxLevel), - STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + while( + // indice de fin du subGroup + tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex() + // le group courant est bien dans notre group + <= (((currentCells->getEndingIndex()-1)<<3)+7) + // on sort pas des subgroup + && (idxSubGroup+1) != + tree->getNbCellGroupAtLevel(idxLevel+1) + && + // Le prochain subgroup a un idxStart est dans ce group + tree->getCellGroup(idxLevel+1,idxSubGroup+1)->getStartingIndex() + <= ((currentCells->getEndingIndex()-1)<<3)+7 ) + { + idxSubGroup += 1; + // if(!currentCells->isMine() && tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() || !tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() && currentCells->isMine() ) + // std::cout << currentCells->getIdxGlobal() << " " << currentCells->isMine() << " ----> " << tree->getCellGroup(idxLevel+1,idxSubGroup)->getIdxGlobal() << " "<< tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() <<std::endl; + + // if(tree->getCellGroup(idxLevel+1,idxSubGroup)->isMine() || currentCells->isMine()){ + starpu_mpi_insert_task( + MPI_COMM_WORLD, + &m2m_cl, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + 
STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2M(idxLevel), + #endif + STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].up, //The remaining, read/write + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].up, //level d'avant readonly + #ifdef STARPU_USE_TASK_NAME + STARPU_NAME, m2mTaskNames[idxLevel].get(), + #endif + 0 + ); + //} + } + } + } + FLOG( FLog::Controller << "\t\t upwardPass in " << timer.tacAndElapsed() << "s\n" ); + } + ///////////////////////////////////////////////////////////////////////////////////// + /// Transfer Pass + ///////////////////////////////////////////////////////////////////////////////////// + + void transferPass(const int fromLevel, const int toLevel, const bool inner, const bool outer){ + FLOG( FTic timer; ); + FLOG( FTic timerInBlock; FTic timerOutBlock; ); + for(int idxLevel = fromLevel ; idxLevel < toLevel ; ++idxLevel){ + if(inner){// compute the interactions inside a group + // we compute the interaction list for each element on the fly + // and we find the Morton indexes that are included in the group + + FLOG( timerInBlock.tic() ); + for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ + starpu_mpi_insert_task(MPI_COMM_WORLD, + &m2l_cl_in, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2L(idxLevel), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2L(idxLevel), #endif - STARPU_R, cellHandles[idxLevel][idxGroup].symb, - STARPU_R, cellHandles[idxLevel][idxGroup].up, - (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down, + STARPU_R, cellHandles[idxLevel][idxGroup].symb, + STARPU_R, cellHandles[idxLevel][idxGroup].up, + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down, #ifdef STARPU_USE_TASK_NAME #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, m2lTaskNames[idxLevel].get(), + STARPU_NAME, m2lTaskNames[idxLevel].get(), #else - //"M2L-l_nb_i" - STARPU_NAME, taskNames->print("M2L", "%d, %d, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].down)), + //"M2L-l_nb_i" + STARPU_NAME, taskNames->print("M2L", "%d, %d, %lld, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].down)), #endif #endif - 0); - } - FLOG( timerInBlock.tac() ); - } - if(outer){ - FLOG( timerOutBlock.tic() ); - for(int idxGroup = 0 ; idxGroup < 
tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ - for(int idxInteraction = 0; idxInteraction < int(externalInteractionsAllLevel[idxLevel][idxGroup].size()) ; ++idxInteraction){ - const int interactionid = externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].otherBlockId; - const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].interactions; -#ifdef SCALFMM_USE_STARPU_EXTRACT - // On the same node -- do as usual - if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb)){ -#endif - int mode = 1; - starpu_mpi_insert_task(MPI_COMM_WORLD, - &m2l_cl_inout, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &idxLevel, sizeof(idxLevel), - STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), - STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), - STARPU_VALUE, &mode, sizeof(int), - #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), - #endif - STARPU_R, cellHandles[idxLevel][idxGroup].symb, - (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down, - STARPU_R, cellHandles[idxLevel][interactionid].symb, - STARPU_R, cellHandles[idxLevel][interactionid].up, - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, m2lOuterTaskNames[idxLevel].get(), - #else - //"M2L_out-l_nb_i_nb_i_s - STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(), - outsideInteractions->size(), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(), - tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(), - starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].down)), - #endif - #endif - 0); - - mode = 2; - starpu_mpi_insert_task(MPI_COMM_WORLD, - &m2l_cl_inout, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &idxLevel, sizeof(idxLevel), - STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), - STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), - STARPU_VALUE, &mode, sizeof(int), - #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), - #endif - STARPU_R, cellHandles[idxLevel][interactionid].symb, - (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][interactionid].down, - STARPU_R, cellHandles[idxLevel][idxGroup].symb, - STARPU_R, cellHandles[idxLevel][idxGroup].up, - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, m2lOuterTaskNames[idxLevel].get(), - #else - //"M2L_out-l_nb_i_nb_i_s" - STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(), - 
outsideInteractions->size(), - tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(), - tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].down)), - #endif - #endif - 0); - + 0); + } + FLOG( timerInBlock.tac() ); + } + if(outer){// compute the interactions between groups + // we need to store the interactions that each group has with the others, + // we call the list of interactions between two groups the interactions table. + // This table tells where the elements are located in the groups, + // it gives their Morton indexes and the relative position of each interaction which + // is a value between 0 and 342 for the M2L ([−3; +3] × D) or 0 and 26 for the P2P ([−1; +1] × D). + FLOG( timerOutBlock.tic() ); + for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ + for(int idxInteraction = 0; idxInteraction < int(externalInteractionsAllLevel[idxLevel][idxGroup].size()) ; ++idxInteraction){ + const int interactionid = externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].otherBlockId; + const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].interactions; #ifdef SCALFMM_USE_STARPU_EXTRACT - } - else{ - { - - // Extract data from second group for the first one - // That is copy B to B' - extractedCellBuffer.emplace_back(); - CellExtractedHandles& interactionBuffer = extractedCellBuffer.back(); - interactionBuffer.cellsToExtract = externalInteractionsAllLevelOuterIndexes[idxLevel][idxGroup][idxInteraction]; - interactionBuffer.size = tree->getCellGroup(idxLevel,interactionid)->extractGetSizeSymbUp(interactionBuffer.cellsToExtract); - // I allocate only if I will use it to extract - if(starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb) == mpi_rank){ - interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]); - FAssertLF(interactionBuffer.data); - } - else{ - interactionBuffer.data.reset(nullptr); - } - int registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb); - int where = (registeringNode == mpi_rank) ? 
STARPU_MAIN_RAM : -1; - starpu_variable_data_register(&interactionBuffer.all, where, - (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size); - starpu_mpi_data_register(interactionBuffer.all, tag++, registeringNode); - - CellExtractedHandles* interactionBufferPtr = &interactionBuffer; - starpu_mpi_insert_task(MPI_COMM_WORLD, - &cell_extract_up, - STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*), - #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), - #endif - STARPU_R, cellHandles[idxLevel][interactionid].symb, - STARPU_R, cellHandles[idxLevel][interactionid].up, - STARPU_RW, interactionBuffer.all, 0); - - // Move to a new memory block that is on the same node as A - // B' to B''' - duplicatedCellBuffer.emplace_back(); - DuplicatedCellHandle& duplicateB = duplicatedCellBuffer.back(); - duplicateB.sizeSymb = tree->getCellGroup(idxLevel,interactionid)->getBufferSizeInByte(); - duplicateB.sizeOther = tree->getCellGroup(idxLevel,interactionid)->getMultipoleBufferSizeInByte(); - if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == mpi_rank){ - // Reuse block but just to perform the send - duplicateB.dataSymbPtr.reset(new unsigned char[duplicateB.sizeSymb]);// = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawBuffer()); - duplicateB.dataOtherPtr.reset(new unsigned char[duplicateB.sizeOther]);// = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawMultipoleBuffer()); - } - duplicateB.dataSymb = nullptr; - duplicateB.dataOther = nullptr; - - registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb); - where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1; - starpu_variable_data_register(&duplicateB.symb, where, - (uintptr_t)duplicateB.dataSymbPtr.get(), duplicateB.sizeSymb); - starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode); - starpu_variable_data_register(&duplicateB.other, where, - (uintptr_t)duplicateB.dataOtherPtr.get(), duplicateB.sizeOther); - starpu_mpi_data_register(duplicateB.other, tag++, registeringNode); - - const unsigned char* ptr1 = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawBuffer()); - size_t size1 = duplicateB.sizeSymb; - const unsigned char* ptr2 = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawMultipoleBuffer()); - size_t size2 = duplicateB.sizeOther; - - starpu_mpi_insert_task(MPI_COMM_WORLD, - &cell_insert_up_bis, - STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*), - STARPU_VALUE, &ptr1, sizeof(ptr1), - STARPU_VALUE, &size1, sizeof(size1), - STARPU_VALUE, &ptr2, sizeof(ptr2), - STARPU_VALUE, &size2, sizeof(size2), - #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), - #endif - STARPU_R, interactionBuffer.all, - STARPU_RW, duplicateB.symb, - STARPU_RW, duplicateB.other, 0); - - - int mode = 1; - starpu_mpi_insert_task(MPI_COMM_WORLD, - &m2l_cl_inout, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &idxLevel, sizeof(idxLevel), - STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), - STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), - STARPU_VALUE, &mode, sizeof(int), - #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), - #endif - STARPU_R, cellHandles[idxLevel][idxGroup].symb, - (STARPU_RW | 
STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down, - STARPU_R, duplicateB.symb, - STARPU_R, duplicateB.other, - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, m2lOuterTaskNames[idxLevel].get(), - #else - //"M2L_out-l_nb_i_nb_i_s - STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(), - outsideInteractions->size(), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(), - tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(), - starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].down)), - #endif - #endif - 0); - } - { - // Extract data from second group for the first one - // That is copy A to A' - extractedCellBuffer.emplace_back(); - CellExtractedHandles& interactionBuffer = extractedCellBuffer.back(); - interactionBuffer.cellsToExtract = externalInteractionsAllLevelInnerIndexes[idxLevel][idxGroup][idxInteraction]; - interactionBuffer.size = tree->getCellGroup(idxLevel,idxGroup)->extractGetSizeSymbUp(interactionBuffer.cellsToExtract); - // I allocate only if I will use it to extract - if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == mpi_rank){ - interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]); - } - else{ - interactionBuffer.data.reset(nullptr); - } - int registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb); - int where = (registeringNode == mpi_rank) ? 
STARPU_MAIN_RAM : -1; - starpu_variable_data_register(&interactionBuffer.all, where, - (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size); - starpu_mpi_data_register(interactionBuffer.all, tag++, registeringNode); - - CellExtractedHandles* interactionBufferPtr = &interactionBuffer; - starpu_mpi_insert_task(MPI_COMM_WORLD, - &cell_extract_up, - STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*), - #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), - #endif - STARPU_R, cellHandles[idxLevel][idxGroup].symb, - STARPU_R, cellHandles[idxLevel][idxGroup].up, - STARPU_RW, interactionBuffer.all, 0); - - // Move to a new memory block that is on the same node as A - // B' to B''' - duplicatedCellBuffer.emplace_back(); - DuplicatedCellHandle& duplicateB = duplicatedCellBuffer.back(); - duplicateB.sizeSymb = tree->getCellGroup(idxLevel,idxGroup)->getBufferSizeInByte(); - duplicateB.sizeOther = tree->getCellGroup(idxLevel,idxGroup)->getMultipoleBufferSizeInByte(); - if(starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb) == mpi_rank){ - // Reuse block but just to perform the send - duplicateB.dataSymbPtr.reset(new unsigned char[duplicateB.sizeSymb]);//const_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawBuffer()); - //memcpy(duplicateB.dataSymbPtr.get(), tree->getCellGroup(idxLevel,idxGroup)->getRawBuffer(), duplicateB.sizeSymb); - duplicateB.dataOtherPtr.reset(new unsigned char[duplicateB.sizeOther]);//reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawMultipoleBuffer()); - //memcpy(duplicateB.dataOtherPtr.get(), tree->getCellGroup(idxLevel,idxGroup)->getRawMultipoleBuffer(), duplicateB.sizeOther); - } - duplicateB.dataSymb = nullptr; - duplicateB.dataOther = nullptr; - - registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb); - where = (registeringNode == mpi_rank) ? 
STARPU_MAIN_RAM : -1; - starpu_variable_data_register(&duplicateB.symb, where, - (uintptr_t)duplicateB.dataSymbPtr.get(), duplicateB.sizeSymb); - starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode); - starpu_variable_data_register(&duplicateB.other, where, - (uintptr_t)duplicateB.dataOtherPtr.get(), duplicateB.sizeOther); - starpu_mpi_data_register(duplicateB.other, tag++, registeringNode); - - const unsigned char* ptr1 = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawBuffer()); - size_t size1 = duplicateB.sizeSymb; - const unsigned char* ptr2 = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawMultipoleBuffer()); - size_t size2 = duplicateB.sizeOther; - starpu_mpi_insert_task(MPI_COMM_WORLD, - &cell_insert_up_bis, - STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*), - STARPU_VALUE, &ptr1, sizeof(ptr1), - STARPU_VALUE, &size1, sizeof(size1), - STARPU_VALUE, &ptr2, sizeof(ptr2), - STARPU_VALUE, &size2, sizeof(size2), - #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), - #endif - STARPU_R, interactionBuffer.all, - STARPU_RW, duplicateB.symb, - STARPU_RW, duplicateB.other, 0); - - int mode = 2; - starpu_mpi_insert_task(MPI_COMM_WORLD, - &m2l_cl_inout, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &idxLevel, sizeof(idxLevel), - STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), - STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), - STARPU_VALUE, &mode, sizeof(int), - #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), - #endif - STARPU_R, cellHandles[idxLevel][interactionid].symb, - (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][interactionid].down, - STARPU_R, duplicateB.symb, - STARPU_R, duplicateB.other, - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, m2lOuterTaskNames[idxLevel].get(), - #else - //"M2L_out-l_nb_i_nb_i_s" - STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(), - outsideInteractions->size(), - tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(), - tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].down)), - #endif - #endif - 0); - } - } + // On the same node -- do as usual + if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb)){ #endif - } - } - FLOG( timerOutBlock.tac() ); - } - } - FLOG( FLog::Controller << "\t\t transferPass in " << timer.tacAndElapsed() << "s\n" ); - FLOG( FLog::Controller << "\t\t\t inblock in " << timerInBlock.elapsed() << "s\n" ); - FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" ); - } - - ///////////////////////////////////////////////////////////////////////////////////// - /// Downard Pass - ///////////////////////////////////////////////////////////////////////////////////// - - void 
downardPass(){ - FLOG( FTic timer; ); - for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ - int idxSubGroup = 0; - - for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ - CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup); - - // Skip current group if needed - if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){ - ++idxSubGroup; - FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) ); - FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() ); - } - // Copy at max 8 groups - { - // put the right codelet - if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){ + int mode = 1; starpu_mpi_insert_task(MPI_COMM_WORLD, - &l2l_cl_nocommute, + &m2l_cl_inout, STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + STARPU_VALUE, &mode, sizeof(int), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), #endif - STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly - STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write - STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly - STARPU_RW, cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, l2lTaskNames[idxLevel].get(), - #else - //"L2L-l_nb_i_nbc_ic_s" - STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), - FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- - FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)), - #endif - #endif - 0); - } - else{ + STARPU_R, cellHandles[idxLevel][idxGroup].symb, + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down, + STARPU_R, cellHandles[idxLevel][interactionid].symb, + STARPU_R, cellHandles[idxLevel][interactionid].up, + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, m2lOuterTaskNames[idxLevel].get(), + #else + //"M2L_out-l_nb_i_nb_i_s + STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + 
tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(), + outsideInteractions->size(), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(), + tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].down)), + #endif + #endif + 0); + + mode = 2; starpu_mpi_insert_task(MPI_COMM_WORLD, - &l2l_cl, + &m2l_cl_inout, STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + STARPU_VALUE, &mode, sizeof(int), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), #endif - STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly - STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write - STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly - (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, l2lTaskNames[idxLevel].get(), - #else - //"L2L-l_nb_i_nbc_ic_s" - STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), - FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- - FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)), - #endif - #endif - 0); - } + STARPU_R, cellHandles[idxLevel][interactionid].symb, + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][interactionid].down, + STARPU_R, cellHandles[idxLevel][idxGroup].symb, + STARPU_R, cellHandles[idxLevel][idxGroup].up, + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, m2lOuterTaskNames[idxLevel].get(), + #else + //"M2L_out-l_nb_i_nb_i_s" + STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(), + outsideInteractions->size(), + tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(), + tree->getCellGroup(idxLevel, 
interactionid)->getEndingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].down)), + #endif + #endif + 0); - } - while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7) - && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1) - && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){ - idxSubGroup += 1; +#ifdef SCALFMM_USE_STARPU_EXTRACT + } + else{ + { - // put the right codelet - if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){ - starpu_mpi_insert_task(MPI_COMM_WORLD, - &l2l_cl_nocommute, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &idxLevel, sizeof(idxLevel), - STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + // Extract data from second group for the first one + // That is copy B to B' + extractedCellBuffer.emplace_back(); + CellExtractedHandles& interactionBuffer = extractedCellBuffer.back(); + interactionBuffer.cellsToExtract = externalInteractionsAllLevelOuterIndexes[idxLevel][idxGroup][idxInteraction]; + interactionBuffer.size = tree->getCellGroup(idxLevel,interactionid)->extractGetSizeSymbUp(interactionBuffer.cellsToExtract); + // I allocate only if I will use it to extract + if(starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb) == mpi_rank){ + interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]); + FAssertLF(interactionBuffer.data); + } + else{ + interactionBuffer.data.reset(nullptr); + } + int registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb); + int where = (registeringNode == mpi_rank) ? 
STARPU_MAIN_RAM : -1; + starpu_variable_data_register(&interactionBuffer.all, where, + (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size); + starpu_mpi_data_register(interactionBuffer.all, tag++, registeringNode); + + CellExtractedHandles* interactionBufferPtr = &interactionBuffer; + starpu_mpi_insert_task(MPI_COMM_WORLD, + &cell_extract_up, + STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), #endif - STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly - STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write - STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly - STARPU_RW, cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, l2lTaskNames[idxLevel].get(), - #else - //"L2L-l_nb_i_nbc_ic_s" - STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), - FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- - FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)), - #endif - #endif - 0); - } - else{ - starpu_mpi_insert_task(MPI_COMM_WORLD, - &l2l_cl, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &idxLevel, sizeof(idxLevel), - STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + STARPU_R, cellHandles[idxLevel][interactionid].symb, + STARPU_R, cellHandles[idxLevel][interactionid].up, + STARPU_RW, interactionBuffer.all, 0); + + // Move to a new memory block that is on the same node as A + // B' to B''' + duplicatedCellBuffer.emplace_back(); + DuplicatedCellHandle& duplicateB = duplicatedCellBuffer.back(); + duplicateB.sizeSymb = tree->getCellGroup(idxLevel,interactionid)->getBufferSizeInByte(); + duplicateB.sizeOther = tree->getCellGroup(idxLevel,interactionid)->getMultipoleBufferSizeInByte(); + if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == mpi_rank){ + // Reuse block but just to perform the send + duplicateB.dataSymbPtr.reset(new unsigned char[duplicateB.sizeSymb]);// = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawBuffer()); + duplicateB.dataOtherPtr.reset(new unsigned char[duplicateB.sizeOther]);// = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawMultipoleBuffer()); + } + duplicateB.dataSymb = nullptr; + duplicateB.dataOther = nullptr; + + registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb); + where = (registeringNode == mpi_rank) ? 
STARPU_MAIN_RAM : -1; + starpu_variable_data_register(&duplicateB.symb, where, + (uintptr_t)duplicateB.dataSymbPtr.get(), duplicateB.sizeSymb); + starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode); + starpu_variable_data_register(&duplicateB.other, where, + (uintptr_t)duplicateB.dataOtherPtr.get(), duplicateB.sizeOther); + starpu_mpi_data_register(duplicateB.other, tag++, registeringNode); + + const unsigned char* ptr1 = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawBuffer()); + size_t size1 = duplicateB.sizeSymb; + const unsigned char* ptr2 = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,interactionid)->getRawMultipoleBuffer()); + size_t size2 = duplicateB.sizeOther; + + starpu_mpi_insert_task(MPI_COMM_WORLD, + &cell_insert_up_bis, + STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*), + STARPU_VALUE, &ptr1, sizeof(ptr1), + STARPU_VALUE, &size1, sizeof(size1), + STARPU_VALUE, &ptr2, sizeof(ptr2), + STARPU_VALUE, &size2, sizeof(size2), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), #endif - STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly - STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write - STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly - (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, l2lTaskNames[idxLevel].get(), - #else - //"L2L-l_nb_i_nbc_ic_s" - STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), - FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- - FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)), - #endif - #endif - 0); - } + STARPU_R, interactionBuffer.all, + STARPU_RW, duplicateB.symb, + STARPU_RW, duplicateB.other, 0); + + + int mode = 1; + starpu_mpi_insert_task(MPI_COMM_WORLD, + &m2l_cl_inout, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + STARPU_VALUE, &mode, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), + #endif + STARPU_R, cellHandles[idxLevel][idxGroup].symb, + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down, + STARPU_R, duplicateB.symb, + STARPU_R, duplicateB.other, + #ifdef STARPU_USE_TASK_NAME + #ifndef 
SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, m2lOuterTaskNames[idxLevel].get(), + #else + //"M2L_out-l_nb_i_nb_i_s + STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(), + outsideInteractions->size(), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(), + tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].down)), + #endif + #endif + 0); + } + { + // Extract data from second group for the first one + // That is copy A to A' + extractedCellBuffer.emplace_back(); + CellExtractedHandles& interactionBuffer = extractedCellBuffer.back(); + interactionBuffer.cellsToExtract = externalInteractionsAllLevelInnerIndexes[idxLevel][idxGroup][idxInteraction]; + interactionBuffer.size = tree->getCellGroup(idxLevel,idxGroup)->extractGetSizeSymbUp(interactionBuffer.cellsToExtract); + // I allocate only if I will use it to extract + if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == mpi_rank){ + interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]); + } + else{ + interactionBuffer.data.reset(nullptr); + } + int registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb); + int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1; + starpu_variable_data_register(&interactionBuffer.all, where, + (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size); + starpu_mpi_data_register(interactionBuffer.all, tag++, registeringNode); + + CellExtractedHandles* interactionBufferPtr = &interactionBuffer; + starpu_mpi_insert_task(MPI_COMM_WORLD, + &cell_extract_up, + STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), + #endif + STARPU_R, cellHandles[idxLevel][idxGroup].symb, + STARPU_R, cellHandles[idxLevel][idxGroup].up, + STARPU_RW, interactionBuffer.all, 0); + + // Move to a new memory block that is on the same node as A + // B' to B''' + duplicatedCellBuffer.emplace_back(); + DuplicatedCellHandle& duplicateB = duplicatedCellBuffer.back(); + duplicateB.sizeSymb = tree->getCellGroup(idxLevel,idxGroup)->getBufferSizeInByte(); + duplicateB.sizeOther = tree->getCellGroup(idxLevel,idxGroup)->getMultipoleBufferSizeInByte(); + if(starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb) == mpi_rank){ + // Reuse block but just to perform the send + duplicateB.dataSymbPtr.reset(new unsigned char[duplicateB.sizeSymb]);//const_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawBuffer()); + //memcpy(duplicateB.dataSymbPtr.get(), tree->getCellGroup(idxLevel,idxGroup)->getRawBuffer(), duplicateB.sizeSymb); + duplicateB.dataOtherPtr.reset(new unsigned char[duplicateB.sizeOther]);//reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawMultipoleBuffer()); + //memcpy(duplicateB.dataOtherPtr.get(), tree->getCellGroup(idxLevel,idxGroup)->getRawMultipoleBuffer(), duplicateB.sizeOther); + } + duplicateB.dataSymb = nullptr; + duplicateB.dataOther = nullptr; + + 
registeringNode = starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb); + where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1; + starpu_variable_data_register(&duplicateB.symb, where, + (uintptr_t)duplicateB.dataSymbPtr.get(), duplicateB.sizeSymb); + starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode); + starpu_variable_data_register(&duplicateB.other, where, + (uintptr_t)duplicateB.dataOtherPtr.get(), duplicateB.sizeOther); + starpu_mpi_data_register(duplicateB.other, tag++, registeringNode); + + const unsigned char* ptr1 = const_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawBuffer()); + size_t size1 = duplicateB.sizeSymb; + const unsigned char* ptr2 = reinterpret_cast<unsigned char*>(tree->getCellGroup(idxLevel,idxGroup)->getRawMultipoleBuffer()); + size_t size2 = duplicateB.sizeOther; + starpu_mpi_insert_task(MPI_COMM_WORLD, + &cell_insert_up_bis, + STARPU_VALUE, &interactionBufferPtr, sizeof(CellExtractedHandles*), + STARPU_VALUE, &ptr1, sizeof(ptr1), + STARPU_VALUE, &size1, sizeof(size1), + STARPU_VALUE, &ptr2, sizeof(ptr2), + STARPU_VALUE, &size2, sizeof(size2), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), + #endif + STARPU_R, interactionBuffer.all, + STARPU_RW, duplicateB.symb, + STARPU_RW, duplicateB.other, 0); + + int mode = 2; + starpu_mpi_insert_task(MPI_COMM_WORLD, + &m2l_cl_inout, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + STARPU_VALUE, &mode, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosM2LExtern(idxLevel), + #endif + STARPU_R, cellHandles[idxLevel][interactionid].symb, + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][interactionid].down, + STARPU_R, duplicateB.symb, + STARPU_R, duplicateB.other, + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, m2lOuterTaskNames[idxLevel].get(), + #else + //"M2L_out-l_nb_i_nb_i_s" + STARPU_NAME, taskNames->print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel,interactionid)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,interactionid)->getSizeOfInterval(), + outsideInteractions->size(), + tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(), + tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].down)), + #endif + #endif + 0); + } + } +#endif + } + } + FLOG( timerOutBlock.tac() ); + } + } + FLOG( FLog::Controller << "\t\t transferPass in " << timer.tacAndElapsed() << "s\n" ); + FLOG( FLog::Controller << "\t\t\t inblock in " << timerInBlock.elapsed() << "s\n" ); + FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" ); + } + + ///////////////////////////////////////////////////////////////////////////////////// + /// Downard Pass + ///////////////////////////////////////////////////////////////////////////////////// + + void downardPassDuplicate(){ + FLOG( FTic timer; ); + for(int 
idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ + int idxSubGroup = 0; + std::cout << " Level "<< idxLevel << " -> " << idxLevel+1<<" nbGroupCell " << tree->getNbCellGroupAtLevel(idxLevel) << std::endl; + + for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ + CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup); + + // Skip current group if needed + std::cout <<" IF " << tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() << " <= " <<(currentCells->getStartingIndex()<<3)<< std::endl; + if( tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){ + ++idxSubGroup; + FAssertLF( idxSubGroup != tree->getNbCellGroupAtLevel(idxLevel+1) ); + FAssertLF( (tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex()>>3) == currentCells->getStartingIndex() ); + } + std::cout << " idxSubGroup " << idxSubGroup << std::endl; + // Copy at max 8 groups + { + // put the right codelet + if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){ + std::cout << " (noCommuteAtLastLevel) ID "<<cellHandles[idxLevel][idxGroup].groupID << " intervalSize " <<cellHandles[idxLevel][idxGroup].intervalSize <<std::endl; + starpu_mpi_insert_task(MPI_COMM_WORLD, + &l2l_cl_nocommute, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel), + #endif + STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly + STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly + STARPU_RW, cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, l2lTaskNames[idxLevel].get(), + #else + //"L2L-l_nb_i_nbc_ic_s" + STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), + FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- + FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)), + #endif + #endif + 0); + } + else{ + std::cout << " (CommuteAtLastLevel) ID "<<cellHandles[idxLevel][idxGroup].groupID << " intervalSize " <<cellHandles[idxLevel][idxGroup].intervalSize <<std::endl; + starpu_mpi_insert_task(MPI_COMM_WORLD, + &l2l_cl, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, 
&cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel), + #endif + STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly + STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, l2lTaskNames[idxLevel].get(), + #else + //"L2L-l_nb_i_nbc_ic_s" + STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), + FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- + FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)), + #endif + #endif + 0); } - } - } - FLOG( FLog::Controller << "\t\t downardPass in " << timer.tacAndElapsed() << "s\n" ); - } - ///////////////////////////////////////////////////////////////////////////////////// - /// Direct Pass - ///////////////////////////////////////////////////////////////////////////////////// + } + std::cout << " while loop " << std::endl; + while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7) + && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1) + && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){ + idxSubGroup += 1; + + // put the right codelet + if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){ + std::cout << " (noCommuteAtLastLevel) ID "<<cellHandles[idxLevel][idxGroup].groupID << " intervalSize " <<cellHandles[idxLevel][idxGroup].intervalSize <<std::endl; + starpu_mpi_insert_task(MPI_COMM_WORLD, + &l2l_cl_nocommute, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel), + #endif + STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly + STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly + STARPU_RW, cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, l2lTaskNames[idxLevel].get(), + #else + //"L2L-l_nb_i_nbc_ic_s" + STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, 
%lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), + FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- + FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)), + #endif + #endif + 0); + } + else{ + std::cout << " (CommuteAtLastLevel) ID "<<cellHandles[idxLevel][idxGroup].groupID << " intervalSize " <<cellHandles[idxLevel][idxGroup].intervalSize <<std::endl; + starpu_mpi_insert_task(MPI_COMM_WORLD, + &l2l_cl, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel), + #endif + STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly + STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, l2lTaskNames[idxLevel].get(), + #else + //"L2L-l_nb_i_nbc_ic_s" + STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), + FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- + FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)), + #endif + #endif + 0); + } + } + } + } + FLOG( FLog::Controller << "\t\t downardPass in " << timer.tacAndElapsed() << "s\n" ); + } + void downardPassNoDuplicate(){ + FLOG( FTic timer; ); + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ + + for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ + int idxSubGroup = 0; + //if(idxSubGroup == tree->getNbCellGroupAtLevel(idxLevel+1)) + // break; + CellContainerClass*const currentCells = tree->getCellGroup(idxLevel, idxGroup); + + // Skip current group if needed + 
while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (currentCells->getStartingIndex()<<3) ){ + ++idxSubGroup; + if(idxSubGroup == tree->getNbCellGroupAtLevel(idxLevel+1)) + break; + + } + if(idxSubGroup == tree->getNbCellGroupAtLevel(idxLevel+1)) + break; + // Copy at max 8 groups + { + // put the right codelet + if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){ + starpu_mpi_insert_task(MPI_COMM_WORLD, + &l2l_cl_nocommute, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel), + #endif + STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly + STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly + STARPU_RW, cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, l2lTaskNames[idxLevel].get(), + #else + //"L2L-l_nb_i_nbc_ic_s" + STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), + FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- + FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)), + #endif + #endif + 0); + } + else{ + starpu_mpi_insert_task(MPI_COMM_WORLD, + &l2l_cl, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel), + #endif + STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly + STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, l2lTaskNames[idxLevel].get(), + #else + //"L2L-l_nb_i_nbc_ic_s" + STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), + 
FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- + FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)), + #endif + #endif + 0); + } - void directPass(){ - FLOG( FTic timer; ); - FLOG( FTic timerInBlock; FTic timerOutBlock; ); + } + while(tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex() <= (((currentCells->getEndingIndex()-1)<<3)+7) + && (idxSubGroup+1) != tree->getNbCellGroupAtLevel(idxLevel+1) + && tree->getCellGroup(idxLevel+1, idxSubGroup+1)->getStartingIndex() <= ((currentCells->getEndingIndex()-1)<<3)+7 ){ + idxSubGroup += 1; - FLOG( timerOutBlock.tic() ); - for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ - for(int idxInteraction = 0; idxInteraction < int(externalInteractionsLeafLevel[idxGroup].size()) ; ++idxInteraction){ - const int interactionid = externalInteractionsLeafLevel[idxGroup][idxInteraction].otherBlockId; - const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions; - if(starpu_mpi_data_get_rank(particleHandles[idxGroup].down) == starpu_mpi_data_get_rank(particleHandles[interactionid].down)) - { + // put the right codelet + if((noCommuteAtLastLevel && (idxLevel == FAbstractAlgorithm::lowerWorkingLevel - 2)) || noCommuteBetweenLevel){ starpu_mpi_insert_task(MPI_COMM_WORLD, - &p2p_cl_inout, + &l2l_cl_nocommute, STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), - STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel), #endif - STARPU_R, particleHandles[idxGroup].symb, + STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly + STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly + STARPU_RW, cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, l2lTaskNames[idxLevel].get(), + #else + //"L2L-l_nb_i_nbc_ic_s" + STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), + FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- + FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), + tree->getCellGroup(idxLevel, 
idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)), + #endif + #endif + 0); + } + else{ + starpu_mpi_insert_task(MPI_COMM_WORLD, + &l2l_cl, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &idxLevel, sizeof(idxLevel), + STARPU_VALUE, &cellHandles[idxLevel][idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2L(idxLevel), + #endif + STARPU_R, cellHandles[idxLevel][idxGroup].symb, //symbolique, readonly + STARPU_R, cellHandles[idxLevel][idxGroup].down, //The remaining, read/write + STARPU_R, cellHandles[idxLevel+1][idxSubGroup].symb, //symbolique, readonly + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel+1][idxSubGroup].down, //level d'avant readonly + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, l2lTaskNames[idxLevel].get(), + #else + //"L2L-l_nb_i_nbc_ic_s" + STARPU_NAME, taskNames->print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", + idxLevel, + tree->getCellGroup(idxLevel,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(idxLevel+1,idxSubGroup)->getSizeOfInterval(), + FMath::Min(tree->getCellGroup(idxLevel,idxGroup)->getEndingIndex()-1, (tree->getCellGroup(idxLevel+1,idxSubGroup)->getEndingIndex()-1)>>3)- + FMath::Max(tree->getCellGroup(idxLevel,idxGroup)->getStartingIndex(), tree->getCellGroup(idxLevel+1,idxSubGroup)->getStartingIndex()>>3), + tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), + tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(cellHandles[idxLevel+1][idxSubGroup].down)), + #endif + #endif + 0); + } + } + } + } + FLOG( FLog::Controller << "\t\t downardPass in " << timer.tacAndElapsed() << "s\n" ); + } + + + ///////////////////////////////////////////////////////////////////////////////////// + /// Direct Pass + ///////////////////////////////////////////////////////////////////////////////////// + + void directPass(){ + FLOG( FTic timer; ); + FLOG( FTic timerInBlock; FTic timerOutBlock; ); + + FLOG( timerOutBlock.tic() ); + for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ + for(int idxInteraction = 0; idxInteraction < int(externalInteractionsLeafLevel[idxGroup].size()) ; ++idxInteraction){ + const int interactionid = externalInteractionsLeafLevel[idxGroup][idxInteraction].otherBlockId; + const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions; + if(starpu_mpi_data_get_rank(particleHandles[idxGroup].down) == starpu_mpi_data_get_rank(particleHandles[interactionid].down)) + { + starpu_mpi_insert_task(MPI_COMM_WORLD, + &p2p_cl_inout, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), + STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), + #endif + STARPU_R, 
particleHandles[idxGroup].symb, #ifdef STARPU_USE_REDUX - STARPU_REDUX, particleHandles[idxGroup].down, + STARPU_REDUX, particleHandles[idxGroup].down, #else - (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, #endif - STARPU_R, particleHandles[interactionid].symb, + STARPU_R, particleHandles[interactionid].symb, #ifdef STARPU_USE_REDUX - STARPU_REDUX, particleHandles[interactionid].down, + STARPU_REDUX, particleHandles[interactionid].down, #else - (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down, - STARPU_EXECUTE_ON_DATA, particleHandles[interactionid].down, + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down, + STARPU_EXECUTE_ON_DATA, particleHandles[interactionid].down, #endif #ifdef STARPU_USE_TASK_NAME #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, p2pOuterTaskNames.get(), + STARPU_NAME, p2pOuterTaskNames.get(), #else - //"P2P_out-nb_i_p_nb_i_p_s" - STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(), - tree->getParticleGroup(idxGroup)->getSizeOfInterval(), - tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(), - tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(), - tree->getParticleGroup(interactionid)->getSizeOfInterval(), - tree->getParticleGroup(interactionid)->getNbParticlesInGroup(), - outsideInteractions->size(), - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - tree->getParticleGroup(interactionid)->getStartingIndex(), - tree->getParticleGroup(interactionid)->getEndingIndex(), - starpu_mpi_data_get_rank(particleHandles[interactionid].down)), + //"P2P_out-nb_i_p_nb_i_p_s" + STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n", + tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(), + tree->getParticleGroup(idxGroup)->getSizeOfInterval(), + tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(), + tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(), + tree->getParticleGroup(interactionid)->getSizeOfInterval(), + tree->getParticleGroup(interactionid)->getNbParticlesInGroup(), + outsideInteractions->size(), + tree->getParticleGroup(idxGroup)->getStartingIndex(), + tree->getParticleGroup(idxGroup)->getEndingIndex(), + tree->getParticleGroup(interactionid)->getStartingIndex(), + tree->getParticleGroup(interactionid)->getEndingIndex(), + starpu_mpi_data_get_rank(particleHandles[interactionid].down)), #endif #endif - 0); - } - else - { + 0); + } + else + { #ifdef SCALFMM_USE_STARPU_EXTRACT - { - // Extract data from second group for the first one - // That is copy B to B' - extractedParticlesBuffer.emplace_back(); - ParticleExtractedHandles& interactionBuffer = extractedParticlesBuffer.back(); - interactionBuffer.leavesToExtract = externalInteractionsLeafLevelOuter[idxGroup][idxInteraction]; - - interactionBuffer.size = tree->getParticleGroup(interactionid)->getExtractBufferSize(interactionBuffer.leavesToExtract); - // I allocate only if I will use it to extract - if(starpu_mpi_data_get_rank(particleHandles[interactionid].symb) == mpi_rank){ - interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]); - } - else{ - interactionBuffer.data.reset(nullptr); - } + { + // Extract data from second group for the first one + // That is 
copy B to B' + extractedParticlesBuffer.emplace_back(); + ParticleExtractedHandles& interactionBuffer = extractedParticlesBuffer.back(); + interactionBuffer.leavesToExtract = externalInteractionsLeafLevelOuter[idxGroup][idxInteraction]; + + interactionBuffer.size = tree->getParticleGroup(interactionid)->getExtractBufferSize(interactionBuffer.leavesToExtract); + // I allocate only if I will use it to extract + if(starpu_mpi_data_get_rank(particleHandles[interactionid].symb) == mpi_rank){ + interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]); + } + else{ + interactionBuffer.data.reset(nullptr); + } - int registeringNode = starpu_mpi_data_get_rank(particleHandles[interactionid].symb); - int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1; - starpu_variable_data_register(&interactionBuffer.symb, where, - (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size); - starpu_mpi_data_register(interactionBuffer.symb, tag++, registeringNode); + int registeringNode = starpu_mpi_data_get_rank(particleHandles[interactionid].symb); + int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1; + starpu_variable_data_register(&interactionBuffer.symb, where, + (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size); + starpu_mpi_data_register(interactionBuffer.symb, tag++, registeringNode); - ParticleExtractedHandles* interactionBufferPtr = &interactionBuffer; - starpu_mpi_insert_task(MPI_COMM_WORLD, - &p2p_extract, - STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*), + ParticleExtractedHandles* interactionBufferPtr = &interactionBuffer; + starpu_mpi_insert_task(MPI_COMM_WORLD, + &p2p_extract, + STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), #endif - STARPU_R, particleHandles[interactionid].symb, - STARPU_RW, interactionBuffer.symb, 0); - - // Move to a new memory block that is on the same node as A - // B' to B''' - duplicatedParticlesBuffer.emplace_back(); - DuplicatedParticlesHandle& duplicateB = duplicatedParticlesBuffer.back(); - duplicateB.size = tree->getParticleGroup(interactionid)->getBufferSizeInByte(); - if(starpu_mpi_data_get_rank(particleHandles[idxGroup].symb) == mpi_rank){ - // Reuse block but just to perform the send - duplicateB.data = (unsigned char*) FAlignedMemory::AllocateBytes<64>(duplicateB.size);// = const_cast<unsigned char*>(tree->getParticleGroup(interactionid)->getRawBuffer()); - } - else{ - duplicateB.data = nullptr; - } + STARPU_R, particleHandles[interactionid].symb, + STARPU_RW, interactionBuffer.symb, 0); + + // Move to a new memory block that is on the same node as A + // B' to B''' + duplicatedParticlesBuffer.emplace_back(); + DuplicatedParticlesHandle& duplicateB = duplicatedParticlesBuffer.back(); + duplicateB.size = tree->getParticleGroup(interactionid)->getBufferSizeInByte(); + if(starpu_mpi_data_get_rank(particleHandles[idxGroup].symb) == mpi_rank){ + // Reuse block but just to perform the send + duplicateB.data = (unsigned char*) FAlignedMemory::AllocateBytes<64>(duplicateB.size);// = const_cast<unsigned char*>(tree->getParticleGroup(interactionid)->getRawBuffer()); + } + else{ + duplicateB.data = nullptr; + } - registeringNode = starpu_mpi_data_get_rank(particleHandles[idxGroup].symb); - where = (registeringNode == mpi_rank) ? 
STARPU_MAIN_RAM : -1; - starpu_variable_data_register(&duplicateB.symb, where, - (uintptr_t)duplicateB.data, duplicateB.size); - starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode); + registeringNode = starpu_mpi_data_get_rank(particleHandles[idxGroup].symb); + where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1; + starpu_variable_data_register(&duplicateB.symb, where, + (uintptr_t)duplicateB.data, duplicateB.size); + starpu_mpi_data_register(duplicateB.symb, tag++, registeringNode); - const unsigned char* dataPtr = const_cast<unsigned char*>(tree->getParticleGroup(interactionid)->getRawBuffer()); - size_t sizeData = duplicateB.size; + const unsigned char* dataPtr = const_cast<unsigned char*>(tree->getParticleGroup(interactionid)->getRawBuffer()); + size_t sizeData = duplicateB.size; - starpu_mpi_insert_task(MPI_COMM_WORLD, - &p2p_insert_bis, - STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*), - STARPU_VALUE, &dataPtr, sizeof(dataPtr), - STARPU_VALUE, &sizeData, sizeof(sizeData), + starpu_mpi_insert_task(MPI_COMM_WORLD, + &p2p_insert_bis, + STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*), + STARPU_VALUE, &dataPtr, sizeof(dataPtr), + STARPU_VALUE, &sizeData, sizeof(sizeData), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), #endif - STARPU_R, interactionBuffer.symb, - STARPU_RW, duplicateB.symb, - 0); - - starpu_mpi_insert_task(MPI_COMM_WORLD, - &p2p_cl_inout_mpi, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), - STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int), + STARPU_R, interactionBuffer.symb, + STARPU_RW, duplicateB.symb, + 0); + + starpu_mpi_insert_task(MPI_COMM_WORLD, + &p2p_cl_inout_mpi, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), + STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), #endif - STARPU_R, particleHandles[idxGroup].symb, - (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, - STARPU_R, duplicateB.symb, + STARPU_R, particleHandles[idxGroup].symb, + (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, + STARPU_R, duplicateB.symb, #ifdef STARPU_USE_TASK_NAME #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, p2pOuterTaskNames.get(), + STARPU_NAME, p2pOuterTaskNames.get(), #else - //"P2P_out-nb_i_p_nb_i_p_s" - STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(), - tree->getParticleGroup(idxGroup)->getSizeOfInterval(), - tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(), - tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(), - tree->getParticleGroup(interactionid)->getSizeOfInterval(), - tree->getParticleGroup(interactionid)->getNbParticlesInGroup(), - outsideInteractions->size(), - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - tree->getParticleGroup(interactionid)->getStartingIndex(), - tree->getParticleGroup(interactionid)->getEndingIndex(), - 
starpu_mpi_data_get_rank(particleHandles[idxGroup].down)), + //"P2P_out-nb_i_p_nb_i_p_s" + STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n", + tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(), + tree->getParticleGroup(idxGroup)->getSizeOfInterval(), + tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(), + tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(), + tree->getParticleGroup(interactionid)->getSizeOfInterval(), + tree->getParticleGroup(interactionid)->getNbParticlesInGroup(), + outsideInteractions->size(), + tree->getParticleGroup(idxGroup)->getStartingIndex(), + tree->getParticleGroup(idxGroup)->getEndingIndex(), + tree->getParticleGroup(interactionid)->getStartingIndex(), + tree->getParticleGroup(interactionid)->getEndingIndex(), + starpu_mpi_data_get_rank(particleHandles[idxGroup].down)), #endif #endif - 0); - } + 0); + } + { + std::vector<OutOfBlockInteraction>* outsideInteractionsOpposite = new std::vector<OutOfBlockInteraction>(externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions); + for(unsigned int i = 0; i < outsideInteractionsOpposite->size(); ++i) { - std::vector<OutOfBlockInteraction>* outsideInteractionsOpposite = new std::vector<OutOfBlockInteraction>(externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions); - for(unsigned int i = 0; i < outsideInteractionsOpposite->size(); ++i) - { - MortonIndex tmp = outsideInteractionsOpposite->at(i).outIndex; - outsideInteractionsOpposite->at(i).outIndex = outsideInteractionsOpposite->at(i).insideIndex; - outsideInteractionsOpposite->at(i).insideIndex = tmp; - int tmp2 = outsideInteractionsOpposite->at(i).insideIdxInBlock; - outsideInteractionsOpposite->at(i).insideIdxInBlock = outsideInteractionsOpposite->at(i).outsideIdxInBlock; - outsideInteractionsOpposite->at(i).outsideIdxInBlock = tmp2; - outsideInteractionsOpposite->at(i).relativeOutPosition = getOppositeInterIndex(outsideInteractionsOpposite->at(i).relativeOutPosition); - } - externalInteractionsLeafLevelOpposite.push_front(outsideInteractionsOpposite); + MortonIndex tmp = outsideInteractionsOpposite->at(i).outIndex; + outsideInteractionsOpposite->at(i).outIndex = outsideInteractionsOpposite->at(i).insideIndex; + outsideInteractionsOpposite->at(i).insideIndex = tmp; + int tmp2 = outsideInteractionsOpposite->at(i).insideIdxInBlock; + outsideInteractionsOpposite->at(i).insideIdxInBlock = outsideInteractionsOpposite->at(i).outsideIdxInBlock; + outsideInteractionsOpposite->at(i).outsideIdxInBlock = tmp2; + outsideInteractionsOpposite->at(i).relativeOutPosition = getOppositeInterIndex(outsideInteractionsOpposite->at(i).relativeOutPosition); + } + externalInteractionsLeafLevelOpposite.push_front(outsideInteractionsOpposite); - // Extract data from second group for the first one - // That is copy A to A' - extractedParticlesBuffer.emplace_back(); - ParticleExtractedHandles& interactionBuffer = extractedParticlesBuffer.back(); - interactionBuffer.leavesToExtract = externalInteractionsLeafLevelInner[idxGroup][idxInteraction]; + // Extract data from second group for the first one + // That is copy A to A' + extractedParticlesBuffer.emplace_back(); + ParticleExtractedHandles& interactionBuffer = extractedParticlesBuffer.back(); + interactionBuffer.leavesToExtract = externalInteractionsLeafLevelInner[idxGroup][idxInteraction]; - interactionBuffer.size = tree->getParticleGroup(idxGroup)->getExtractBufferSize(interactionBuffer.leavesToExtract); - // I allocate only if I 
will use it to extract - if(starpu_mpi_data_get_rank(particleHandles[idxGroup].down) == mpi_rank){ - interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]); - } - else{ - interactionBuffer.data.reset(nullptr); - } + interactionBuffer.size = tree->getParticleGroup(idxGroup)->getExtractBufferSize(interactionBuffer.leavesToExtract); + // I allocate only if I will use it to extract + if(starpu_mpi_data_get_rank(particleHandles[idxGroup].down) == mpi_rank){ + interactionBuffer.data.reset(new unsigned char[interactionBuffer.size]); + } + else{ + interactionBuffer.data.reset(nullptr); + } - int registeringNode = starpu_mpi_data_get_rank(particleHandles[idxGroup].down); - int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1; - starpu_variable_data_register(&interactionBuffer.symb, where, - (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size); - starpu_mpi_data_register(interactionBuffer.symb, tag++, registeringNode); + int registeringNode = starpu_mpi_data_get_rank(particleHandles[idxGroup].down); + int where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1; + starpu_variable_data_register(&interactionBuffer.symb, where, + (uintptr_t)interactionBuffer.data.get(), interactionBuffer.size); + starpu_mpi_data_register(interactionBuffer.symb, tag++, registeringNode); - ParticleExtractedHandles* interactionBufferPtr = &interactionBuffer; - starpu_mpi_insert_task(MPI_COMM_WORLD, - &p2p_extract, - STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*), + ParticleExtractedHandles* interactionBufferPtr = &interactionBuffer; + starpu_mpi_insert_task(MPI_COMM_WORLD, + &p2p_extract, + STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), #endif - STARPU_R, particleHandles[idxGroup].symb, - STARPU_RW, interactionBuffer.symb, 0); - - // Move to a new memory block that is on the same node as A - // B' to B''' - duplicatedParticlesBuffer.emplace_back(); - DuplicatedParticlesHandle& duplicateA = duplicatedParticlesBuffer.back(); - duplicateA.size = tree->getParticleGroup(idxGroup)->getBufferSizeInByte(); - if(starpu_mpi_data_get_rank(particleHandles[interactionid].down) == mpi_rank){ - // Reuse block but just to perform the send - duplicateA.data = (unsigned char*) FAlignedMemory::AllocateBytes<64>(duplicateA.size);// = const_cast<unsigned char*>(tree->getParticleGroup(idxGroup)->getRawBuffer()); - } - else{ - duplicateA.data = nullptr; - } + STARPU_R, particleHandles[idxGroup].symb, + STARPU_RW, interactionBuffer.symb, 0); + + // Move to a new memory block that is on the same node as A + // B' to B''' + duplicatedParticlesBuffer.emplace_back(); + DuplicatedParticlesHandle& duplicateA = duplicatedParticlesBuffer.back(); + duplicateA.size = tree->getParticleGroup(idxGroup)->getBufferSizeInByte(); + if(starpu_mpi_data_get_rank(particleHandles[interactionid].down) == mpi_rank){ + // Reuse block but just to perform the send + duplicateA.data = (unsigned char*) FAlignedMemory::AllocateBytes<64>(duplicateA.size);// = const_cast<unsigned char*>(tree->getParticleGroup(idxGroup)->getRawBuffer()); + } + else{ + duplicateA.data = nullptr; + } - registeringNode = starpu_mpi_data_get_rank(particleHandles[interactionid].down); - where = (registeringNode == mpi_rank) ? 
STARPU_MAIN_RAM : -1; - starpu_variable_data_register(&duplicateA.symb, where, - (uintptr_t)duplicateA.data, duplicateA.size); - starpu_mpi_data_register(duplicateA.symb, tag++, registeringNode); + registeringNode = starpu_mpi_data_get_rank(particleHandles[interactionid].down); + where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1; + starpu_variable_data_register(&duplicateA.symb, where, + (uintptr_t)duplicateA.data, duplicateA.size); + starpu_mpi_data_register(duplicateA.symb, tag++, registeringNode); - const unsigned char* dataPtr = const_cast<unsigned char*>(tree->getParticleGroup(idxGroup)->getRawBuffer()); - size_t sizeData = duplicateA.size; + const unsigned char* dataPtr = const_cast<unsigned char*>(tree->getParticleGroup(idxGroup)->getRawBuffer()); + size_t sizeData = duplicateA.size; - starpu_mpi_insert_task(MPI_COMM_WORLD, - &p2p_insert_bis, - STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*), - STARPU_VALUE, &dataPtr, sizeof(dataPtr), - STARPU_VALUE, &sizeData, sizeof(sizeData), + starpu_mpi_insert_task(MPI_COMM_WORLD, + &p2p_insert_bis, + STARPU_VALUE, &interactionBufferPtr, sizeof(ParticleExtractedHandles*), + STARPU_VALUE, &dataPtr, sizeof(dataPtr), + STARPU_VALUE, &sizeData, sizeof(sizeData), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), #endif - STARPU_R, interactionBuffer.symb, - STARPU_RW, duplicateA.symb, 0); + STARPU_R, interactionBuffer.symb, + STARPU_RW, duplicateA.symb, 0); - starpu_mpi_insert_task(MPI_COMM_WORLD, - &p2p_cl_inout_mpi, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &outsideInteractionsOpposite, sizeof(outsideInteractionsOpposite), - STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int), + starpu_mpi_insert_task(MPI_COMM_WORLD, + &p2p_cl_inout_mpi, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &outsideInteractionsOpposite, sizeof(outsideInteractionsOpposite), + STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), #endif - STARPU_R, particleHandles[interactionid].symb, - (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down, - STARPU_R, duplicateA.symb, + STARPU_R, particleHandles[interactionid].symb, + (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down, + STARPU_R, duplicateA.symb, #ifdef STARPU_USE_TASK_NAME #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, p2pOuterTaskNames.get(), + STARPU_NAME, p2pOuterTaskNames.get(), #else - //"P2P_out-nb_i_p_nb_i_p_s" - STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(), - tree->getParticleGroup(interactionid)->getSizeOfInterval(), - tree->getParticleGroup(interactionid)->getNbParticlesInGroup(), - tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(), - tree->getParticleGroup(idxGroup)->getSizeOfInterval(), - tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(), - outsideInteractions->size(), - tree->getParticleGroup(interactionid)->getStartingIndex(), - tree->getParticleGroup(interactionid)->getEndingIndex(), - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - 
starpu_mpi_data_get_rank(particleHandles[interactionid].down)), + //"P2P_out-nb_i_p_nb_i_p_s" + STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n", + tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(), + tree->getParticleGroup(interactionid)->getSizeOfInterval(), + tree->getParticleGroup(interactionid)->getNbParticlesInGroup(), + tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(), + tree->getParticleGroup(idxGroup)->getSizeOfInterval(), + tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(), + outsideInteractions->size(), + tree->getParticleGroup(interactionid)->getStartingIndex(), + tree->getParticleGroup(interactionid)->getEndingIndex(), + tree->getParticleGroup(idxGroup)->getStartingIndex(), + tree->getParticleGroup(idxGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(particleHandles[interactionid].down)), #endif #endif - 0); - } + 0); + } #else + { + starpu_mpi_insert_task(MPI_COMM_WORLD, + &p2p_cl_inout_mpi, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), + STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), + #endif + STARPU_R, particleHandles[idxGroup].symb, + (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, + STARPU_R, particleHandles[interactionid].symb, + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, p2pOuterTaskNames.get(), + #else + //"P2P_out-nb_i_p_nb_i_p_s" + STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n", + tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(), + tree->getParticleGroup(idxGroup)->getSizeOfInterval(), + tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(), + tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(), + tree->getParticleGroup(interactionid)->getSizeOfInterval(), + tree->getParticleGroup(interactionid)->getNbParticlesInGroup(), + outsideInteractions->size(), + tree->getParticleGroup(idxGroup)->getStartingIndex(), + tree->getParticleGroup(idxGroup)->getEndingIndex(), + tree->getParticleGroup(interactionid)->getStartingIndex(), + tree->getParticleGroup(interactionid)->getEndingIndex(), + starpu_mpi_data_get_rank(particleHandles[idxGroup].down)), + #endif + #endif + 0); + std::vector<OutOfBlockInteraction>* outsideInteractionsOpposite = new std::vector<OutOfBlockInteraction>(externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions); + for(unsigned int i = 0; i < outsideInteractionsOpposite->size(); ++i) { - starpu_mpi_insert_task(MPI_COMM_WORLD, - &p2p_cl_inout_mpi, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions), - STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int), - #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), - #endif - STARPU_R, particleHandles[idxGroup].symb, - (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, - STARPU_R, particleHandles[interactionid].symb, - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, p2pOuterTaskNames.get(), - #else - //"P2P_out-nb_i_p_nb_i_p_s" - STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - 
tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(), - tree->getParticleGroup(idxGroup)->getSizeOfInterval(), - tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(), - tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(), - tree->getParticleGroup(interactionid)->getSizeOfInterval(), - tree->getParticleGroup(interactionid)->getNbParticlesInGroup(), - outsideInteractions->size(), - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - tree->getParticleGroup(interactionid)->getStartingIndex(), - tree->getParticleGroup(interactionid)->getEndingIndex(), - starpu_mpi_data_get_rank(particleHandles[idxGroup].down)), - #endif - #endif - 0); - std::vector<OutOfBlockInteraction>* outsideInteractionsOpposite = new std::vector<OutOfBlockInteraction>(externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions); - for(unsigned int i = 0; i < outsideInteractionsOpposite->size(); ++i) - { - MortonIndex tmp = outsideInteractionsOpposite->at(i).outIndex; - outsideInteractionsOpposite->at(i).outIndex = outsideInteractionsOpposite->at(i).insideIndex; - outsideInteractionsOpposite->at(i).insideIndex = tmp; - int tmp2 = outsideInteractionsOpposite->at(i).insideIdxInBlock; - outsideInteractionsOpposite->at(i).insideIdxInBlock = outsideInteractionsOpposite->at(i).outsideIdxInBlock; - outsideInteractionsOpposite->at(i).outsideIdxInBlock = tmp2; - outsideInteractionsOpposite->at(i).relativeOutPosition = getOppositeInterIndex(outsideInteractionsOpposite->at(i).relativeOutPosition); - } - externalInteractionsLeafLevelOpposite.push_front(outsideInteractionsOpposite); - starpu_mpi_insert_task(MPI_COMM_WORLD, - &p2p_cl_inout_mpi, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &outsideInteractionsOpposite, sizeof(outsideInteractionsOpposite), - STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int), - #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), - #endif - STARPU_R, particleHandles[interactionid].symb, - (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down, - STARPU_R, particleHandles[idxGroup].symb, - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, p2pOuterTaskNames.get(), - #else - //"P2P_out-nb_i_p_nb_i_p_s" - STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(), - tree->getParticleGroup(interactionid)->getSizeOfInterval(), - tree->getParticleGroup(interactionid)->getNbParticlesInGroup(), - tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(), - tree->getParticleGroup(idxGroup)->getSizeOfInterval(), - tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(), - outsideInteractions->size(), - tree->getParticleGroup(interactionid)->getStartingIndex(), - tree->getParticleGroup(interactionid)->getEndingIndex(), - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(particleHandles[interactionid].down)), - #endif - #endif - 0); + MortonIndex tmp = outsideInteractionsOpposite->at(i).outIndex; + outsideInteractionsOpposite->at(i).outIndex = outsideInteractionsOpposite->at(i).insideIndex; + outsideInteractionsOpposite->at(i).insideIndex = tmp; + int tmp2 = outsideInteractionsOpposite->at(i).insideIdxInBlock; + outsideInteractionsOpposite->at(i).insideIdxInBlock = 
outsideInteractionsOpposite->at(i).outsideIdxInBlock; + outsideInteractionsOpposite->at(i).outsideIdxInBlock = tmp2; + outsideInteractionsOpposite->at(i).relativeOutPosition = getOppositeInterIndex(outsideInteractionsOpposite->at(i).relativeOutPosition); } -#endif + externalInteractionsLeafLevelOpposite.push_front(outsideInteractionsOpposite); + starpu_mpi_insert_task(MPI_COMM_WORLD, + &p2p_cl_inout_mpi, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &outsideInteractionsOpposite, sizeof(outsideInteractionsOpposite), + STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2PExtern(), + #endif + STARPU_R, particleHandles[interactionid].symb, + (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down, + STARPU_R, particleHandles[idxGroup].symb, + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, p2pOuterTaskNames.get(), + #else + //"P2P_out-nb_i_p_nb_i_p_s" + STARPU_NAME, taskNames->print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n", + tree->getParticleGroup(interactionid)->getNumberOfLeavesInBlock(), + tree->getParticleGroup(interactionid)->getSizeOfInterval(), + tree->getParticleGroup(interactionid)->getNbParticlesInGroup(), + tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(), + tree->getParticleGroup(idxGroup)->getSizeOfInterval(), + tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(), + outsideInteractions->size(), + tree->getParticleGroup(interactionid)->getStartingIndex(), + tree->getParticleGroup(interactionid)->getEndingIndex(), + tree->getParticleGroup(idxGroup)->getStartingIndex(), + tree->getParticleGroup(idxGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(particleHandles[interactionid].down)), + #endif + #endif + 0); } - } - } - FLOG( timerOutBlock.tac() ); - FLOG( timerInBlock.tic() ); - for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ - starpu_mpi_insert_task(MPI_COMM_WORLD, - &p2p_cl_in, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int), +#endif + } + } + } + FLOG( timerOutBlock.tac() ); + FLOG( timerInBlock.tic() ); + for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ + starpu_mpi_insert_task(MPI_COMM_WORLD, + &p2p_cl_in, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &particleHandles[idxGroup].intervalSize, sizeof(int), #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2P(), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosP2P(), #endif - STARPU_R, particleHandles[idxGroup].symb, + STARPU_R, particleHandles[idxGroup].symb, #ifdef STARPU_USE_REDUX - STARPU_REDUX, particleHandles[idxGroup].down, + STARPU_REDUX, particleHandles[idxGroup].down, #else - (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, #endif #ifdef STARPU_USE_TASK_NAME #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, p2pTaskNames.get(), + STARPU_NAME, p2pTaskNames.get(), #else - //"P2P-nb_i_p" - STARPU_NAME, taskNames->print("P2P", "%d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(), - tree->getParticleGroup(idxGroup)->getSizeOfInterval(), - tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(), - 
tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(particleHandles[idxGroup].down)), + //"P2P-nb_i_p" + STARPU_NAME, taskNames->print("P2P", "%d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", + tree->getParticleGroup(idxGroup)->getNumberOfLeavesInBlock(), + tree->getParticleGroup(idxGroup)->getSizeOfInterval(), + tree->getParticleGroup(idxGroup)->getNbParticlesInGroup(), + tree->getParticleGroup(idxGroup)->getStartingIndex(), + tree->getParticleGroup(idxGroup)->getEndingIndex(), + tree->getParticleGroup(idxGroup)->getStartingIndex(), + tree->getParticleGroup(idxGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(particleHandles[idxGroup].down)), #endif #endif - 0); - } - FLOG( timerInBlock.tac() ); - - FLOG( FLog::Controller << "\t\t directPass in " << timer.tacAndElapsed() << "s\n" ); - FLOG( FLog::Controller << "\t\t\t inblock in " << timerInBlock.elapsed() << "s\n" ); - FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" ); - } - ///////////////////////////////////////////////////////////////////////////////////// - /// Merge Pass - ///////////////////////////////////////////////////////////////////////////////////// - - void mergePass(){ - FLOG( FTic timer; ); - - FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size()); - - for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ - starpu_mpi_insert_task(MPI_COMM_WORLD, - &l2p_cl, - STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), - STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int), - #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2P(), - #endif - STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb, - STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].down, - STARPU_R, particleHandles[idxGroup].symb, - #ifdef STARPU_USE_REDUX - STARPU_REDUX, particleHandles[idxGroup].down, - #else - (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, - #endif - #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS - STARPU_NAME, l2pTaskNames.get(), - #else - //"L2P-nb_i_p" - STARPU_NAME, taskNames->print("L2P", "%d, %lld, %lld, %lld, %lld, %d\n", - tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(), - tree->getCellGroup(tree->getHeight()-1,idxGroup)->getSizeOfInterval(), - tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(), - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - starpu_mpi_data_get_rank(particleHandles[idxGroup].down)), - #endif - #endif - 0); - } - - FLOG( FLog::Controller << "\t\t L2P in " << timer.tacAndElapsed() << "s\n" ); - } + 0); + } + FLOG( timerInBlock.tac() ); + + FLOG( FLog::Controller << "\t\t directPass in " << timer.tacAndElapsed() << "s\n" ); + FLOG( FLog::Controller << "\t\t\t inblock in " << timerInBlock.elapsed() << "s\n" ); + FLOG( FLog::Controller << "\t\t\t outblock in " << timerOutBlock.elapsed() << "s\n" ); + } + ///////////////////////////////////////////////////////////////////////////////////// + /// Merge Pass + ///////////////////////////////////////////////////////////////////////////////////// + + void mergePass(){ + FLOG( FTic timer; ); + + FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size()); + + 
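The P2P and L2P insertions above (and below) all follow the same StarPU-MPI scheme: a handle is backed by host memory only on the rank that owns it, the owner is declared with an MPI tag, and then every rank inserts the same task so that StarPU-MPI runs it on the owner and posts the matching transfers for remote handles. The following minimal sketch shows that pattern in isolation, not ScalFMM's actual code: the helper name registerAndInsert, its parameters, and the single STARPU_RW access are illustrative assumptions; only starpu_variable_data_register, starpu_mpi_data_register and starpu_mpi_insert_task are taken from the surrounding patch, and <starpu_mpi.h> is assumed to be available.

#include <starpu_mpi.h>

// Hypothetical helper: register `buffer` as a StarPU variable owned by
// `owningRank`, then insert one task of `codelet` that reads/writes it.
static void registerAndInsert(starpu_data_handle_t& handle,
                              unsigned char* buffer, size_t sizeInBytes,
                              int owningRank, int myRank, int& mpiTag,
                              starpu_codelet* codelet)
{
    // Back the handle with real host memory only on the owning rank; every
    // other rank registers a placeholder (-1) so StarPU-MPI can route the data.
    const int where = (owningRank == myRank) ? STARPU_MAIN_RAM : -1;
    starpu_variable_data_register(&handle, where, (uintptr_t)buffer, sizeInBytes);
    starpu_mpi_data_register(handle, mpiTag++, owningRank);

    // Every rank makes this call; StarPU-MPI executes the task on the rank
    // that owns the written data and issues the required sends/receives.
    starpu_mpi_insert_task(MPI_COMM_WORLD, codelet,
                           STARPU_RW, handle,
                           0);
}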
for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ + starpu_mpi_insert_task(MPI_COMM_WORLD, + &l2p_cl, + STARPU_VALUE, &wrapperptr, sizeof(wrapperptr), + STARPU_VALUE, &cellHandles[tree->getHeight()-1][idxGroup].intervalSize, sizeof(int), + #ifdef SCALFMM_STARPU_USE_PRIO + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2P(), + #endif + STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].symb, + STARPU_R, cellHandles[tree->getHeight()-1][idxGroup].down, + STARPU_R, particleHandles[idxGroup].symb, + #ifdef STARPU_USE_REDUX + STARPU_REDUX, particleHandles[idxGroup].down, + #else + (STARPU_RW | STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, + #endif + #ifdef STARPU_USE_TASK_NAME + #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS + STARPU_NAME, l2pTaskNames.get(), + #else + //"L2P-nb_i_p" + STARPU_NAME, taskNames->print("L2P", "%d, %lld, %lld, %lld, %lld, %d\n", + tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(), + tree->getCellGroup(tree->getHeight()-1,idxGroup)->getSizeOfInterval(), + tree->getCellGroup(tree->getHeight()-1,idxGroup)->getNumberOfCellsInBlock(), + tree->getParticleGroup(idxGroup)->getStartingIndex(), + tree->getParticleGroup(idxGroup)->getEndingIndex(), + starpu_mpi_data_get_rank(particleHandles[idxGroup].down)), + #endif + #endif + 0); + } + + FLOG( FLog::Controller << "\t\t L2P in " << timer.tacAndElapsed() << "s\n" ); + } #ifdef STARPU_USE_REDUX - void readParticle(){ - FLOG( FTic timer; ); + void readParticle(){ + FLOG( FTic timer; ); - FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size()); + FAssertLF(cellHandles[tree->getHeight()-1].size() == particleHandles.size()); - for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ - starpu_mpi_insert_task(MPI_COMM_WORLD, - &p2p_redux_read, + for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ + starpu_mpi_insert_task(MPI_COMM_WORLD, + &p2p_redux_read, #ifdef SCALFMM_STARPU_USE_PRIO - STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2P(), + STARPU_PRIORITY, PrioClass::Controller().getInsertionPosL2P(), #endif - STARPU_R, particleHandles[idxGroup].down, + STARPU_R, particleHandles[idxGroup].down, #ifdef STARPU_USE_TASK_NAME - STARPU_NAME, "read-particle", + STARPU_NAME, "read-particle", #endif - 0); - } - } + 0); + } + } #endif }; diff --git a/Src/GroupTree/Core/FGroupTools.hpp b/Src/GroupTree/Core/FGroupTools.hpp new file mode 100644 index 000000000..e11db9eb8 --- /dev/null +++ b/Src/GroupTree/Core/FGroupTools.hpp @@ -0,0 +1,266 @@ +#ifndef FGROUPTOOLS_HPP +#define FGROUPTOOLS_HPP + +#include "ScalFmmConfig.h" +#include "Utils/FGlobal.hpp" +#include "Utils/FPoint.hpp" +#ifdef SCALFMM_USE_MPI +#include "Utils/FMpi.hpp" +#endif + + +namespace groupTree { + // Structure for 1 particle + template<typename FReal> + struct particle_t { + using position_t = FPoint<FReal>; + position_t pos; + FReal phi; + MortonIndex morton_index; + const auto& position() const { + return pos; + } + const FPoint<FReal>& getPosition(){ + return pos; + } + const FPoint<FReal>& getPosition() const{ + return pos; + } + const auto& physicalValue() const{ + return phi; + } + const auto& getPositions() const { + return pos; + } + void fill(const position_t &inPos, const FReal &inPhyVal, MortonIndex & inMortonIndex){ + pos = inPos ; phi = inPhyVal ; morton_index = inMortonIndex ; + } + + int weight() const { return 1;} + MortonIndex getMorton() const{ + return morton_index; + + } + friend constexpr MortonIndex 
morton_index(const particle_t& p) { + return p.morton_index; + } + }; + + + // + // param[in] FMpiComm + // param[in] elapsedTime time on each processor + // param[out] minTime the minimum time on each processor + // param[out] maxTime the maximal time on each processor + // param[out] meanTime the mean time on each processor + // + void timeAverage(const FMpi &FMpiComm, double &elapsedTime , double &minTime, + double &maxTime, double &meanTime) + { + double * allTimes = nullptr ; + int myrank = FMpiComm.global().processId() , nprocs=FMpiComm.global().processCount() ; + if(myrank == 0) + { + allTimes = new double[nprocs] ; + } +#ifdef SCALFMM_USE_MPI + MPI_Gather(&elapsedTime,1, MPI_DOUBLE, allTimes, 1, MPI_DOUBLE,0 /* root*/,FMpiComm.global().getComm()) ; +#endif + if(myrank == 0) + { + minTime = allTimes[0], maxTime = allTimes[0], meanTime = allTimes[0] ; + + for (int i = 1 ; i < nprocs ; ++i) { + minTime = std::min(minTime, allTimes[i]) ; + maxTime = std::max(maxTime, allTimes[i]) ; + meanTime += allTimes[i] ; + } + meanTime /= nprocs; + + } + FMpiComm.global().barrier() ; + } + template <class OCTREECLASS> + void saveSolutionInFile(const std::string &fileName, const std::size_t& NbPoints, + OCTREECLASS &tree) { + using REALTYPE = typename OCTREECLASS::FRealType ; + FFmaGenericWriter<REALTYPE> writer(fileName) ; + // + REALTYPE * particles = new REALTYPE[8*NbPoints] ; + memset(particles,0,8*NbPoints*sizeof(REALTYPE)); + FSize j = 0 ; + #ifdef _VERBOSE_LEAF + int countLeaf = 0, coutPart=0; + #endif + tree.forEachLeaf([&](typename OCTREECLASS::LeafClass* leaf){ + // + // Input + const REALTYPE*const posX = leaf->getTargets()->getPositions()[0]; + const REALTYPE*const posY = leaf->getTargets()->getPositions()[1]; + const REALTYPE*const posZ = leaf->getTargets()->getPositions()[2]; + const REALTYPE*const physicalValues = leaf->getTargets()->getPhysicalValues(); + // const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); + // + // Computed data + const REALTYPE*const potentials = leaf->getTargets()->getPotentials(); + const REALTYPE*const forcesX = leaf->getTargets()->getForcesX(); + const REALTYPE*const forcesY = leaf->getTargets()->getForcesY(); + const REALTYPE*const forcesZ = leaf->getTargets()->getForcesZ(); + // + // + const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); + #ifdef _VERBOSE_LEAF + std::cout << "Leaf " << countLeaf << " Particles : [ " << coutPart << ", " <<coutPart+nbParticlesInLeaf -1 << " ] " << nbParticlesInLeaf << std::endl; + coutPart += nbParticlesInLeaf ; ++countLeaf; + #endif + for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart,j+=8){ + // j = 8*indexes[idxPart]; + // j = 8*idxPart; + particles[j] = posX[idxPart] ; + particles[j+1] = posY[idxPart] ; + particles[j+2] = posZ[idxPart] ; + particles[j+3] = physicalValues[idxPart] ; + particles[j+4] = potentials[idxPart] ; + particles[j+5] = forcesX[idxPart] ; + particles[j+6] = forcesY[idxPart] ; + particles[j+7] = forcesZ[idxPart] ; + } + }); + + writer.writeHeader( tree.getBoxCenter(), tree.getBoxWidth() , NbPoints, sizeof(REALTYPE), 8) ; + writer.writeArrayOfReal(particles, 8 , NbPoints); + + delete[] particles; + } + + template< typename FReal, class GROUPTREE_T,class GROUPALGO_T, class OCTTREE_T> + void checkCellTree(GROUPTREE_T &groupedTree, GROUPALGO_T & groupalgo, OCTTREE_T &treeCheck, const FReal &epsilon){ + // + + std::vector<bool> OK(groupedTree.getHeight(),true) ; + groupedTree.forEachCellWithLevel( + [&](typename GROUPTREE_T::GroupSymbolCellClass_T* gsymb , + 
typename GROUPTREE_T::GroupCellUpClass_T* gmul , + typename GROUPTREE_T::GroupCellDownClass_T* gloc , + const int level) + { + if(groupalgo.isDataOwnedBerenger(gsymb->getMortonIndex(), level)) + { + const auto * cell = treeCheck.getCell(gsymb->getMortonIndex(), level); + if(cell == nullptr){ + std::cout << "[Empty] Error cell should exist " << gsymb->getMortonIndex() << "\n"; + OK[level] = false ; + } + else { + FMath::FAccurater<FReal> diffUp; + diffUp.add(cell->getMultipoleData().get(0), gmul->get(0), gmul->getVectorSize()); + if(diffUp.getRelativeInfNorm() > epsilon || diffUp.getRelativeL2Norm() > epsilon){ + std::cout << "[Up] Up is different at index " << gsymb->getMortonIndex() << " level " << level << " is " << diffUp << "\n"; + OK[level] = false ; + + } + FMath::FAccurater<FReal> diffDown; + diffDown.add(cell->getLocalExpansionData().get(0), gloc->get(0), gloc->getVectorSize()); + if(diffDown.getRelativeInfNorm() > epsilon || diffDown.getRelativeL2Norm() > epsilon){ + std::cout << "[Down] Down is different at index " << gsymb->getMortonIndex() << " level " << level << " is " << diffDown << "\n"; + OK[level] = false ; + + } + } + } + }); + for (std::size_t l = 0 ; l < OK.size(); ++l){ + std:: cout << " Level ( " << l << " ) --> " << (OK[l] ? " Ok" : "Error " ) <<std::endl; + } + std:: cout << " checkCellTree --> done" <<std::endl; + + } + template< typename FReal, class GROUPTREE_T,class GROUPALGO_T, class OCTTREE_T> + void checkLeaves(GROUPTREE_T &groupedTree, GROUPALGO_T & groupalgo, OCTTREE_T &treeCheck, const FReal &epsilon){ + // + FMath::FAccurater<FReal> potentialGlobalDiff; + const int NbLevels = groupedTree.getHeight(); + bool OK = true ; + groupedTree.template forEachCellMyLeaf<typename GROUPTREE_T::LeafClass_T >( + [&](typename GROUPTREE_T::GroupSymbolCellClass_T* gsymb , + typename GROUPTREE_T::GroupCellUpClass_T* /* gmul */, + typename GROUPTREE_T::GroupCellDownClass_T* /* gloc */, + typename GROUPTREE_T::LeafClass_T * leafTarget + ) + { + + if(groupalgo.isDataOwnedBerenger(gsymb->getMortonIndex(), NbLevels-1)) + { + const auto * targets = treeCheck.getLeafSrc(gsymb->getMortonIndex()); + if(targets == nullptr){ + std::cout << "[Empty] Error leaf should exist " << gsymb->getMortonIndex() << "\n"; + OK = false ; + + } + else{ + const FReal*const gposX = leafTarget->getPositions()[0]; + const FReal*const gposY = leafTarget->getPositions()[1]; + const FReal*const gposZ = leafTarget->getPositions()[2]; + const FSize gnbPartsInLeafTarget = leafTarget->getNbParticles(); + const FReal*const gforceX = leafTarget->getForcesX(); + const FReal*const gforceY = leafTarget->getForcesY(); + const FReal*const gforceZ = leafTarget->getForcesZ(); + const FReal*const gpotential = leafTarget->getPotentials(); + + const FReal*const posX = targets->getPositions()[0]; + const FReal*const posY = targets->getPositions()[1]; + const FReal*const posZ = targets->getPositions()[2]; + const FSize nbPartsInLeafTarget = targets->getNbParticles(); + const FReal*const forceX = targets->getForcesX(); + const FReal*const forceY = targets->getForcesY(); + const FReal*const forceZ = targets->getForcesZ(); + const FReal*const potential = targets->getPotentials(); + + if(gnbPartsInLeafTarget != nbPartsInLeafTarget){ + std::cout << "[Empty] Not the same number of particles at " << gsymb->getMortonIndex() + << " gnbPartsInLeafTarget " << gnbPartsInLeafTarget << " nbPartsInLeafTarget " << nbPartsInLeafTarget << "\n"; + OK = false ; + }else{ + FMath::FAccurater<FReal> potentialDiff; + FMath::FAccurater<FReal> fx, 
fy, fz; + for(FSize idxPart = 0 ; idxPart < nbPartsInLeafTarget ; ++idxPart){ + if(gposX[idxPart] != posX[idxPart] || gposY[idxPart] != posY[idxPart] || gposZ[idxPart] != posZ[idxPart]){ + std::cout << "[Empty] Not the same particlea at " << gsymb->getMortonIndex() << " idx " << idxPart << " " + << gposX[idxPart] << " " << posX[idxPart] << " " << gposY[idxPart] << " " << posY[idxPart] + << " " << gposZ[idxPart] << " " << posZ[idxPart] << "\n"; + OK = false ; + } + else{ + potentialGlobalDiff.add(potential[idxPart], gpotential[idxPart]); + potentialDiff.add(potential[idxPart], gpotential[idxPart]); + fx.add(forceX[idxPart], gforceX[idxPart]); + fy.add(forceY[idxPart], gforceY[idxPart]); + fz.add(forceZ[idxPart], gforceZ[idxPart]); + } + } + if(potentialDiff.getRelativeInfNorm() > epsilon || potentialDiff.getRelativeL2Norm() > epsilon){ + std::cout << " potentialDiff is different at index " << gsymb->getMortonIndex() << " is " << potentialDiff << "\n"; + OK = false ; + } + if(fx.getRelativeInfNorm() > epsilon || fx.getRelativeL2Norm() > epsilon){ + std::cout << " fx is different at index " << gsymb->getMortonIndex() << " is " << fx << "\n"; + OK = false ; + } + if(fy.getRelativeInfNorm() > epsilon || fy.getRelativeL2Norm() > epsilon){ + std::cout << " fy is different at index " << gsymb->getMortonIndex() << " is " << fy << "\n"; + OK = false ; + } + if(fz.getRelativeInfNorm() > epsilon || fz.getRelativeL2Norm() > epsilon){ + OK = false ; + std::cout << " fz is different at index " << gsymb->getMortonIndex() << " is " << fz << "\n"; + } + } + } + } + }); + std::cout << " potentialDiff is " << potentialGlobalDiff << "\n"; + std:: cout << " checkLeaves --> " << (OK ? " Ok" : "Error " ) <<std::endl; + + } +} +#endif // FGROUPTOOLS_HPP diff --git a/Src/GroupTree/Core/FGroupTree.hpp b/Src/GroupTree/Core/FGroupTree.hpp index 14240fcf6..8ae94bee8 100644 --- a/Src/GroupTree/Core/FGroupTree.hpp +++ b/Src/GroupTree/Core/FGroupTree.hpp @@ -1,5 +1,8 @@ +// ==== CMAKE ===== +// +// ================ -// Keep in private GIT +// #ifndef FGROUPTREE_HPP #define FGROUPTREE_HPP #include <vector> @@ -14,161 +17,170 @@ #include "FGroupOfParticles.hpp" #include "FGroupAttachedLeaf.hpp" #include "../../Kernels/P2P/FP2PParticleContainer.hpp" - - +#ifdef SCALFMM_USE_MPI +#include "FDistributedGroupTreeBuilder.hpp" +#endif template <class FReal, class SymbolCellClass, class PoleCellClass, class LocalCellClass, class GroupAttachedLeafClass, unsigned NbSymbAttributes, unsigned NbAttributesPerParticle, class AttributeClass = FReal> class FGroupTree { public: - typedef GroupAttachedLeafClass BasicAttachedClass; - typedef FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle,AttributeClass> ParticleGroupClass; - typedef FGroupOfCells<SymbolCellClass, PoleCellClass, LocalCellClass> CellGroupClass; + typedef GroupAttachedLeafClass BasicAttachedClass; // Leaf + typedef FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle,AttributeClass> ParticleGroupClass; + typedef FGroupOfCells<SymbolCellClass, PoleCellClass, LocalCellClass> CellGroupClass; + typedef SymbolCellClass GroupSymbolCellClass_T ; + typedef LocalCellClass GroupCellDownClass_T ; + typedef PoleCellClass GroupCellUpClass_T ; + typedef GroupAttachedLeafClass LeafClass_T; // Leaf protected: - //< height of the tree (1 => only the root) - const int _treeHeight; - //< max number of cells in a block - const int _nbElementsPerBlock; - //< all the blocks of the tree - std::vector<CellGroupClass*>* _cellBlocksPerLevel; - //< all the blocks of leaves 
- std::vector<ParticleGroupClass*> _particleBlocks; - - //< the space system center - const FPoint<FReal> boxCenter; - //< the space system corner (used to compute morton index) - const FPoint<FReal> boxCorner; - //< the space system width - const FReal boxWidth; - //< the width of a box at width level - const FReal boxWidthAtLeafLevel; + //< height of the tree (1 => only the root) + const int _treeHeight; + //< max number of cells in a block + const int _nbElementsPerBlock; + //< all the blocks of the tree + std::vector<CellGroupClass*>* _cellBlocksPerLevel; + //< all the blocks of leaves + std::vector<ParticleGroupClass*> _particleBlocks; + + //< the space system center + const FPoint<FReal> boxCenter; + //< the space system corner (used to compute morton index) + const FPoint<FReal> boxCorner; + //< the space system width + const FReal boxWidth; + //< the width of a box at width level + const FReal boxWidthAtLeafLevel; public: - typedef typename std::vector<CellGroupClass*>::iterator CellGroupIterator; - typedef typename std::vector<CellGroupClass*>::const_iterator CellGroupConstIterator; - typedef typename std::vector<ParticleGroupClass*>::iterator ParticleGroupIterator; - typedef typename std::vector<ParticleGroupClass*>::const_iterator ParticleGroupConstIterator; + typedef typename std::vector<CellGroupClass*>::iterator CellGroupIterator; + typedef typename std::vector<CellGroupClass*>::const_iterator CellGroupConstIterator; + typedef typename std::vector<ParticleGroupClass*>::iterator ParticleGroupIterator; + typedef typename std::vector<ParticleGroupClass*>::const_iterator ParticleGroupConstIterator; - /** This constructor create a blocked octree from a usual octree + /** This constructor create a blocked octree from a usual octree * The cell are allocated as in the usual octree (no copy constructor are called!) * Once allocated each cell receive its morton index and tree coordinate. * No blocks are allocated at level 0. 
*/ - template<class OctreeClass> - FGroupTree(const int in_treeHeight, const int in_nbElementsPerBlock, OctreeClass*const inOctreeSrc) - : _treeHeight(in_treeHeight), _nbElementsPerBlock(in_nbElementsPerBlock), _cellBlocksPerLevel(nullptr), - boxCenter(inOctreeSrc->getBoxCenter()), boxCorner(inOctreeSrc->getBoxCenter(),-(inOctreeSrc->getBoxWidth()/2)), - boxWidth(inOctreeSrc->getBoxWidth()), boxWidthAtLeafLevel(inOctreeSrc->getBoxWidth()/FReal(1<<(in_treeHeight-1))){ - _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight]; - - // Iterate on the tree and build - typename OctreeClass::Iterator octreeIterator(inOctreeSrc); - octreeIterator.gotoBottomLeft(); - - { // First leaf level, we create leaves and cells groups - const int idxLevel = _treeHeight-1; - typename OctreeClass::Iterator avoidGotoLeft = octreeIterator; - // For each cell at this level - do { - typename OctreeClass::Iterator blockIteratorInOctree = octreeIterator; - // Move the iterator per _nbElementsPerBlock (or until it cannot move right) - int sizeOfBlock = 1; - FSize nbParticlesInGroup = octreeIterator.getCurrentLeaf()->getSrc()->getNbParticles(); - while(sizeOfBlock < _nbElementsPerBlock && octreeIterator.moveRight()){ - sizeOfBlock += 1; - nbParticlesInGroup += octreeIterator.getCurrentLeaf()->getSrc()->getNbParticles(); - } - - // Create a block with the apropriate parameters - CellGroupClass*const newBlock = new CellGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(), - octreeIterator.getCurrentGlobalIndex()+1, - sizeOfBlock); - FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(blockIteratorInOctree.getCurrentGlobalIndex(), - octreeIterator.getCurrentGlobalIndex()+1, - sizeOfBlock, nbParticlesInGroup); - - // Initialize each cell of the block - int cellIdInBlock = 0; - size_t nbParticlesOffsetBeforeLeaf = 0; - while(cellIdInBlock != sizeOfBlock){ - const MortonIndex newNodeIndex = blockIteratorInOctree.getCurrentCell()->getMortonIndex(); - const FTreeCoordinate newNodeCoordinate = blockIteratorInOctree.getCurrentCell()->getCoordinate(); - // Add cell - newBlock->newCell(newNodeIndex, cellIdInBlock); - - SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock); - symbolic.setMortonIndex(newNodeIndex); - symbolic.setCoordinate(newNodeCoordinate); - symbolic.setLevel(idxLevel); - - // Add leaf - nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(newNodeIndex, cellIdInBlock, - blockIteratorInOctree.getCurrentLeaf()->getSrc()->getNbParticles(), - nbParticlesOffsetBeforeLeaf); - - BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock); - attachedLeaf.copyFromContainer(blockIteratorInOctree.getCurrentLeaf()->getSrc(), 0); - - cellIdInBlock += 1; - blockIteratorInOctree.moveRight(); - } - - // Keep the block - _cellBlocksPerLevel[idxLevel].push_back(newBlock); - _particleBlocks.push_back(newParticleBlock); - - // If we can move right then add another block - } while(octreeIterator.moveRight()); - - avoidGotoLeft.moveUp(); - octreeIterator = avoidGotoLeft; - } - - // For each level from heigth - 2 to 1 - for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){ - typename OctreeClass::Iterator avoidGotoLeft = octreeIterator; - // For each cell at this level - do { - typename OctreeClass::Iterator blockIteratorInOctree = octreeIterator; - // Move the iterator per _nbElementsPerBlock (or until it 
cannot move right) - int sizeOfBlock = 1; - while(sizeOfBlock < _nbElementsPerBlock && octreeIterator.moveRight()){ - sizeOfBlock += 1; - } - - // Create a block with the apropriate parameters - CellGroupClass*const newBlock = new CellGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(), - octreeIterator.getCurrentGlobalIndex()+1, - sizeOfBlock); - // Initialize each cell of the block - int cellIdInBlock = 0; - while(cellIdInBlock != sizeOfBlock){ - const MortonIndex newNodeIndex = blockIteratorInOctree.getCurrentCell()->getMortonIndex(); - const FTreeCoordinate newNodeCoordinate = blockIteratorInOctree.getCurrentCell()->getCoordinate(); - newBlock->newCell(newNodeIndex, cellIdInBlock); - - SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock); - symbolic.setMortonIndex(newNodeIndex); - symbolic.setCoordinate(newNodeCoordinate); - symbolic.setLevel(idxLevel); + template<class OctreeClass> + FGroupTree() + {} + template<class OctreeClass> + FGroupTree(const int in_treeHeight, const int in_nbElementsPerBlock, OctreeClass*const inOctreeSrc) + : _treeHeight(in_treeHeight), _nbElementsPerBlock(in_nbElementsPerBlock), _cellBlocksPerLevel(nullptr), + boxCenter(inOctreeSrc->getBoxCenter()), boxCorner(inOctreeSrc->getBoxCenter(),-(inOctreeSrc->getBoxWidth()/2)), + boxWidth(inOctreeSrc->getBoxWidth()), boxWidthAtLeafLevel(inOctreeSrc->getBoxWidth()/FReal(1<<(in_treeHeight-1))){ + + _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight]; + + // Iterate on the tree and build + typename OctreeClass::Iterator octreeIterator(inOctreeSrc); + octreeIterator.gotoBottomLeft(); + + { // First leaf level, we create leaves and cells groups + const int idxLevel = _treeHeight-1; + typename OctreeClass::Iterator avoidGotoLeft = octreeIterator; + // For each cell at this level + do { + typename OctreeClass::Iterator blockIteratorInOctree = octreeIterator; + // Move the iterator per _nbElementsPerBlock (or until it cannot move right) + int sizeOfBlock = 1; + FSize nbParticlesInGroup = octreeIterator.getCurrentLeaf()->getSrc()->getNbParticles(); + while(sizeOfBlock < _nbElementsPerBlock && octreeIterator.moveRight()){ + sizeOfBlock += 1; + nbParticlesInGroup += octreeIterator.getCurrentLeaf()->getSrc()->getNbParticles(); + } - cellIdInBlock += 1; - blockIteratorInOctree.moveRight(); - } + // Create a block with the apropriate parameters + CellGroupClass*const newBlock = new CellGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(), + octreeIterator.getCurrentGlobalIndex()+1, + sizeOfBlock); + FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(blockIteratorInOctree.getCurrentGlobalIndex(), + octreeIterator.getCurrentGlobalIndex()+1, + sizeOfBlock, nbParticlesInGroup); + + // Initialize each cell of the block + int cellIdInBlock = 0; + size_t nbParticlesOffsetBeforeLeaf = 0; + while(cellIdInBlock != sizeOfBlock){ + const MortonIndex newNodeIndex = blockIteratorInOctree.getCurrentCell()->getMortonIndex(); + const FTreeCoordinate newNodeCoordinate = blockIteratorInOctree.getCurrentCell()->getCoordinate(); + // Add cell + newBlock->newCell(newNodeIndex, cellIdInBlock); + + SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock); + symbolic.setMortonIndex(newNodeIndex); + symbolic.setCoordinate(newNodeCoordinate); + symbolic.setLevel(idxLevel); + + // Add leaf + nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(newNodeIndex, 
cellIdInBlock, + blockIteratorInOctree.getCurrentLeaf()->getSrc()->getNbParticles(), + nbParticlesOffsetBeforeLeaf); + + BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock); + attachedLeaf.copyFromContainer(blockIteratorInOctree.getCurrentLeaf()->getSrc(), 0); + + cellIdInBlock += 1; + blockIteratorInOctree.moveRight(); + } - // Keep the block - _cellBlocksPerLevel[idxLevel].push_back(newBlock); + // Keep the block + _cellBlocksPerLevel[idxLevel].push_back(newBlock); + _particleBlocks.push_back(newParticleBlock); - // If we can move right then add another block - } while(octreeIterator.moveRight()); + // If we can move right then add another block + } while(octreeIterator.moveRight()); - avoidGotoLeft.moveUp(); - octreeIterator = avoidGotoLeft; - } + avoidGotoLeft.moveUp(); + octreeIterator = avoidGotoLeft; } - /** + // For each level from heigth - 2 to 1 + for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){ + typename OctreeClass::Iterator avoidGotoLeft = octreeIterator; + // For each cell at this level + do { + typename OctreeClass::Iterator blockIteratorInOctree = octreeIterator; + // Move the iterator per _nbElementsPerBlock (or until it cannot move right) + int sizeOfBlock = 1; + while(sizeOfBlock < _nbElementsPerBlock && octreeIterator.moveRight()){ + sizeOfBlock += 1; + } + + // Create a block with the apropriate parameters + CellGroupClass*const newBlock = new CellGroupClass(blockIteratorInOctree.getCurrentGlobalIndex(), + octreeIterator.getCurrentGlobalIndex()+1, + sizeOfBlock); + // Initialize each cell of the block + int cellIdInBlock = 0; + while(cellIdInBlock != sizeOfBlock){ + const MortonIndex newNodeIndex = blockIteratorInOctree.getCurrentCell()->getMortonIndex(); + const FTreeCoordinate newNodeCoordinate = blockIteratorInOctree.getCurrentCell()->getCoordinate(); + newBlock->newCell(newNodeIndex, cellIdInBlock); + + SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock); + symbolic.setMortonIndex(newNodeIndex); + symbolic.setCoordinate(newNodeCoordinate); + symbolic.setLevel(idxLevel); + + cellIdInBlock += 1; + blockIteratorInOctree.moveRight(); + } + + // Keep the block + _cellBlocksPerLevel[idxLevel].push_back(newBlock); + + // If we can move right then add another block + } while(octreeIterator.moveRight()); + + avoidGotoLeft.moveUp(); + octreeIterator = avoidGotoLeft; + } + } + + /** * This constructor create a group tree from a particle container index. * The morton index are computed and the particles are sorted in a first stage. * Then the leaf level is done. @@ -176,87 +188,170 @@ public: * It should be easy to make it parallel using for and tasks. 
* If no limite give inLeftLimite = -1 */ - template<class ParticleContainer> - FGroupTree(const int in_treeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, - const int in_nbElementsPerBlock, ParticleContainer* inParticlesContainer, - const bool particlesAreSorted = false, MortonIndex inLeftLimite = -1): - _treeHeight(in_treeHeight),_nbElementsPerBlock(in_nbElementsPerBlock),_cellBlocksPerLevel(nullptr), - boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth), - boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(in_treeHeight-1))) + template<class ParticleContainer> + FGroupTree(const int in_treeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, + const int in_nbElementsPerBlock, ParticleContainer* inParticlesContainer, + const bool particlesAreSorted = false, MortonIndex inLeftLimite = -1): + _treeHeight(in_treeHeight),_nbElementsPerBlock(in_nbElementsPerBlock),_cellBlocksPerLevel(nullptr), + boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth), + boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(in_treeHeight-1))) + { + _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight]; + + MortonIndex* currentBlockIndexes = new MortonIndex[_nbElementsPerBlock]; + // First we work at leaf level { + // Build morton index for particles + struct ParticleSortingStruct{ + FSize originalIndex; + MortonIndex mindex; + }; + // Convert position to morton index + const FSize nbParticles = inParticlesContainer->getNbParticles(); + ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles]; + { + const FReal* xpos = inParticlesContainer->getPositions()[0]; + const FReal* ypos = inParticlesContainer->getPositions()[1]; + const FReal* zpos = inParticlesContainer->getPositions()[2]; + + for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){ + const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth, + _treeHeight, + FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) ); + const MortonIndex particleIndex = host.getMortonIndex(); + particlesToSort[idxPart].mindex = particleIndex; + particlesToSort[idxPart].originalIndex = idxPart; + } + } + + // Sort if needed + if(particlesAreSorted == false){ + FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){ + return v1.mindex <= v2.mindex; + }); + } - _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight]; - - MortonIndex* currentBlockIndexes = new MortonIndex[_nbElementsPerBlock]; - // First we work at leaf level - { - // Build morton index for particles - struct ParticleSortingStruct{ - FSize originalIndex; - MortonIndex mindex; - }; - // Convert position to morton index - const FSize nbParticles = inParticlesContainer->getNbParticles(); - ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles]; - { - const FReal* xpos = inParticlesContainer->getPositions()[0]; - const FReal* ypos = inParticlesContainer->getPositions()[1]; - const FReal* zpos = inParticlesContainer->getPositions()[2]; - - for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){ - const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth, - _treeHeight, - FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) ); - const MortonIndex particleIndex = host.getMortonIndex(); - particlesToSort[idxPart].mindex = particleIndex; - 
particlesToSort[idxPart].originalIndex = idxPart; + FAssertLF(nbParticles == 0 || inLeftLimite < particlesToSort[0].mindex); + // Convert to block + const int idxLevel = (_treeHeight - 1); + FSize* nbParticlesPerLeaf = new FSize[_nbElementsPerBlock]; + FSize firstParticle = 0; + // We need to proceed each group in sub level + while(firstParticle != nbParticles){ + int sizeOfBlock = 0; + FSize lastParticle = firstParticle; + // Count until end of sub group is reached or we have enough cells + while(sizeOfBlock < _nbElementsPerBlock && lastParticle < nbParticles){ + if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){ + currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex; + nbParticlesPerLeaf[sizeOfBlock] = 1; + sizeOfBlock += 1; + } + else{ + nbParticlesPerLeaf[sizeOfBlock-1] += 1; } + lastParticle += 1; } - - // Sort if needed - if(particlesAreSorted == false){ - FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){ - return v1.mindex <= v2.mindex; - }); + while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){ + nbParticlesPerLeaf[sizeOfBlock-1] += 1; + lastParticle += 1; } - FAssertLF(nbParticles == 0 || inLeftLimite < particlesToSort[0].mindex); - // Convert to block - const int idxLevel = (_treeHeight - 1); - FSize* nbParticlesPerLeaf = new FSize[_nbElementsPerBlock]; - FSize firstParticle = 0; - // We need to proceed each group in sub level - while(firstParticle != nbParticles){ - int sizeOfBlock = 0; - FSize lastParticle = firstParticle; - // Count until end of sub group is reached or we have enough cells - while(sizeOfBlock < _nbElementsPerBlock && lastParticle < nbParticles){ - if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){ - currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex; - nbParticlesPerLeaf[sizeOfBlock] = 1; - sizeOfBlock += 1; - } - else{ - nbParticlesPerLeaf[sizeOfBlock-1] += 1; - } - lastParticle += 1; - } - while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){ - nbParticlesPerLeaf[sizeOfBlock-1] += 1; - lastParticle += 1; + // Create a group + CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock); + FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0], + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock, lastParticle-firstParticle); + + // Init cells + size_t nbParticlesOffsetBeforeLeaf = 0; + FSize offsetParticles = firstParticle; + for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ + newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); + + SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock); + symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]); + FTreeCoordinate coord; + coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]); + symbolic.setCoordinate(coord); + symbolic.setLevel(idxLevel); + + // Add leaf + nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock, + nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf); + + BasicAttachedClass attachedLeaf 
= newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock); + // Copy each particle from the original position + for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){ + attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer); } + offsetParticles += nbParticlesPerLeaf[cellIdInBlock]; + } + + // Keep the block + _cellBlocksPerLevel[idxLevel].push_back(newBlock); + _particleBlocks.push_back(newParticleBlock); + + sizeOfBlock = 0; + firstParticle = lastParticle; + } + delete[] nbParticlesPerLeaf; + delete[] particlesToSort; + } + + // For each level from heigth - 2 to 1 + for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){ + inLeftLimite = (inLeftLimite == -1 ? inLeftLimite : (inLeftLimite>>3)); + + CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin(); + const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end(); + + // Skip blocks that do not respect limit + while(iterChildCells != iterChildEndCells + && ((*iterChildCells)->getEndingIndex()>>3) <= inLeftLimite){ + ++iterChildCells; + } + // If lower level is empty or all blocks skiped stop here + if(iterChildCells == iterChildEndCells){ + break; + } + + MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex(); + if((currentCellIndex>>3) <= inLeftLimite) currentCellIndex = ((inLeftLimite+1)<<3); + int sizeOfBlock = 0; + + // We need to proceed each group in sub level + while(iterChildCells != iterChildEndCells){ + // Count until end of sub group is reached or we have enough cells + while(sizeOfBlock < _nbElementsPerBlock && iterChildCells != iterChildEndCells ){ + if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3)) + && (*iterChildCells)->exists(currentCellIndex)){ + currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3); + sizeOfBlock += 1; + currentCellIndex = (((currentCellIndex>>3)+1)<<3); + } + else{ + currentCellIndex += 1; + } + // If we are at the end of the sub group, move to next + while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){ + ++iterChildCells; + // Update morton index + if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){ + currentCellIndex = (*iterChildCells)->getStartingIndex(); + } + } + } + // If group is full + if(sizeOfBlock == _nbElementsPerBlock || (sizeOfBlock && iterChildCells == iterChildEndCells)){ // Create a group CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock); - FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock, lastParticle-firstParticle); - + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock); // Init cells - size_t nbParticlesOffsetBeforeLeaf = 0; - FSize offsetParticles = firstParticle; for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); @@ -266,80 +361,206 @@ public: coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]); symbolic.setCoordinate(coord); symbolic.setLevel(idxLevel); - - // Add leaf - nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], 
cellIdInBlock, - nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf); - - BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock); - // Copy each particle from the original position - for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){ - attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer); - } - offsetParticles += nbParticlesPerLeaf[cellIdInBlock]; - } + } // Keep the block _cellBlocksPerLevel[idxLevel].push_back(newBlock); - _particleBlocks.push_back(newParticleBlock); sizeOfBlock = 0; - firstParticle = lastParticle; + } + } + } + delete[] currentBlockIndexes; + } + + /** + * This constructor create a group tree from a particle container index. + * The morton index are computed and the particles are sorted in a first stage. + * Then the leaf level is done. + * Finally the other leve are proceed one after the other. + * It should be easy to make it parallel using for and tasks. + * If no limite give inLeftLimite = -1 + * The cover ration is the minimum pourcentage of cell that should + * exist in a group (0 means no limite, 1 means the block must be dense) + * oneParent should be turned on if it is better to have one block parent + * per sublock (in case of have the cost of FMM that increase with the level + * this could be an asset). + */ + template<class ParticleContainer> + FGroupTree(const int in_treeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, + const int in_nbElementsPerBlock, ParticleContainer* inParticlesContainer, + const bool particlesAreSorted, const bool oneParent, + const FReal inCoverRatio = 0.0, MortonIndex inLeftLimite = -1): + _treeHeight(in_treeHeight),_nbElementsPerBlock(in_nbElementsPerBlock),_cellBlocksPerLevel(nullptr), + boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth), + boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(in_treeHeight-1))) + { + + FAssertLF(inCoverRatio == 0.0 || oneParent == true, "If a ratio is choosen oneParent should be turned on"); + const bool userCoverRatio = (inCoverRatio != 0.0); + + _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight]; + + MortonIndex* currentBlockIndexes = new MortonIndex[_nbElementsPerBlock]; + // First we work at leaf level + { + // Build morton index for particles + struct ParticleSortingStruct{ + FSize originalIndex; + MortonIndex mindex; + }; + // Convert position to morton index + const FSize nbParticles = inParticlesContainer->getNbParticles(); + ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles]; + { + const FReal* xpos = inParticlesContainer->getPositions()[0]; + const FReal* ypos = inParticlesContainer->getPositions()[1]; + const FReal* zpos = inParticlesContainer->getPositions()[2]; + + for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){ + const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth, + _treeHeight, + FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) ); + const MortonIndex particleIndex = host.getMortonIndex(); + particlesToSort[idxPart].mindex = particleIndex; + particlesToSort[idxPart].originalIndex = idxPart; + } + } + + // Sort if needed + if(particlesAreSorted == false){ + FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){ + return v1.mindex <= v2.mindex; + }); + } + + 
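+            // inLeftLimite is an exclusive lower bound: every particle handled below must have a
+            // Morton index strictly greater than it, which the assertion checks.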
FAssertLF(nbParticles == 0 || inLeftLimite < particlesToSort[0].mindex); + + // Convert to block + const int idxLevel = (_treeHeight - 1); + int* nbParticlesPerLeaf = new int[_nbElementsPerBlock]; + int firstParticle = 0; + // We need to proceed each group in sub level + while(firstParticle != nbParticles){ + int sizeOfBlock = 0; + int lastParticle = firstParticle; + // Count until end of sub group is reached or we have enough cells + while(sizeOfBlock < _nbElementsPerBlock && lastParticle < nbParticles + && (userCoverRatio == false + || sizeOfBlock == 0 + || currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex + || (FReal(sizeOfBlock+1)/FReal(particlesToSort[lastParticle].mindex-particlesToSort[firstParticle].mindex)) >= inCoverRatio)){ + if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){ + currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex; + nbParticlesPerLeaf[sizeOfBlock] = 1; + sizeOfBlock += 1; + } + else{ + nbParticlesPerLeaf[sizeOfBlock-1] += 1; + } + lastParticle += 1; + } + while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){ + nbParticlesPerLeaf[sizeOfBlock-1] += 1; + lastParticle += 1; + } + + // Create a group + CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock); + FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0], + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock, lastParticle-firstParticle); + + // Init cells + size_t nbParticlesOffsetBeforeLeaf = 0; + int offsetParticles = firstParticle; + for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ + newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); + + SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock); + symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]); + FTreeCoordinate coord; + coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]); + symbolic.setCoordinate(coord); + symbolic.setLevel(idxLevel); + + // Add leaf + nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock, + nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf); + + BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock); + // Copy each particle from the original position + for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){ + attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer); + } + offsetParticles += nbParticlesPerLeaf[cellIdInBlock]; } - delete[] nbParticlesPerLeaf; - delete[] particlesToSort; + + // Keep the block + _cellBlocksPerLevel[idxLevel].push_back(newBlock); + _particleBlocks.push_back(newParticleBlock); + + sizeOfBlock = 0; + firstParticle = lastParticle; } + delete[] nbParticlesPerLeaf; + delete[] particlesToSort; + } - // For each level from heigth - 2 to 1 - for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){ - inLeftLimite = (inLeftLimite == -1 ? 
inLeftLimite : (inLeftLimite>>3)); - CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin(); - const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end(); + // For each level from heigth - 2 to 1 + for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){ + inLeftLimite = (inLeftLimite == -1 ? inLeftLimite : (inLeftLimite>>3)); - // Skip blocks that do not respect limit - while(iterChildCells != iterChildEndCells - && ((*iterChildCells)->getEndingIndex()>>3) <= inLeftLimite){ - ++iterChildCells; - } - // If lower level is empty or all blocks skiped stop here - if(iterChildCells == iterChildEndCells){ - break; - } + CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin(); + const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end(); - MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex(); - if((currentCellIndex>>3) <= inLeftLimite) currentCellIndex = ((inLeftLimite+1)<<3); - int sizeOfBlock = 0; + // Skip blocks that do not respect limit + while(iterChildCells != iterChildEndCells + && ((*iterChildCells)->getEndingIndex()>>3) <= inLeftLimite){ + ++iterChildCells; + } + // If lower level is empty or all blocks skiped stop here + if(iterChildCells == iterChildEndCells){ + break; + } + + MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex(); + if((currentCellIndex>>3) <= inLeftLimite) currentCellIndex = ((inLeftLimite+1)<<3); + int sizeOfBlock = 0; + if(oneParent == false){ // We need to proceed each group in sub level while(iterChildCells != iterChildEndCells){ // Count until end of sub group is reached or we have enough cells while(sizeOfBlock < _nbElementsPerBlock && iterChildCells != iterChildEndCells ){ if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3)) - && (*iterChildCells)->exists(currentCellIndex)){ + && (*iterChildCells)->exists(currentCellIndex)){ currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3); sizeOfBlock += 1; currentCellIndex = (((currentCellIndex>>3)+1)<<3); - } + } else{ currentCellIndex += 1; - } + } // If we are at the end of the sub group, move to next while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){ ++iterChildCells; // Update morton index if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){ currentCellIndex = (*iterChildCells)->getStartingIndex(); - } - } - } + } + } + } // If group is full if(sizeOfBlock == _nbElementsPerBlock || (sizeOfBlock && iterChildCells == iterChildEndCells)){ // Create a group CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock); + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock); // Init cells for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); @@ -350,371 +571,254 @@ public: coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]); symbolic.setCoordinate(coord); symbolic.setLevel(idxLevel); - } + } // Keep the block _cellBlocksPerLevel[idxLevel].push_back(newBlock); sizeOfBlock = 0; - } - } - } - delete[] currentBlockIndexes; - } - - /** - * This constructor create a group tree from a particle container index. - * The morton index are computed and the particles are sorted in a first stage. - * Then the leaf level is done. - * Finally the other leve are proceed one after the other. 
- * It should be easy to make it parallel using for and tasks. - * If no limite give inLeftLimite = -1 - * The cover ration is the minimum pourcentage of cell that should - * exist in a group (0 means no limite, 1 means the block must be dense) - * oneParent should be turned on if it is better to have one block parent - * per sublock (in case of have the cost of FMM that increase with the level - * this could be an asset). - */ - template<class ParticleContainer> - FGroupTree(const int in_treeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, - const int in_nbElementsPerBlock, ParticleContainer* inParticlesContainer, - const bool particlesAreSorted, const bool oneParent, - const FReal inCoverRatio = 0.0, MortonIndex inLeftLimite = -1): - _treeHeight(in_treeHeight),_nbElementsPerBlock(in_nbElementsPerBlock),_cellBlocksPerLevel(nullptr), - boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth), - boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(in_treeHeight-1))) - { - - FAssertLF(inCoverRatio == 0.0 || oneParent == true, "If a ratio is choosen oneParent should be turned on"); - const bool userCoverRatio = (inCoverRatio != 0.0); - - _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight]; - - MortonIndex* currentBlockIndexes = new MortonIndex[_nbElementsPerBlock]; - // First we work at leaf level - { - // Build morton index for particles - struct ParticleSortingStruct{ - FSize originalIndex; - MortonIndex mindex; - }; - // Convert position to morton index - const FSize nbParticles = inParticlesContainer->getNbParticles(); - ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles]; - { - const FReal* xpos = inParticlesContainer->getPositions()[0]; - const FReal* ypos = inParticlesContainer->getPositions()[1]; - const FReal* zpos = inParticlesContainer->getPositions()[2]; - - for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){ - const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth, - _treeHeight, - FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) ); - const MortonIndex particleIndex = host.getMortonIndex(); - particlesToSort[idxPart].mindex = particleIndex; - particlesToSort[idxPart].originalIndex = idxPart; - } - } - - // Sort if needed - if(particlesAreSorted == false){ - FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){ - return v1.mindex <= v2.mindex; - }); - } - - FAssertLF(nbParticles == 0 || inLeftLimite < particlesToSort[0].mindex); - - // Convert to block - const int idxLevel = (_treeHeight - 1); - int* nbParticlesPerLeaf = new int[_nbElementsPerBlock]; - int firstParticle = 0; + } + } + } + else{ // We need to proceed each group in sub level - while(firstParticle != nbParticles){ - int sizeOfBlock = 0; - int lastParticle = firstParticle; - // Count until end of sub group is reached or we have enough cells - while(sizeOfBlock < _nbElementsPerBlock && lastParticle < nbParticles - && (userCoverRatio == false - || sizeOfBlock == 0 - || currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex - || (FReal(sizeOfBlock+1)/FReal(particlesToSort[lastParticle].mindex-particlesToSort[firstParticle].mindex)) >= inCoverRatio)){ - if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){ - currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex; - 
nbParticlesPerLeaf[sizeOfBlock] = 1; - sizeOfBlock += 1; - } - else{ - nbParticlesPerLeaf[sizeOfBlock-1] += 1; - } - lastParticle += 1; - } - while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){ - nbParticlesPerLeaf[sizeOfBlock-1] += 1; - lastParticle += 1; - } + while(iterChildCells != iterChildEndCells){ + // We want one parent group per child group so we will stop the parent group + // when we arrive to the same parent as lastChildIndex (which is lastChildIndex>>3) + const MortonIndex lastChildIndex = ((*iterChildCells)->getEndingIndex()-1); + // Count until end of sub group is reached or we passe the requested parent + while( iterChildCells != iterChildEndCells + && (currentCellIndex>>3) <= (lastChildIndex>>3) ){ + // Proceed until the requested parent + while(currentCellIndex != (*iterChildCells)->getEndingIndex() + && (currentCellIndex>>3) <= (lastChildIndex>>3) ){ + if((*iterChildCells)->exists(currentCellIndex)){ + currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3); + sizeOfBlock += 1; + currentCellIndex = (((currentCellIndex>>3)+1)<<3); + } + else{ + currentCellIndex += 1; + } + } + // If we are at the end of the sub group, move to next (otherwise we have consume a part of it) + while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){ + ++iterChildCells; + // Update morton index + if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){ + currentCellIndex = (*iterChildCells)->getStartingIndex(); + } + } + } - // Create a group - CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock); - FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0], + // If group is full + if(sizeOfBlock){ + // Create a group + CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock, lastParticle-firstParticle); - - // Init cells - size_t nbParticlesOffsetBeforeLeaf = 0; - int offsetParticles = firstParticle; - for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ - newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); - - SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock); - symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]); - FTreeCoordinate coord; - coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]); - symbolic.setCoordinate(coord); - symbolic.setLevel(idxLevel); - - // Add leaf - nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock, - nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf); + sizeOfBlock); + // Init cells + for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ + newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); - BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock); - // Copy each particle from the original position - for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){ - attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer); - } - offsetParticles += nbParticlesPerLeaf[cellIdInBlock]; - } + SymbolCellClass& symbolic 
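+                            // In the oneParent case currentBlockIndexes holds parent Morton indices
+                            // (child index >> 3); the symbolic data of each parent cell (index,
+                            // coordinate, level) is deduced from that index alone.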
= newBlock->getSymbolic(cellIdInBlock); + symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]); + FTreeCoordinate coord; + coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]); + symbolic.setCoordinate(coord); + symbolic.setLevel(idxLevel); + } - // Keep the block - _cellBlocksPerLevel[idxLevel].push_back(newBlock); - _particleBlocks.push_back(newParticleBlock); + // Keep the block + _cellBlocksPerLevel[idxLevel].push_back(newBlock); - sizeOfBlock = 0; - firstParticle = lastParticle; - } - delete[] nbParticlesPerLeaf; - delete[] particlesToSort; + sizeOfBlock = 0; + } + } + } + } + delete[] currentBlockIndexes; + } + /** + * Sequential Constructor of GroupTree + * used to construct a duplicated Ggroup tree on all processes + * @param[in] in_treeHeight size of the tree + * @param[in] in_boxWidth bow witdh + * @param[in] in_boxCenter box center + * @param[in] in__nbElementsPerBlock block size + * @param[in] inParticlesContainer an array of particles + * @param[out] blockSizeAtEachLevel box width at leaf level + * @param[in] particlesAreSorted True if the particle are sorted + */ + template<class ParticleContainer> + FGroupTree(const int in_treeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, + const int in_nbElementsPerBlock, ParticleContainer* inParticlesContainer, + std::vector<std::vector<int>> & blockSizeAtEachLevel, + const bool particlesAreSorted = false): + _treeHeight(in_treeHeight),_nbElementsPerBlock(in_nbElementsPerBlock),_cellBlocksPerLevel(nullptr), + boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth), + boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(in_treeHeight-1))) + { + _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight]; + + MortonIndex* currentBlockIndexes = new MortonIndex[_nbElementsPerBlock]; + // First we work at leaf level + { + // Build morton index for particles + struct ParticleSortingStruct{ + FSize originalIndex; + MortonIndex mindex; + }; + // Convert position to morton index + const FSize nbParticles = inParticlesContainer->getNbParticles(); + ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles]; + { + const FReal* xpos = inParticlesContainer->getPositions()[0]; + const FReal* ypos = inParticlesContainer->getPositions()[1]; + const FReal* zpos = inParticlesContainer->getPositions()[2]; + + for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){ + const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth, + _treeHeight, + FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) ); + const MortonIndex particleIndex = host.getMortonIndex(); + particlesToSort[idxPart].mindex = particleIndex; + particlesToSort[idxPart].originalIndex = idxPart; + } + } + + // Sort if needed + if(particlesAreSorted == false){ + FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){ + return v1.mindex <= v2.mindex; + }); } - - // For each level from heigth - 2 to 1 - for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){ - inLeftLimite = (inLeftLimite == -1 ? 
inLeftLimite : (inLeftLimite>>3)); - - CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin(); - const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end(); - - // Skip blocks that do not respect limit - while(iterChildCells != iterChildEndCells - && ((*iterChildCells)->getEndingIndex()>>3) <= inLeftLimite){ - ++iterChildCells; - } - // If lower level is empty or all blocks skiped stop here - if(iterChildCells == iterChildEndCells){ - break; - } - - MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex(); - if((currentCellIndex>>3) <= inLeftLimite) currentCellIndex = ((inLeftLimite+1)<<3); - int sizeOfBlock = 0; - - if(oneParent == false){ - // We need to proceed each group in sub level - while(iterChildCells != iterChildEndCells){ - // Count until end of sub group is reached or we have enough cells - while(sizeOfBlock < _nbElementsPerBlock && iterChildCells != iterChildEndCells ){ - if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3)) - && (*iterChildCells)->exists(currentCellIndex)){ - currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3); - sizeOfBlock += 1; - currentCellIndex = (((currentCellIndex>>3)+1)<<3); - } - else{ - currentCellIndex += 1; - } - // If we are at the end of the sub group, move to next - while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){ - ++iterChildCells; - // Update morton index - if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){ - currentCellIndex = (*iterChildCells)->getStartingIndex(); - } - } - } - - // If group is full - if(sizeOfBlock == _nbElementsPerBlock || (sizeOfBlock && iterChildCells == iterChildEndCells)){ - // Create a group - CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock); - // Init cells - for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ - newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); - - SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock); - symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]); - FTreeCoordinate coord; - coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]); - symbolic.setCoordinate(coord); - symbolic.setLevel(idxLevel); - } - - // Keep the block - _cellBlocksPerLevel[idxLevel].push_back(newBlock); - - sizeOfBlock = 0; - } + // Convert to block + const int idxLevel = (_treeHeight - 1); + int idxBlock = 0; + FSize* nbParticlesPerLeaf = new FSize[_nbElementsPerBlock]; + FSize firstParticle = 0; + // We need to proceed each group in sub level + while(firstParticle != nbParticles){ + int sizeOfBlock = 0; + FSize lastParticle = firstParticle; + // Count until end of sub group is reached or we have enough cells + while(sizeOfBlock < blockSizeAtEachLevel[_treeHeight-1][idxBlock] && lastParticle < nbParticles){ + if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){ + currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex; + nbParticlesPerLeaf[sizeOfBlock] = 1; + sizeOfBlock += 1; } - } - else{ - // We need to proceed each group in sub level - while(iterChildCells != iterChildEndCells){ - // We want one parent group per child group so we will stop the parent group - // when we arrive to the same parent as lastChildIndex (which is lastChildIndex>>3) - const MortonIndex lastChildIndex = 
((*iterChildCells)->getEndingIndex()-1); - // Count until end of sub group is reached or we passe the requested parent - while( iterChildCells != iterChildEndCells - && (currentCellIndex>>3) <= (lastChildIndex>>3) ){ - // Proceed until the requested parent - while(currentCellIndex != (*iterChildCells)->getEndingIndex() - && (currentCellIndex>>3) <= (lastChildIndex>>3) ){ - if((*iterChildCells)->exists(currentCellIndex)){ - currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3); - sizeOfBlock += 1; - currentCellIndex = (((currentCellIndex>>3)+1)<<3); - } - else{ - currentCellIndex += 1; - } - } - // If we are at the end of the sub group, move to next (otherwise we have consume a part of it) - while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){ - ++iterChildCells; - // Update morton index - if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){ - currentCellIndex = (*iterChildCells)->getStartingIndex(); - } - } - } - - // If group is full - if(sizeOfBlock){ - // Create a group - CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock); - // Init cells - for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ - newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); - - SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock); - symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]); - FTreeCoordinate coord; - coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]); - symbolic.setCoordinate(coord); - symbolic.setLevel(idxLevel); - } - - // Keep the block - _cellBlocksPerLevel[idxLevel].push_back(newBlock); - - sizeOfBlock = 0; - } + else{ + nbParticlesPerLeaf[sizeOfBlock-1] += 1; } + lastParticle += 1; } - } - delete[] currentBlockIndexes; + while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){ + nbParticlesPerLeaf[sizeOfBlock-1] += 1; + lastParticle += 1; + } + + // Create a group + CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock); + FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0], + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock, lastParticle-firstParticle); + + ///////////////////////// TO REMOVE ?? 
////////////// + #include <iostream> + using namespace std; + if(currentBlockIndexes[sizeOfBlock-1]+1 == 511) + cout << "Suricate" << endl; + ///////////////////////////////////////////////////// + + // Init cells + size_t nbParticlesOffsetBeforeLeaf = 0; + FSize offsetParticles = firstParticle; + for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ + newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); + + SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock); + symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]); + FTreeCoordinate coord; + coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]); + symbolic.setCoordinate(coord); + symbolic.setLevel(idxLevel); + + // Add leaf + nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock, + nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf); + + BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock); + // Copy each particle from the original position + for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){ + attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer); + } + offsetParticles += nbParticlesPerLeaf[cellIdInBlock]; + } + + // Keep the block + _cellBlocksPerLevel[idxLevel].push_back(newBlock); + _particleBlocks.push_back(newParticleBlock); + + sizeOfBlock = 0; + firstParticle = lastParticle; + ++idxBlock; + } + delete[] nbParticlesPerLeaf; + delete[] particlesToSort; } +// MPI_Barrier(MPI_COMM_WORLD); - template<class ParticleContainer> - FGroupTree(const int in_treeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, - const int in_nbElementsPerBlock, ParticleContainer* inParticlesContainer, - std::vector<std::vector<int>> & blockSizeAtEachLevel, - const bool particlesAreSorted = false): - _treeHeight(in_treeHeight),_nbElementsPerBlock(in_nbElementsPerBlock),_cellBlocksPerLevel(nullptr), - boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth), - boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(in_treeHeight-1))) - { + // For each level from heigth - 2 to 1 + for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){ + CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin(); + const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end(); - _cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight]; + // If lower level is empty or all blocks skiped stop here + if(iterChildCells == iterChildEndCells){ + break; + } - MortonIndex* currentBlockIndexes = new MortonIndex[_nbElementsPerBlock]; - // First we work at leaf level - { - // Build morton index for particles - struct ParticleSortingStruct{ - FSize originalIndex; - MortonIndex mindex; - }; - // Convert position to morton index - const FSize nbParticles = inParticlesContainer->getNbParticles(); - ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles]; - { - const FReal* xpos = inParticlesContainer->getPositions()[0]; - const FReal* ypos = inParticlesContainer->getPositions()[1]; - const FReal* zpos = inParticlesContainer->getPositions()[2]; - - for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){ - const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth, - _treeHeight, - FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) ); - 
const MortonIndex particleIndex = host.getMortonIndex(); - particlesToSort[idxPart].mindex = particleIndex; - particlesToSort[idxPart].originalIndex = idxPart; - } - } + MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex(); + int sizeOfBlock = 0; + int idxBlock = 0; - // Sort if needed - if(particlesAreSorted == false){ - FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){ - return v1.mindex <= v2.mindex; - }); - } + // We need to proceed each group in sub level + while(iterChildCells != iterChildEndCells){ + // Count until end of sub group is reached or we have enough cells + while(sizeOfBlock < blockSizeAtEachLevel[idxLevel][idxBlock] && iterChildCells != iterChildEndCells ){ - // Convert to block - const int idxLevel = (_treeHeight - 1); - int idxBlock = 0; - FSize* nbParticlesPerLeaf = new FSize[_nbElementsPerBlock]; - FSize firstParticle = 0; - // We need to proceed each group in sub level - while(firstParticle != nbParticles){ - int sizeOfBlock = 0; - FSize lastParticle = firstParticle; - // Count until end of sub group is reached or we have enough cells - while(sizeOfBlock < blockSizeAtEachLevel[_treeHeight-1][idxBlock] && lastParticle < nbParticles){ - if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){ - currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex; - nbParticlesPerLeaf[sizeOfBlock] = 1; - sizeOfBlock += 1; - } - else{ - nbParticlesPerLeaf[sizeOfBlock-1] += 1; - } - lastParticle += 1; - } - while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){ - nbParticlesPerLeaf[sizeOfBlock-1] += 1; - lastParticle += 1; - } + if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3)) + && (*iterChildCells)->exists(currentCellIndex)){ + currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3); + sizeOfBlock += 1; + currentCellIndex = (((currentCellIndex>>3)+1)<<3); + } + else{ + currentCellIndex += 1; + } + // If we are at the end of the sub group, move to next + while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){ + ++iterChildCells; + // Update morton index + if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){ + currentCellIndex = (*iterChildCells)->getStartingIndex(); + } + } + } + // If group is full + if(sizeOfBlock == blockSizeAtEachLevel[idxLevel][idxBlock] || (sizeOfBlock && iterChildCells == iterChildEndCells)){ //NOTE la seconde partie va sûrement sauter, car la taille est pré-calculée // Create a group CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock); - FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>*const newParticleBlock = new FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle, AttributeClass>(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock, lastParticle-firstParticle); - #include <iostream> - using namespace std; - if(currentBlockIndexes[sizeOfBlock-1]+1 == 511) - cout << "Suricate" << endl; + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock); // Init cells - size_t nbParticlesOffsetBeforeLeaf = 0; - FSize offsetParticles = firstParticle; for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ 
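+                        // Every Morton index kept in currentBlockIndexes becomes one cell of the new
+                        // block; its symbolic data is initialised from the index alone.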
newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); @@ -724,100 +828,21 @@ public: coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]); symbolic.setCoordinate(coord); symbolic.setLevel(idxLevel); - - // Add leaf - nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock, - nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf); - - BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock); - // Copy each particle from the original position - for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){ - attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer); - } - offsetParticles += nbParticlesPerLeaf[cellIdInBlock]; - } + } // Keep the block _cellBlocksPerLevel[idxLevel].push_back(newBlock); - _particleBlocks.push_back(newParticleBlock); sizeOfBlock = 0; - firstParticle = lastParticle; - ++idxBlock; - } - delete[] nbParticlesPerLeaf; - delete[] particlesToSort; - } - - - // For each level from heigth - 2 to 1 - for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){ - - CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin(); - const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end(); - - // If lower level is empty or all blocks skiped stop here - if(iterChildCells == iterChildEndCells){ - break; - } - - MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex(); - int sizeOfBlock = 0; - int idxBlock = 0; - // We need to proceed each group in sub level - while(iterChildCells != iterChildEndCells){ - // Count until end of sub group is reached or we have enough cells - while(sizeOfBlock < blockSizeAtEachLevel[idxLevel][idxBlock] && iterChildCells != iterChildEndCells ){ - if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3)) - && (*iterChildCells)->exists(currentCellIndex)){ - currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3); - sizeOfBlock += 1; - currentCellIndex = (((currentCellIndex>>3)+1)<<3); - } - else{ - currentCellIndex += 1; - } - // If we are at the end of the sub group, move to next - while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){ - ++iterChildCells; - // Update morton index - if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){ - currentCellIndex = (*iterChildCells)->getStartingIndex(); - } - } - } - - // If group is full - if(sizeOfBlock == blockSizeAtEachLevel[idxLevel][idxBlock] || (sizeOfBlock && iterChildCells == iterChildEndCells)){ //NOTE la seconde partie va sûrement sauter, car la taille est pré-calculée - // Create a group - CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock); - // Init cells - for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ - newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); - - SymbolCellClass& symbolic = newBlock->getSymbolic(cellIdInBlock); - symbolic.setMortonIndex(currentBlockIndexes[cellIdInBlock]); - FTreeCoordinate coord; - coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]); - symbolic.setCoordinate(coord); - symbolic.setLevel(idxLevel); - } + ++idxBlock; + } + } + } + delete[] currentBlockIndexes; + } - // Keep the block - _cellBlocksPerLevel[idxLevel].push_back(newBlock); - sizeOfBlock 
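+                        // Above the leaf level only the cell block is stored: particle groups are
+                        // attached to the leaf level, so no particle block is pushed here.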
= 0; - ++idxBlock; - } - } - } - delete[] currentBlockIndexes; - } - - - /** + /** * Minimal Constructor of GroupTree * @author benjamin.dufoyer@inria.fr * @param in__treeHeight size of the tree @@ -827,64 +852,65 @@ public: * @param in_boxWidth bow witdh * @param in_boxWidthAtLeafLevel box width at leaf level */ - FGroupTree( - int in__treeHeight, - int in__nbElementsPerBlock, - FPoint<FReal> in_boxCenter, - FPoint<FReal> in_boxCorner, - FReal in_boxWidth, - FReal in_boxWidthAtLeafLevel - ): - _treeHeight(in__treeHeight), - _nbElementsPerBlock(in__nbElementsPerBlock), - boxCenter(in_boxCenter), - boxCorner(in_boxCorner), - boxWidth(in_boxWidth), - boxWidthAtLeafLevel(in_boxWidthAtLeafLevel) - { - this->_cellBlocksPerLevel = new std::vector<CellGroupClass*>[this->_treeHeight]; - } - - - /** + FGroupTree( + int in__treeHeight, + int in__nbElementsPerBlock, + FPoint<FReal> in_boxCenter, + FPoint<FReal> in_boxCorner, + FReal in_boxWidth, + FReal in_boxWidthAtLeafLevel + ): + _treeHeight(in__treeHeight), + _nbElementsPerBlock(in__nbElementsPerBlock), + boxCenter(in_boxCenter), + boxCorner(in_boxCorner), + boxWidth(in_boxWidth), + boxWidthAtLeafLevel(in_boxWidthAtLeafLevel) + { + this->_cellBlocksPerLevel = new std::vector<CellGroupClass*>[_treeHeight]; + + } + + + /** * get_block_tree_instance return a new instance of FGroupTree from * a blocked linear tree * @author benjamin.dufoyer@inria.fr * @param blocked_linear_tree blocked linear tree * @return new FGroupTree */ - template< - class GroupCellSymbClass, - class GroupCellUpClass, - class GroupCellDownClass, - class GroupContainerClass - > - static FGroupTree get_block_tree_instance( - int in_tree_height, - int in_block_size, - FPoint<FReal> in_box_center, - FReal in_box_width - ){ - // Compute every information to initialise the group tree - FPoint<FReal> box_corner = FPoint<FReal>(in_box_center, -in_box_width/2); - FReal box_width_at_leaf_level = in_box_width/FReal( 1<< (in_tree_height-1)); - - // Return a new instance of a empty group tree - return FGroupTree<FReal,GroupCellSymbClass,GroupCellUpClass, GroupCellDownClass, GroupContainerClass, NbSymbAttributes, NbAttributesPerParticle, FReal>( - in_tree_height - ,in_block_size - ,in_box_center - ,box_corner - ,in_box_width - ,box_width_at_leaf_level); - } - - - ///////////////////////////////////////////////////////// - // Function to init group tree - ///////////////////////////////////////////////////////// - - /** + template< + class GroupCellSymbClass, + class GroupCellUpClass, + class GroupCellDownClass, + class GroupContainerClass + > + static FGroupTree get_block_tree_instance( + int in_tree_height, + int in_block_size, + FPoint<FReal> in_box_center, + FReal in_box_width + ){ + // Compute every information to initialise the group tree + FPoint<FReal> box_corner = FPoint<FReal>(in_box_center, -in_box_width/2); + FReal box_width_at_leaf_level = in_box_width/FReal( 1<< (in_tree_height-1)); + + // Return a new instance of a empty group tree + return FGroupTree<FReal,GroupCellSymbClass,GroupCellUpClass, GroupCellDownClass, GroupContainerClass, NbSymbAttributes, NbAttributesPerParticle, FReal>( + in_tree_height + ,in_block_size + ,in_box_center + ,box_corner + ,in_box_width + ,box_width_at_leaf_level); + } + + + ///////////////////////////////////////////////////////// + // Function to init group tree + ///////////////////////////////////////////////////////// + + /** * create_tree this function fill the tree from blocked_linear_tree * She build the group tree from the bottom * @author 
benjamin.dufoyer@inria.fr @@ -892,237 +918,306 @@ public: * @param particles vector where particle are stock, * they will be sort BEFORE calling this function */ - template<class Group_Linear_tree, - class Particle_Container - > - void create_tree(Group_Linear_tree* in_lin_tree, - Particle_Container* particles, - MortonIndex inLeftLimite = -1){ - MortonIndex in_left_limit = in_lin_tree->get_left_limit(); - // Creation of the leaf level and groups of particle - auto current_block_indexes = create_leaf_level(in_lin_tree,particles); - // Creation of every level of the tree - create_block_nodes_level( - current_block_indexes, - in_left_limit); - } - - /** This function dealloc the tree by deleting each block */ - ~FGroupTree(){ - for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){ - std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel]; - for (CellGroupClass* block: levelBlocks){ - delete block; - } - } - delete[] _cellBlocksPerLevel; - for (ParticleGroupClass* block: _particleBlocks){ + template<class Group_Linear_tree, + class Particle_Container + > + void create_tree(Group_Linear_tree& in_lin_tree, + const Particle_Container& particles){ + MortonIndex in_left_limit = in_lin_tree.get_left_limit(); + // Creation of the leaf level and groups of particle + std::vector<MortonIndex> current_block_indexes = create_leaf_level(in_lin_tree,particles); + // Creation of every level of the tree + create_block_nodes_level( + current_block_indexes, + in_left_limit); + } + + /** This function dealloc the tree by deleting each block */ + ~FGroupTree(){ + for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){ + std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel]; + for (CellGroupClass* block: levelBlocks){ delete block; - } - } - - //////////////////////////////////////////////////////// - // Lambda function to apply to all member - ///////////////////////////////////////////////////////// - - /** + } + } + delete[] _cellBlocksPerLevel; + for (ParticleGroupClass* block: _particleBlocks){ + delete block; + } + } + + //////////////////////////////////////////////////////// + // Lambda function to apply to all member + ///////////////////////////////////////////////////////// + + /** * @brief forEachLeaf iterate on the leaf and apply the function * @param function */ - template<class ParticlesAttachedClass> - void forEachLeaf(std::function<void(ParticlesAttachedClass*)> function){ - for (ParticleGroupClass* block: _particleBlocks){ - block->forEachLeaf(function); - } - } - - /** - * @brief forEachLeaf iterate on the cell and apply the function + template<class ParticlesAttachedClass> + void forEachLeaf(std::function<void(ParticlesAttachedClass*)> function){ + for (ParticleGroupClass* block: _particleBlocks){ + block->forEachLeaf(function); + } + } + /** + * @brief forEachMyLeaf iterate on the leaf and apply the function * @param function */ - void forEachCell(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*)> function){ - for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){ - std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel]; - for (CellGroupClass* block: levelBlocks){ - block->forEachCell(function); - } - } - } - - /** + template<class ParticlesAttachedClass> + void forEachMyLeaf(std::function<void(ParticlesAttachedClass*)> function){ + for (ParticleGroupClass* block: _particleBlocks){ + if(block->isMine()) + block->forEachLeaf(function); + } + } + + /** + * @brief forEachCell iterate on the cell and 
apply the function + * @param function + */ + void forEachCell(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*)> function){ + for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){ + std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel]; + for (CellGroupClass* block: levelBlocks){ + block->forEachCell(function); + } + } + } + + void forEachMyCell(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*)> function){ + for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){ + std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel]; + for (CellGroupClass* block: levelBlocks){ + if(block->isMine()) + block->forEachCell(function); + } + } + } + + /** * @brief forEachLeaf iterate on the cell and apply the function * @param function */ - void forEachCellWithLevel(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*,const int)> function){ - for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){ - std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel]; - for (CellGroupClass* block: levelBlocks){ - block->forEachCell(function, idxLevel); - } - } - } - - /** + void forEachCellWithLevel(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*,const int)> function){ + for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){ + std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel]; + for (CellGroupClass* block: levelBlocks){ + block->forEachCell(function, idxLevel); + } + } + } + + void forEachMyCellWithLevel(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*,const int)> function){ + for(int idxLevel = 0 ; idxLevel < _treeHeight ; ++idxLevel){ + std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel]; + for (CellGroupClass* block: levelBlocks){ + if(block->isMine()) + block->forEachCell(function, idxLevel); + } + } + } + + /** * @brief forEachLeaf iterate on the cell and apply the function * @param function */ - template<class ParticlesAttachedClass> - void forEachCellLeaf(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*,ParticlesAttachedClass*)> function){ - CellGroupIterator iterCells = _cellBlocksPerLevel[_treeHeight-1].begin(); - const CellGroupIterator iterEndCells = _cellBlocksPerLevel[_treeHeight-1].end(); + template<class ParticlesAttachedClass> + void forEachCellLeaf(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*,ParticlesAttachedClass*)> function){ + CellGroupIterator iterCells = _cellBlocksPerLevel[_treeHeight-1].begin(); + const CellGroupIterator iterEndCells = _cellBlocksPerLevel[_treeHeight-1].end(); - ParticleGroupIterator iterLeaves = _particleBlocks.begin(); - const ParticleGroupIterator iterEndLeaves = _particleBlocks.end(); + ParticleGroupIterator iterLeaves = _particleBlocks.begin(); + const ParticleGroupIterator iterEndLeaves = _particleBlocks.end(); - while(iterCells != iterEndCells && iterLeaves != iterEndLeaves){ + while(iterCells != iterEndCells && iterLeaves != iterEndLeaves){ - (*iterCells)->forEachCell( - [&](SymbolCellClass* symb, - PoleCellClass* mult, - LocalCellClass* loc) - { - const int leafIdx = (*iterLeaves)->getLeafIndex(symb->getMortonIndex()); - FAssertLF(leafIdx != -1); - ParticlesAttachedClass aLeaf = (*iterLeaves)->template getLeaf <ParticlesAttachedClass>(leafIdx); - FAssertLF(aLeaf.isAttachedToSomething()); - function(symb, mult, loc, &aLeaf); - }); + (*iterCells)->forEachCell( + [&](SymbolCellClass* symb, + PoleCellClass* mult, + 
LocalCellClass* loc) + { + const int leafIdx = (*iterLeaves)->getLeafIndex(symb->getMortonIndex()); + FAssertLF(leafIdx != -1); + ParticlesAttachedClass aLeaf = (*iterLeaves)->template getLeaf <ParticlesAttachedClass>(leafIdx); + FAssertLF(aLeaf.isAttachedToSomething()); + function(symb, mult, loc, &aLeaf); + }); - ++iterCells; - ++iterLeaves; - } + ++iterCells; + ++iterLeaves; + } - FAssertLF(iterCells == iterEndCells && iterLeaves == iterEndLeaves); - } + FAssertLF(iterCells == iterEndCells && iterLeaves == iterEndLeaves); + } + + + template<class ParticlesAttachedClass> + void forEachCellMyLeaf(std::function<void(SymbolCellClass*,PoleCellClass*,LocalCellClass*,ParticlesAttachedClass*)> function){ + CellGroupIterator iterCells = _cellBlocksPerLevel[_treeHeight-1].begin(); + + const CellGroupIterator iterEndCells = _cellBlocksPerLevel[_treeHeight-1].end(); + + ParticleGroupIterator iterLeaves = _particleBlocks.begin(); + const ParticleGroupIterator iterEndLeaves = _particleBlocks.end(); + + while(iterCells != iterEndCells && iterLeaves != iterEndLeaves){ + if((*iterCells)->isMine()){ + (*iterCells)->forEachCell( + [&](SymbolCellClass* symb, + PoleCellClass* mult, + LocalCellClass* loc) + { + const int leafIdx = (*iterLeaves)->getLeafIndex(symb->getMortonIndex()); + FAssertLF(leafIdx != -1); + ParticlesAttachedClass aLeaf = (*iterLeaves)->template getLeaf <ParticlesAttachedClass>(leafIdx); + FAssertLF(aLeaf.isAttachedToSomething()); + function(symb, mult, loc, &aLeaf); + }); + } + ++iterCells; + ++iterLeaves; + } + FAssertLF(iterCells == iterEndCells && iterLeaves == iterEndLeaves); + } - /** @brief, for statistic purpose, display each block with number of + /** @brief, for statistic purpose, display each block with number of * cell, size of header, starting index, and ending index */ - void printInfoBlocks(){ - std::cout << "Group Tree information:\n"; - std::cout << "\t Group Size = " << _nbElementsPerBlock << "\n"; - std::cout << "\t Tree height = " << _treeHeight << "\n"; - for(int idxLevel = 1 ; idxLevel < _treeHeight ; ++idxLevel){ - std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel]; - std::cout << "Level " << idxLevel << ", there are " << levelBlocks.size() << " groups.\n"; - int idxGroup = 0; - for (const CellGroupClass* block: levelBlocks){ - std::cout << "\t Group " << (idxGroup++); - std::cout << "\t Size = " << block->getNumberOfCellsInBlock(); - std::cout << "\t Starting Index = " << block->getStartingIndex(); - std::cout << "\t Ending Index = " << block->getEndingIndex(); - std::cout << "\t Ratio of usage = " << float(block->getNumberOfCellsInBlock())/float(block->getEndingIndex()-block->getStartingIndex()) << "\n"; - } - } - - std::cout << "There are " << _particleBlocks.size() << " leaf-groups.\n"; + void printInfoBlocks(){ + std::cout << "Group Tree information:\n"; + std::cout << "\t Group Size = " << _nbElementsPerBlock << "\n"; + std::cout << "\t Tree height = " << _treeHeight << "\n"; + for(int idxLevel = 1 ; idxLevel < _treeHeight ; ++idxLevel){ + std::vector<CellGroupClass*>& levelBlocks = _cellBlocksPerLevel[idxLevel]; + std::cout << "Level " << idxLevel << ", there are " << levelBlocks.size() << " groups.\n"; int idxGroup = 0; - FSize totalNbParticles = 0; - for (const ParticleGroupClass* block: _particleBlocks){ + for (const CellGroupClass* block: levelBlocks){ std::cout << "\t Group " << (idxGroup++); - std::cout << "\t Size = " << block->getNumberOfLeavesInBlock(); + // std::cout << "\t local " << std::boolalpha << block->isMine(); + 
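+                // Usage ratio = cells stored in the block / Morton-index span covered by the block;
+                // e.g. a block covering [64, 128) that stores 32 cells has a ratio of 32/64 = 0.5,
+                // and a ratio of 1 means the block is dense.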
std::cout << "\t Size = " << block->getNumberOfCellsInBlock(); std::cout << "\t Starting Index = " << block->getStartingIndex(); std::cout << "\t Ending Index = " << block->getEndingIndex(); - std::cout << "\t Nb Particles = " << block->getNbParticlesInGroup(); - std::cout << "\t Ratio of usage = " << float(block->getNumberOfLeavesInBlock())/float(block->getEndingIndex()-block->getStartingIndex()) << "\n"; - totalNbParticles += block->getNbParticlesInGroup(); - } - std::cout << "There are " << totalNbParticles << " particles.\n"; - } - - ///////////////////////////////////////////////////////// - // Algorithm function - ///////////////////////////////////////////////////////// - - int getHeight() const { - return _treeHeight; - } - - CellGroupIterator cellsBegin(const int inLevel){ - FAssertLF(inLevel < _treeHeight); - return _cellBlocksPerLevel[inLevel].begin(); - } - - CellGroupConstIterator cellsBegin(const int inLevel) const { - FAssertLF(inLevel < _treeHeight); - return _cellBlocksPerLevel[inLevel].begin(); - } - - CellGroupIterator cellsEnd(const int inLevel){ - FAssertLF(inLevel < _treeHeight); - return _cellBlocksPerLevel[inLevel].end(); - } - - CellGroupConstIterator cellsEnd(const int inLevel) const { - FAssertLF(inLevel < _treeHeight); - return _cellBlocksPerLevel[inLevel].end(); - } - - int getNbCellGroupAtLevel(const int inLevel) const { - FAssertLF(inLevel < _treeHeight); - return int(_cellBlocksPerLevel[inLevel].size()); - } - - CellGroupClass* getCellGroup(const int inLevel, const int inIdx){ - FAssertLF(inLevel < _treeHeight); - if(inIdx >= int(_cellBlocksPerLevel[inLevel].size())){ - std::cout << "level : "<< inLevel << std::endl; - std::cout << " idx :"<< inIdx << std::endl; - } - FAssertLF(inIdx < int(_cellBlocksPerLevel[inLevel].size())); - return _cellBlocksPerLevel[inLevel][inIdx]; - } - - const int getNbElementsPerBlock() const{ - return this->_nbElementsPerBlock; - } - - const CellGroupClass* getCellGroup(const int inLevel, const int inIdx) const { - FAssertLF(inLevel < _treeHeight); - FAssertLF(inIdx < int(_cellBlocksPerLevel[inLevel].size())); - return _cellBlocksPerLevel[inLevel][inIdx]; - } - - ParticleGroupIterator leavesBegin(){ - return _particleBlocks.begin(); - } - - ParticleGroupConstIterator leavesBegin() const { - return _particleBlocks.begin(); - } - - ParticleGroupIterator leavesEnd(){ - return _particleBlocks.end(); - } - - ParticleGroupConstIterator leavesEnd() const { - return _particleBlocks.end(); - } - - int getNbParticleGroup() const { - return int(_particleBlocks.size()); - } - - ParticleGroupClass* getParticleGroup(const int inIdx){ - FAssertLF(inIdx < int(_particleBlocks.size())); - return _particleBlocks[inIdx]; - } - - const ParticleGroupClass* getParticleGroup(const int inIdx) const { - FAssertLF(inIdx < int(_particleBlocks.size())); - return _particleBlocks[inIdx]; - } - - size_t getTotalNbLeaf() { - size_t nbLeaf = 0; - for(int i = 0 ; i < this->getNbParticleGroup();++i){ - nbLeaf += this->_particleBlocks[i]->getNumberOfLeavesInBlock(); - } - return nbLeaf; - } - /** + // std::cout << "\t Global index = " << block->getIdxGlobal(); + std::cout << "\t Ratio of usage = " << + float(block->getNumberOfCellsInBlock())/float(block->getEndingIndex()-block->getStartingIndex()) << "\n"; + } + } + + std::cout << "There are " << _particleBlocks.size() << " leaf-groups.\n"; + int idxGroup = 0; + FSize totalNbParticles = 0; + for (const ParticleGroupClass* block: _particleBlocks){ + std::cout << "\t Group " << (idxGroup++); + + std::cout << "\t Size 
= " << block->getNumberOfLeavesInBlock(); + std::cout << "\t Starting Index = " << block->getStartingIndex(); + std::cout << "\t Ending Index = " << block->getEndingIndex(); + std::cout << "\t Nb Particles = " << block->getNbParticlesInGroup(); + std::cout << "\t Global index = " << block->getIdxGlobal(); + std::cout << "\t Ratio of usage = " << float(block->getNumberOfLeavesInBlock())/float(block->getEndingIndex()-block->getStartingIndex()) << "\n"; + totalNbParticles += block->getNbParticlesInGroup(); + } + std::cout << "There are " << totalNbParticles << " particles.\n"; + } + + ///////////////////////////////////////////////////////// + // Algorithm function + ///////////////////////////////////////////////////////// + + int getHeight() const { + return _treeHeight; + } + + CellGroupIterator cellsBegin(const int inLevel){ + FAssertLF(inLevel < _treeHeight); + return _cellBlocksPerLevel[inLevel].begin(); + } + + CellGroupConstIterator cellsBegin(const int inLevel) const { + FAssertLF(inLevel < _treeHeight); + return _cellBlocksPerLevel[inLevel].begin(); + } + + CellGroupIterator cellsEnd(const int inLevel){ + FAssertLF(inLevel < _treeHeight); + return _cellBlocksPerLevel[inLevel].end(); + } + + CellGroupConstIterator cellsEnd(const int inLevel) const { + FAssertLF(inLevel < _treeHeight); + return _cellBlocksPerLevel[inLevel].end(); + } + + int getNbCellGroupAtLevel(const int inLevel) const { + FAssertLF(inLevel < _treeHeight); + return int(_cellBlocksPerLevel[inLevel].size()); + } + + CellGroupClass* getCellGroup(const int inLevel, const int inIdx){ + FAssertLF(inLevel < _treeHeight); + FAssertLF(inIdx < int(_cellBlocksPerLevel[inLevel].size())); + return _cellBlocksPerLevel[inLevel][inIdx]; + } + + const int getNbElementsPerBlock() const{ + return this->_nbElementsPerBlock; + } + + const CellGroupClass* getCellGroup(const int inLevel, const int inIdx) const { + FAssertLF(inLevel < _treeHeight); + FAssertLF(inIdx < int(_cellBlocksPerLevel[inLevel].size())); + return _cellBlocksPerLevel[inLevel][inIdx]; + } + + ParticleGroupIterator leavesBegin(){ + return _particleBlocks.begin(); + } + + ParticleGroupConstIterator leavesBegin() const { + return _particleBlocks.begin(); + } + + ParticleGroupIterator leavesEnd(){ + return _particleBlocks.end(); + } + + ParticleGroupConstIterator leavesEnd() const { + return _particleBlocks.end(); + } + + int getNbParticleGroup() const { + return int(_particleBlocks.size()); + } + + ParticleGroupClass* getParticleGroup(const int inIdx){ + FAssertLF(inIdx < int(_particleBlocks.size())); + return _particleBlocks[inIdx]; + } + + const ParticleGroupClass* getParticleGroup(const int inIdx) const { + FAssertLF(inIdx < int(_particleBlocks.size())); + return _particleBlocks[inIdx]; + } + + const FPoint<FReal> getBoxCenter() const{ + return this->boxCenter; + } + + const FReal getBoxWidth() const{ + return this->boxWidth; + } + + std::size_t getTotalNbLeaf() { + std::size_t nbLeaf = 0; + for(int i = 0 ; i < this->getNbParticleGroup();++i){ + nbLeaf += this->_particleBlocks[i]->getNumberOfLeavesInBlock(); + } + return nbLeaf; + } + /** * RESTRICTION : The array will be initialise BEFORE * RESTRICTION : The morton index of particle will be at _treeHeight * get_number_of_particle compute the total number of @@ -1130,27 +1225,26 @@ public: * @author benjamin.dufoyer@inria.fr * @param container container of particle */ - template<class particle_t> - void get_number_of_particle(std::vector<particle_t>* container, - std::vector<size_t>* nb_particles_per_leaf){ - 
FAssert(container->size() != 0); - int current_idx = 0; - size_t old_m_index = container->front().morton_index; - size_t current_m_idx = old_m_index; - - for(size_t i = 0 ; i < container->size(); ++i){ - current_m_idx = container->data()[i].morton_index; - if(current_m_idx == old_m_index){ - nb_particles_per_leaf->data()[current_idx] += 1; - } else { - current_idx += 1; - nb_particles_per_leaf->data()[current_idx] += 1; - old_m_index = current_m_idx; - } - } - } - - /** + template<class particle_t> + void get_number_of_particle(const std::vector<particle_t>& container, + std::vector<std::size_t>& nb_particles_per_leaf){ + FAssert(container.size() != 0); + int current_idx = 0; + std::size_t old_m_index = container.front().morton_index; + std::size_t current_m_idx = old_m_index; + for(std::size_t i = 0 ; i < container.size(); ++i){ + current_m_idx = container[i].morton_index; + if(current_m_idx == old_m_index){ + nb_particles_per_leaf[current_idx] += 1; + } else { + current_idx += 1; + nb_particles_per_leaf[current_idx] += 1; + old_m_index = current_m_idx; + } + } + } + + /** * create_leaf_level create the leaf level of the * Group tree from a blocked linear tree * @author benjamin.dufoyer@inria.fr @@ -1158,153 +1252,153 @@ public: * @param particles container of particle, will be a std::vector */ -template< -class Blocked_Linear_tree, -class Particle_Container -> -MortonIndex* create_leaf_level(Blocked_Linear_tree* in_lin_tree, - Particle_Container* particles) -{ + template<class Blocked_Linear_tree, + class Particle_Container> + std::vector<MortonIndex> create_leaf_level(Blocked_Linear_tree& in_lin_tree, + Particle_Container& particles) + { + // set parametter for the function const int idxLevel = this->_treeHeight-1; - const int nb_block = in_lin_tree->get_nb_block(); + const int nb_block = in_lin_tree.get_nb_block(); const int block_size = this->_nbElementsPerBlock; - size_t in_nb_leaf = in_lin_tree->get_nb_leaf(); - auto tree = in_lin_tree->get_tree(); - - MortonIndex* current_block_indexes = new MortonIndex[this->_nbElementsPerBlock]; - - std::vector<size_t> nb_particle_per_leaf(in_nb_leaf,0); - this->get_number_of_particle(particles,&nb_particle_per_leaf); - + std::size_t in_nb_leaf = in_lin_tree.get_nb_leaf(); + auto tree = in_lin_tree.get_tree(); + // alloc the vector for the current block index + // get the number of particle per leaf + std::vector<MortonIndex> current_block_indexes(this->_nbElementsPerBlock,0); + std::vector<std::size_t> nb_particle_per_leaf(in_nb_leaf,0); + this->get_number_of_particle(particles,nb_particle_per_leaf); + // put the particle in the FP2PParticleContainer FP2PParticleContainer<FReal> particle_container; - for(unsigned i = 0 ; i < particles->size() ; ++i){ - particle_container.push( - particles->data()[i].position(), - particles->data()[i].physicalValue()); - } + for(unsigned i = 0 ; i < particles.size() ; ++i){ + particle_container.push(particles[i].position(), particles[i].physicalValue()); + } - size_t leaf_number = 0; - size_t leaf_number_min = 0; + std::size_t leaf_number = 0; + std::size_t leaf_number_min = 0; // Create every block + std::size_t idx_particules = 0; for(int n_block = 0 ; n_block < nb_block ; ++n_block){ // Compute the morton index for the first and the // last cell of the block unsigned size_of_block = 0; - while(size_of_block < (unsigned)block_size && leaf_number < in_nb_leaf){ + while(size_of_block < (unsigned)block_size + && leaf_number < in_nb_leaf) + { current_block_indexes[size_of_block] = 
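    // ---- Illustrative sketch (not part of the original patch) ----
    // Standalone version of the counting scheme used by get_number_of_particle
    // above: one counter per leaf, obtained by grouping consecutive particles
    // that share the same Morton index. It assumes a minimal toy particle type
    // (only a morton_index field) and an input already sorted by Morton index,
    // as the RESTRICTION comments require.
    //
    // #include <cstddef>
    // #include <iostream>
    // #include <vector>
    //
    // struct toy_particle { std::size_t morton_index; };
    //
    // std::vector<std::size_t>
    // count_particles_per_leaf(const std::vector<toy_particle>& sorted) {
    //     std::vector<std::size_t> counts;
    //     for (std::size_t i = 0; i < sorted.size(); ++i) {
    //         if (i == 0 || sorted[i].morton_index != sorted[i - 1].morton_index) {
    //             counts.push_back(0);   // a new Morton index opens a new leaf
    //         }
    //         ++counts.back();
    //     }
    //     return counts;
    // }
    //
    // int main() {
    //     // Morton-sorted toy input: leaves 3, 3, 7, 9, 9, 9 -> counts {2, 1, 3}
    //     std::vector<toy_particle> parts{{3}, {3}, {7}, {9}, {9}, {9}};
    //     for (std::size_t c : count_particles_per_leaf(parts)) std::cout << c << " ";
    //     std::cout << "\n";
    //     return 0;
    // }
    // ---- end of illustrative sketch ----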
tree->data()[leaf_number].morton_index; leaf_number += 1; size_of_block += 1; - } + } CellGroupClass*const new_block = new CellGroupClass(current_block_indexes[0], - current_block_indexes[size_of_block-1]+1, //+1 is need by the class - size_of_block); - - size_t current_nb_particle = 0; - for(size_t i = 0 ; i < size_of_block ; ++i){ - current_nb_particle += nb_particle_per_leaf[leaf_number_min+i]; - } - FGroupOfParticles< - FReal, - NbSymbAttributes, - NbAttributesPerParticle, - AttributeClass>*const new_particle_block - = new FGroupOfParticles< - FReal, - NbSymbAttributes, - NbAttributesPerParticle, - AttributeClass> - (current_block_indexes[0], - current_block_indexes[size_of_block-1]+1, - size_of_block, - current_nb_particle); - - // Initialise each cell of the block - size_t nb_particles_offset_before_leaf = 0; - size_t idx_particules = 0; - for(unsigned cell_id_in_block = 0; cell_id_in_block < size_of_block; ++cell_id_in_block){ - // Adding cell into leaf block - new_block->newCell( - current_block_indexes[cell_id_in_block], - cell_id_in_block); - - // Fill symbolic information of the block - SymbolCellClass& symbolic = - new_block->getSymbolic(cell_id_in_block); - symbolic.setMortonIndex(current_block_indexes[cell_id_in_block]); - FTreeCoordinate coord; - coord.setPositionFromMorton(current_block_indexes[cell_id_in_block]); - symbolic.setCoordinate(coord); - symbolic.setLevel(idxLevel); - - // Adding cell into particle blockCells - - nb_particles_offset_before_leaf = - new_particle_block->newLeaf( - current_block_indexes[cell_id_in_block], - cell_id_in_block, - FSize(nb_particle_per_leaf[leaf_number_min+cell_id_in_block]), - nb_particles_offset_before_leaf - ); - - - BasicAttachedClass attached_leaf = - new_particle_block->template getLeaf<BasicAttachedClass>(cell_id_in_block); - - // Adding particle - for(size_t idxPart = 0 ; idxPart < nb_particle_per_leaf[leaf_number_min+cell_id_in_block] ; ++idxPart ){ - attached_leaf.setParticle( - idxPart, - idx_particules, - //nb_particles_offset_before_leaf+idxPart, - &particle_container); - ++idx_particules; - } - // Setting the offset to don't use particle twice - //offset_particles += nb_particle_per_leaf[idx_nb_particle_in_block]; - //idx_nb_particle_in_block += 1; - // cell_id_in_block += 1; - } - leaf_number_min = leaf_number; - //Stock the block cell and the block particles - _cellBlocksPerLevel[idxLevel].push_back(new_block); - _particleBlocks.push_back(new_particle_block); - size_of_block = 0; - } - return current_block_indexes; - -} - - /** - * create_level create every level - * It's juste a factorisation from the Beregenger constructor - * @author benjamin.dufoyer@inria.fr - * @param currentBlockIndexes block repartition at leaf level - * to construct - * @param inLeftLimite left limit of block of the current proc - * this parameter is not used with the blocked_linear_tree, he is here - * to have compatibility with old constructor - */ -void create_block_nodes_level(MortonIndex* currentBlockIndexes, - MortonIndex inLeftLimite = -1 - ){ - // Cronstruct every level - for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){ + current_block_indexes[size_of_block-1]+1, //+1 is need by the class + size_of_block); + size_t current_nb_particle = 0; + for(size_t i = 0 ; i < size_of_block ; ++i){ + current_nb_particle += nb_particle_per_leaf[leaf_number_min+i]; + } + FGroupOfParticles< + FReal, + NbSymbAttributes, + NbAttributesPerParticle, + AttributeClass>*const new_particle_block + = new FGroupOfParticles< + FReal, + 
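    // ---- Illustrative sketch (not part of the original patch) ----
    // The newLeaf(...) calls below thread an offset through the loop so that each
    // leaf knows where its particles start inside the group's contiguous particle
    // storage. Taken out of the class, and assuming newLeaf returns the offset
    // advanced by the particle count it was given (which is what the loop's usage
    // suggests), the bookkeeping is an exclusive prefix sum over per-leaf counts:
    //
    // #include <cstddef>
    // #include <iostream>
    // #include <vector>
    //
    // int main() {
    //     // Per-leaf particle counts for one block (toy values).
    //     std::vector<std::size_t> nb_particle_per_leaf{2, 1, 3, 4};
    //     // offsets[i] = number of particles stored before leaf i in the block.
    //     std::vector<std::size_t> offsets(nb_particle_per_leaf.size(), 0);
    //     std::size_t offset = 0;
    //     for (std::size_t leaf = 0; leaf < nb_particle_per_leaf.size(); ++leaf) {
    //         offsets[leaf] = offset;                 // what newLeaf() receives
    //         offset += nb_particle_per_leaf[leaf];   // what the next call gets back
    //     }
    //     for (std::size_t o : offsets) std::cout << o << " ";  // prints: 0 2 3 6
    //     std::cout << "\n";
    //     return 0;
    // }
    // ---- end of illustrative sketch ----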
NbSymbAttributes, + NbAttributesPerParticle, + AttributeClass> + (current_block_indexes[0], + current_block_indexes[size_of_block-1]+1, + size_of_block, + current_nb_particle); + + // Initialise each cell of the block + size_t nb_particles_offset_before_leaf = 0; + for(unsigned cell_id_in_block = 0; cell_id_in_block < size_of_block; ++cell_id_in_block) + { + // Adding cell into leaf block + new_block->newCell( + current_block_indexes[cell_id_in_block], + cell_id_in_block); + + // Fill symbolic information of the block + SymbolCellClass& symbolic = + new_block->getSymbolic(cell_id_in_block); + symbolic.setMortonIndex(current_block_indexes[cell_id_in_block]); + FTreeCoordinate coord; + coord.setPositionFromMorton(current_block_indexes[cell_id_in_block]); + symbolic.setCoordinate(coord); + symbolic.setLevel(idxLevel); + + // Adding cell into particle blockCells + + nb_particles_offset_before_leaf = + new_particle_block->newLeaf( + current_block_indexes[cell_id_in_block], + cell_id_in_block, + FSize(nb_particle_per_leaf[leaf_number_min+cell_id_in_block]), + nb_particles_offset_before_leaf + ); + + + BasicAttachedClass attached_leaf = + new_particle_block->template getLeaf<BasicAttachedClass>(cell_id_in_block); + + // Adding particle + for(size_t idxPart = 0 ; idxPart < nb_particle_per_leaf[leaf_number_min+cell_id_in_block] ; ++idxPart ){ + attached_leaf.setParticle( + idxPart, + idx_particules, + //nb_particles_offset_before_leaf+idxPart, + &particle_container); + ++idx_particules; + } + // Setting the offset to don't use particle twice + //offset_particles += nb_particle_per_leaf[idx_nb_particle_in_block]; + //idx_nb_particle_in_block += 1; + // cell_id_in_block += 1; + } + leaf_number_min = leaf_number; + new_block->declare_mine(); + //Stock the block cell and the block particles + _cellBlocksPerLevel[idxLevel].push_back(new_block); + _particleBlocks.push_back(new_particle_block); + size_of_block = 0; + } + return {current_block_indexes.begin(),current_block_indexes.end()}; + + } + + /** + * create_level create every level + * It's juste a factorisation from the Beregenger constructor + * @author benjamin.dufoyer@inria.fr + * @param currentBlockIndexes block repartition at leaf level + * to construct + * @param inLeftLimite left limit of block of the current proc + * this parameter is not used with the blocked_linear_tree, he is here + * to have compatibility with old constructor + */ + void create_block_nodes_level(std::vector<MortonIndex>& currentBlockIndexes, + MortonIndex inLeftLimite = -1 + ){ + // Cronstruct every level + for(int idxLevel = _treeHeight-2; idxLevel > 0 ; --idxLevel){ inLeftLimite = (inLeftLimite == -1 ? 
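    // ---- Illustrative sketch (not part of the original patch) ----
    // create_block_nodes_level builds each upper level from the level below by
    // shifting Morton indices right by 3 bits (one octree level) and keeping each
    // parent index only once before packing them into blocks of _nbElementsPerBlock.
    // The core of that reduction, stripped of the tree classes, looks like this:
    //
    // #include <cstdint>
    // #include <iostream>
    // #include <vector>
    //
    // using MortonIdx = std::int64_t;
    //
    // // Reduce a sorted list of child Morton indices to the sorted list of their
    // // parents: parent = child >> 3 in an octree, duplicates kept only once.
    // std::vector<MortonIdx> parent_level(const std::vector<MortonIdx>& children) {
    //     std::vector<MortonIdx> parents;
    //     for (MortonIdx child : children) {
    //         const MortonIdx parent = child >> 3;
    //         if (parents.empty() || parents.back() != parent) {
    //             parents.push_back(parent);
    //         }
    //     }
    //     return parents;
    // }
    //
    // int main() {
    //     // Children 8..15 share parent 1, children 16..17 share parent 2.
    //     std::vector<MortonIdx> leaves{8, 9, 12, 15, 16, 17};
    //     for (MortonIdx p : parent_level(leaves)) std::cout << p << " ";  // prints: 1 2
    //     std::cout << "\n";
    //     return 0;
    // }
    // ---- end of illustrative sketch ----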
inLeftLimite : (inLeftLimite>>3)); - CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin(); + CellGroupConstIterator iterChildCells = _cellBlocksPerLevel[idxLevel+1].begin(); const CellGroupConstIterator iterChildEndCells = _cellBlocksPerLevel[idxLevel+1].end(); // Skip blocks that do not respect limit while(iterChildCells != iterChildEndCells && ((*iterChildCells)->getEndingIndex()>>3) <= inLeftLimite){ ++iterChildCells; - } + } // If lower level is empty or all blocks skiped stop here if(iterChildCells == iterChildEndCells){ break; - } + } MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex(); if((currentCellIndex>>3) <= inLeftLimite) currentCellIndex = ((inLeftLimite+1)<<3); @@ -1315,30 +1409,30 @@ void create_block_nodes_level(MortonIndex* currentBlockIndexes, // Count until end of sub group is reached or we have enough cells while(sizeOfBlock < _nbElementsPerBlock && iterChildCells != iterChildEndCells ){ if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3)) - && (*iterChildCells)->exists(currentCellIndex)){ + && (*iterChildCells)->exists(currentCellIndex)){ currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3); sizeOfBlock += 1; currentCellIndex = (((currentCellIndex>>3)+1)<<3); - } + } else{ currentCellIndex += 1; - } + } // If we are at the end of the sub group, move to next while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){ ++iterChildCells; // Update morton index if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){ currentCellIndex = (*iterChildCells)->getStartingIndex(); - } - } - } + } + } + } // If group is full if(sizeOfBlock == _nbElementsPerBlock || (sizeOfBlock && iterChildCells == iterChildEndCells)){ // Create a group CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock); + currentBlockIndexes[sizeOfBlock-1]+1, + sizeOfBlock); // Init cells for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); @@ -1349,60 +1443,437 @@ void create_block_nodes_level(MortonIndex* currentBlockIndexes, coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock]); symbolic.setCoordinate(coord); symbolic.setLevel(idxLevel); - } - + } + newBlock->declare_mine(); // Keep the block _cellBlocksPerLevel[idxLevel].push_back(newBlock); sizeOfBlock = 0; - } - } - } -} - - - - /** - * Add LET block at leaf level of the local GroupTree - * @author benjamin.dufoyer@inria.fr - * @param block_to_insert list symbolic information of block - * to add - * @param start indicate if we put the block at the - * start of at the end of current block - * @param level level to add - */ - template<class block_t> - void add_LET_block(std::vector<block_t>& block_to_insert, - int level, - const MortonIndex& local_min_m_idx - ){ - if(block_to_insert.size() == 0) - return; - // Allocate vector of new block - std::vector<CellGroupClass*> vect_block(block_to_insert.size()); - // Fill the vector of new block - unsigned block_at_begin = 0; - for(unsigned i = 0; i < vect_block.size(); ++i){ - vect_block[i] = new CellGroupClass( - block_to_insert[i].start_index , - block_to_insert[i].end_index , - (int)block_to_insert[i].nb_leaf_in_block ); - if(block_to_insert[i].end_index < local_min_m_idx){ + } + } + } + } + + + + /** + * This function add all LET block put in parameter + * She put block 
in order according to idx_global + * She detect if we are at leaf level and create particle group + * + * @author benjamin.dufoyer@inria.fr + * @param block_to_insert pair of symbolic information of cellGroup and Part + * group + * @param level The level where are adding LET group + */ + template<class particle_symbolic_block_t, + class cell_symbolic_block_t> + void add_LET_block( + std::pair<std::vector<cell_symbolic_block_t>, + std::vector<particle_symbolic_block_t>>& block_to_insert, + int level + ){ + // Check if we are at leaf level + bool leaf_level = ( level == ( _treeHeight - 1 ) ); + // Bind the vector of the pair + std::vector<cell_symbolic_block_t> cell_to_insert = block_to_insert.first; + std::vector<particle_symbolic_block_t> particle_to_insert = block_to_insert.second; + // If we are at leaf level + if(leaf_level){ + // Check if we have the same number of symoblic information of cellBlock + // and of particleBlock + FAssert(cell_to_insert.size() == particle_to_insert.size()); + } else { + // Else check if the particle block is empty + FAssert(particle_to_insert.size() == 0); + } + // if we have no block to insert, we don't need to continue this function + if(cell_to_insert.size() == 0) + return; + // Get my local minimum index global + int min_idx_global = this->getCellGroup(level,0)->getIdxGlobal(); + // Allocate vector of new block + std::vector<CellGroupClass*> vect_block(cell_to_insert.size()); + + // Fill the vector of new block + unsigned block_at_begin = 0; + // iterate on every cell + for(unsigned i = 0; i < cell_to_insert.size(); ++i){ + // create new cell + vect_block[i] = new CellGroupClass( + cell_to_insert[i].start_index , + cell_to_insert[i].end_index, + (int)cell_to_insert[i].nb_leaf_in_block ); + // set the global index of the cell + vect_block[i]->setIdxGlobal(cell_to_insert[i].idx_global_block); + // if the global index is less than the local idex, we need to + // insert + // the block at the beginning of the tree + if(cell_to_insert[i].idx_global_block < min_idx_global){ + ++block_at_begin; + } + // init each cell of the new block + for(unsigned j = 0; j < cell_to_insert[i].m_idx_in_block.size(); ++j){ + vect_block[i]->newCell(cell_to_insert[i].m_idx_in_block[j],j); + } + } + // Add block at beginning of the level + _cellBlocksPerLevel[level].insert( + _cellBlocksPerLevel[level].begin(), + vect_block.begin(), + vect_block.begin()+block_at_begin); + + // Add block a the end of the level + _cellBlocksPerLevel[level].insert( + _cellBlocksPerLevel[level].end(), + vect_block.begin()+block_at_begin, + vect_block.end()); + // if we are at the leaf level + if(leaf_level ){ + // init of the vector of particle + std::vector<ParticleGroupClass*> vect_particle(particle_to_insert.size()); + + block_at_begin = 0; + // iterate on every symbolic particle group + for(unsigned i = 0 ; i < particle_to_insert.size(); ++i ){ + // create a new particle group + vect_particle[i] = new ParticleGroupClass( + cell_to_insert[i].start_index , + cell_to_insert[i].end_index, + (int)cell_to_insert[i].nb_leaf_in_block, + particle_to_insert[i].nb_particles); + // set the global index of the new particle group + vect_particle[i]-> setIdxGlobal(particle_to_insert[i].idx_global_block); + // if the current idx global block have a idx global smaller than + // the global index in local + if(cell_to_insert[i].idx_global_block < min_idx_global){ ++block_at_begin; - } - } + } + size_t offset = 0; + // init all leaf of the current particle group + for(int j = 0; j < 
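    // ---- Illustrative sketch (not part of the original patch) ----
    // add_LET_block splits the received remote groups into those placed before the
    // locally owned groups and those placed after, by comparing each group's global
    // index with the local minimum. Stripped of the tree classes, and assuming the
    // received list is sorted by idx_global_block (which the single block_at_begin
    // counter relies on), the partitioning amounts to:
    //
    // #include <cstddef>
    // #include <iostream>
    // #include <vector>
    //
    // int main() {
    //     // Global indices of the received LET groups, sorted ascending (assumption).
    //     std::vector<int> received{2, 3, 9, 10};
    //     // Global index of the first locally owned group at this level.
    //     const int min_idx_global = 5;
    //
    //     // Count how many received groups must be inserted before the local ones.
    //     std::size_t block_at_begin = 0;
    //     for (int idx : received) {
    //         if (idx < min_idx_global) ++block_at_begin;
    //     }
    //
    //     // Groups [0, block_at_begin) go in front, the rest go after the local ones.
    //     std::cout << "insert " << block_at_begin << " group(s) before the local blocks, "
    //               << received.size() - block_at_begin << " after\n";
    //     return 0;
    // }
    // ---- end of illustrative sketch ----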
cell_to_insert[i].nb_leaf_in_block; ++j){ + offset = vect_particle[i]->newLeaf( + cell_to_insert[i].m_idx_in_block[j], + j, + particle_to_insert[i].nb_particle_per_leaf[j], + offset); + } + } // Add block at beginning of the level - _cellBlocksPerLevel[level].insert( - _cellBlocksPerLevel[level].begin(), - vect_block.begin(), - vect_block.begin()+block_at_begin); + _particleBlocks.insert( + _particleBlocks.begin(), + vect_particle.begin(), + vect_particle.begin()+block_at_begin); // Add block a the end of the level + _particleBlocks.insert( + _particleBlocks.end(), + vect_particle.begin()+block_at_begin, + vect_particle.end()); + } + } + + +#ifdef SCALFMM_USE_MPI + /** + * This function compute and add the local essential tree (LET) at + * the level. + * We compute interaction for the P2P(if needed) and M2L. We communicate + * other proc to get the GroupOfCell needed for building the LET + * @author benjamin.dufoyer@inria.fr + * @param group_linear_tree The group linear tree + * @param level The level to build the LET + * @param dim The dimension of Coordinate + */ + template<class GroupLinearTree> + void create_LET_at_level( + GroupLinearTree& group_linear_tree, + int& level, + MortonIndex& gmin, + MortonIndex& gmax, + MortonIndex& lmin, + MortonIndex& lmax, + int dim = 3 + ){ + // stock in the variable if we are at the leaf level + bool leaf_level = (this->getHeight()-1 == level); + // update the morton index + if(!leaf_level){ + gmin = gmin >> 3; + gmax = gmax >> 3; + } + const MortonIndex global_min_m_idx = gmin; + const MortonIndex global_max_m_idx = gmax; + // Compute min and max local morton index at the level needed + if(this->getNbCellGroupAtLevel(level) > 0){ + lmin = this->getCellGroup(level,0)->getStartingIndex(); + lmax = this->getCellGroup(level,this->getNbCellGroupAtLevel(level)-1)->getEndingIndex()-1; + } else { + lmin = -1; + lmax = -1; + } + const MortonIndex local_min_m_idx = lmin; + const MortonIndex local_max_m_idx = lmax; + + // declare variable, needed because we fill it in a if case + std::vector<MortonIndex> leaf_P2P; + if(leaf_level){ + // IDEA : can be a task + // This function compute the leaf needed by the P2P operation + // This function return a vector with all leaf needed + // The P2P interaction is only needed at leaf level + leaf_P2P = dstr_grp_tree_builder::get_leaf_P2P_interaction( + *this, + global_min_m_idx, + global_max_m_idx, + local_min_m_idx, + local_max_m_idx); + } + + // IDEA can be a task + // This function compute the leaf needed by the M2L operation + // This function return a vector with all leaf needed + // get leaf M2L + std::vector<MortonIndex> leaf_M2L = + dstr_grp_tree_builder::get_leaf_M2L_interaction_at_level( + global_min_m_idx, + global_max_m_idx, + local_min_m_idx, + local_max_m_idx, + level, + *this, + dim); + std::vector<MortonIndex> needed_leaf; + if(leaf_level){ + // this function return the concatenation of the leaf for the P2P and + // the leaf for the M2L + needed_leaf = dstr_grp_tree_builder::concat_M2L_P2P(leaf_P2P,leaf_M2L); + } else { + // if it's not the leaf level, we juste need the M2L + needed_leaf = leaf_M2L; + group_linear_tree.update_index_particle_distribution( + std::pair<MortonIndex,MortonIndex>(local_min_m_idx + ,local_max_m_idx) + ); + } + // free memory + // this call swap the current vector to a empty vector + std::vector<MortonIndex>().swap(leaf_P2P); + std::vector<MortonIndex>().swap(leaf_M2L); + + std::vector<std::pair<MortonIndex,MortonIndex>> index_particle_distribution = + 
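    // ---- Illustrative sketch (not part of the original patch) ----
    // create_LET_at_level is called from the leaf level upwards, and each time a
    // non-leaf level is processed the global Morton bounds are coarsened by one
    // octree level (>> 3), mirroring the parent computation used elsewhere in this
    // file. In isolation, the bound update over the levels looks like this:
    //
    // #include <iostream>
    //
    // using MortonIdx = long long;
    //
    // int main() {
    //     const int tree_height = 5;
    //     const int level_min = 2;
    //     // Toy global Morton bounds of the particle distribution at leaf level.
    //     MortonIdx gmin = 100, gmax = 4000;
    //
    //     // Walk from the leaf level up to level_min, coarsening the bounds each
    //     // time a non-leaf level is reached, as create_LET / create_LET_at_level do.
    //     for (int level = tree_height - 1; level >= level_min; --level) {
    //         const bool leaf_level = (level == tree_height - 1);
    //         if (!leaf_level) {
    //             gmin >>= 3;   // parent Morton index of the lower bound
    //             gmax >>= 3;   // parent Morton index of the upper bound
    //         }
    //         std::cout << "level " << level << ": [" << gmin << ", " << gmax << "]\n";
    //     }
    //     return 0;
    // }
    // ---- end of illustrative sketch ----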
group_linear_tree.get_index_particle_distribution(); + + // Get the interaction matrix + // matrix[2][nproc] + // first line for Morton index to Send + // second line for Morton index to Recv + std::vector<std::vector<size_t>> global_matrix_interaction = dstr_grp_tree_builder::get_matrix_interaction( + needed_leaf, + index_particle_distribution, + group_linear_tree.get_mpi_conf()); + + // Send and get leaf + // Auto is used to get the block more easly + // it's a std::pair<std::vector<cell_symbolic_block>,std::vector<particle_symbolic_block>> + // block_t is a struct define on FDistributedGroupTreeBuilder.hpp + auto let_block = + dstr_grp_tree_builder::send_get_symbolic_block_at_level( + needed_leaf, + global_matrix_interaction, + *this, + level, + group_linear_tree.get_mpi_conf()); + + // free needed leaf + std::vector<MortonIndex>().swap(needed_leaf); + // free interaction matrix + std::vector<std::vector<size_t>>().swap(global_matrix_interaction); + + + //add the LET block to the tree + this->add_LET_block( + let_block, + level); + } + + /** + * this function create the local essential tree at every level requested + * by this function + * @author benjamin.dufoyer@inria.fr + * @param group_linear_tree The group linear tree + * @param level_min The minimum level to build the LET + * @param dim The dimension of coordinate + */ + template<class GroupLinearTree> + void create_LET( + GroupLinearTree& group_linear_tree, + int level_min = 2, + int dim = 3 + ){ + // get the particle distribution + std::vector<std::pair<MortonIndex,MortonIndex>> index_particle_distribution = + group_linear_tree.get_index_particle_distribution(); + + // compute the min and the max global morton index at the level needed + // Compute min and max global morton index at the level needed + // This variable is used to put value in const + MortonIndex gmin = index_particle_distribution.front().first; + MortonIndex gmax = index_particle_distribution.back().second; + MortonIndex lmin = this->getParticleGroup(0)->getStartingIndex(); + MortonIndex lmax = this->getParticleGroup((this->getNbParticleGroup()-1) )->getEndingIndex(); + // if we have more than 1 proc + if( group_linear_tree.get_mpi_conf().comm.size() != 1 ){ + // compute the LET at every level + for(int i = this->_treeHeight-1 ; i >= level_min ; --i){ + // std::cout << "Start creating LET at " << i << std::endl; + this->create_LET_at_level(group_linear_tree,i,gmin,gmax,lmin,lmax,dim); + } + } + dstr_grp_tree_builder::send_get_block_M2M( + *this, + group_linear_tree.get_mpi_conf() + ); + } +#endif + /** + * IDEA une factorisation peut être faite avec la fonction d'ajout du LET + * This function allow to insert 1 block at a level needed + * @author benjamin.dufoyer@inria.fr + * @param block_to_insert symbolique information of the block + * @param list_m_idx List of Morton Index + * @param level Level to insert + * @param nb_particle_per_leaf [OPTIONNAL] number of particle per leaf + */ + template<class info_symb_cell_t> + void insert_block( + info_symb_cell_t& block_to_insert, + std::vector<MortonIndex>& list_m_idx, + int level, + std::vector<FSize>* nb_particle_per_leaf = nullptr + ){ + // Check if we already have this block + for(int i = 0 ; i < this->getNbCellGroupAtLevel(level); ++i){ + auto* container = this->getCellGroup(level,i); + // break the loop if the globalIdx is too big + if(container->getIdxGlobal() > block_to_insert.idx_global_block ){ + break; + } + if(container->getIdxGlobal() == block_to_insert.idx_global_block ){ + return; + } + } + // Check if 
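    // ---- Illustrative sketch (not part of the original patch) ----
    // insert_block, whose body follows, places one extra cell group among the
    // existing groups of a level by comparing global indices: if a group with the
    // same idx_global already exists nothing is done, otherwise the new block goes
    // in front of the first group with a larger index, or at the end. A
    // standard-library equivalent of that placement, on bare indices, is:
    //
    // #include <algorithm>
    // #include <iostream>
    // #include <vector>
    //
    // // Insert idx into a vector of group global indices kept in ascending order,
    // // skipping the insertion if the index is already present.
    // void insert_group_index(std::vector<int>& level_groups, int idx) {
    //     auto it = std::lower_bound(level_groups.begin(), level_groups.end(), idx);
    //     if (it != level_groups.end() && *it == idx) return;  // block already there
    //     level_groups.insert(it, idx);                        // before first larger index
    // }
    //
    // int main() {
    //     std::vector<int> level_groups{2, 5, 9};
    //     insert_group_index(level_groups, 7);   // lands between 5 and 9
    //     insert_group_index(level_groups, 5);   // duplicate, ignored
    //     insert_group_index(level_groups, 12);  // appended at the end
    //     for (int g : level_groups) std::cout << g << " ";  // prints: 2 5 7 9 12
    //     std::cout << "\n";
    //     return 0;
    // }
    // ---- end of illustrative sketch ----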
we are at the leaf level + bool leaf_level = ( level == ( _treeHeight - 1 ) ); + // create the new block + CellGroupClass* new_block = new CellGroupClass( + block_to_insert.start_index, + block_to_insert.end_index, + block_to_insert.nb_leaf_in_block); + // set the global idx to the new block + new_block->setIdxGlobal(block_to_insert.idx_global_block); + // init all cell of the new block + for(int i = 0 ; i < block_to_insert.nb_leaf_in_block ; ++i){ + new_block->newCell(list_m_idx[i],i); + } + // if we are at leaf level + if(leaf_level){ + MortonIndex min_global_idx = 0; + MortonIndex max_global_idx = 0; + int idx_min = 0; + int idx_max = 0; + // seek the min morton index of my blocks + for(int i = 0 ; i < this->getNbParticleGroup() ; ++i){ + if(this->getCellGroup(level,i)->isMine()){ + min_global_idx = this->getParticleGroup(i)->getIdxGlobal()-1; + idx_min = i; + break; + } + } + // seek the max morton index of my blocks + for(int i = this->getNbParticleGroup()-1 ; i >= 0 ; --i){ + if(this->getCellGroup(level,i)->isMine()){ + max_global_idx = this->getParticleGroup(i)->getIdxGlobal()+1; + idx_max = i; + break; + } + } + // compute the number of particle of this block + FSize nb_particle = 0; + for(unsigned i = 0; i < nb_particle_per_leaf->size(); ++i){ + nb_particle += nb_particle_per_leaf->data()[i]; + } + // create the particle group + ParticleGroupClass* new_block_p = new ParticleGroupClass( + block_to_insert.start_index , + block_to_insert.end_index, + (int)block_to_insert.nb_leaf_in_block, + nb_particle); + // set the global index of the particle group + new_block_p->setIdxGlobal((int)min_global_idx); + std::size_t offset = 0; + // create all leaf of the particle group + for(int i = 0 ; i < block_to_insert.nb_leaf_in_block ; ++i){ + offset = new_block_p->newLeaf( + list_m_idx[i], + i, + nb_particle_per_leaf->data()[i], + offset + ); + } + // insert the particle group at the good place + if(this->getParticleGroup(idx_min)->getStartingIndex() > block_to_insert.start_index){ + new_block_p->setIdxGlobal((int)min_global_idx); + _particleBlocks.insert( + _particleBlocks.begin()+idx_min, + new_block_p + ); + } else { + new_block_p->setIdxGlobal((int)max_global_idx); + _particleBlocks.insert( + _particleBlocks.begin()+idx_max+1, + new_block_p + ); + } + + } + // if we need to put the new block at first + // if we already have a block at this level + if(this->getNbCellGroupAtLevel(level) > 0) { + if(this->getCellGroup(level,0)->getIdxGlobal() > block_to_insert.idx_global_block){ + _cellBlocksPerLevel[level].insert( + _cellBlocksPerLevel[level].begin(), + new_block); + return; + } + // if we don't have block at this level + } else { _cellBlocksPerLevel[level].insert( - _cellBlocksPerLevel[level].end(), - vect_block.begin()+block_at_begin, - vect_block.end()); - } - + _cellBlocksPerLevel[level].begin(), + new_block); + return; + } + // else find the place of the block + // iterate on every block + for(int idx_block = 0 ; idx_block < this->getNbCellGroupAtLevel(level) ; ++idx_block){ + auto* container = this->getCellGroup(level,idx_block); + // if the block i want to insert is already here + if(container->getIdxGlobal() == block_to_insert.idx_global_block ){ + return; + } + if(container->getIdxGlobal() > block_to_insert.idx_global_block ){ + _cellBlocksPerLevel[level].insert( + _cellBlocksPerLevel[level].begin()+idx_block, + new_block); + return; + } + } + _cellBlocksPerLevel[level].insert( + _cellBlocksPerLevel[level].end(), + new_block); + } }; + + + #endif // FGROUPTREE_HPP diff --git 
a/Src/GroupTree/Core/FOutOfBlockInteraction.hpp b/Src/GroupTree/Core/FOutOfBlockInteraction.hpp index c4a9ca2e6..d0bd203c3 100644 --- a/Src/GroupTree/Core/FOutOfBlockInteraction.hpp +++ b/Src/GroupTree/Core/FOutOfBlockInteraction.hpp @@ -1,7 +1,7 @@ #ifndef FOUTOFBLOCKINTERACTION_HPP #define FOUTOFBLOCKINTERACTION_HPP -#include "../../Utils/FGlobal.hpp" +#include "Utils/FGlobal.hpp" #include "../StarPUUtils/FStarPUDefaultAlign.hpp" diff --git a/Src/GroupTree/Core/FP2PGroupParticleContainer.hpp b/Src/GroupTree/Core/FP2PGroupParticleContainer.hpp index 9cd819106..1d2e1691c 100644 --- a/Src/GroupTree/Core/FP2PGroupParticleContainer.hpp +++ b/Src/GroupTree/Core/FP2PGroupParticleContainer.hpp @@ -66,7 +66,11 @@ public: int getNVALS() const { return NVALS; } - +/* + const long long int* getDataDown() const { + return Parent::template getAttribute<0>(); + } +*/ }; #endif // FP2PGROUPPARTICLECONTAINER_HPP diff --git a/Src/GroupTree/StarPUUtils/FStarPUCptInteractionsWrapper.hpp b/Src/GroupTree/StarPUUtils/FStarPUCptInteractionsWrapper.hpp index 1bfec0963..d7fd4151a 100644 --- a/Src/GroupTree/StarPUUtils/FStarPUCptInteractionsWrapper.hpp +++ b/Src/GroupTree/StarPUUtils/FStarPUCptInteractionsWrapper.hpp @@ -448,7 +448,9 @@ public: FSize nbInteractions = int(outsideInteractions->size()); if(mode == 1){ - FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(GetWorkerId(), &taskTimeRecorder, (((currentCells->getStartingIndex()+1) * (cellsOther->getStartingIndex()+2)) * 20 + idxLevel) * 8 + 3, "M2L-ext")); + FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(GetWorkerId(), &taskTimeRecorder, + (((currentCells->getStartingIndex()+1) * (cellsOther->getStartingIndex()+2)) * 20 + idxLevel) * 8 + 3, + "M2L-ext")); for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){ const auto& inter_data = (*outsideInteractions)[outInterIdx]; const multipole_t* source_multipole @@ -479,7 +481,9 @@ public: } } else{ - FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(GetWorkerId(), &taskTimeRecorder, (((currentCells->getStartingIndex()+1) * (cellsOther->getStartingIndex()+1)) * 20 + idxLevel) * 8 + 3, "M2L-ext")); + FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(GetWorkerId(), &taskTimeRecorder, + (((currentCells->getStartingIndex()+1) * (cellsOther->getStartingIndex()+1)) * 20 + idxLevel) * 8 + 3, + "M2L-ext")); for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){ const auto& inter_data = (*outsideInteractions)[outInterIdx]; const multipole_t* source_multipole diff --git a/Src/Utils/FMpi.hpp b/Src/Utils/FMpi.hpp index 49bb7085c..7a5338ab9 100644 --- a/Src/Utils/FMpi.hpp +++ b/Src/Utils/FMpi.hpp @@ -1,8 +1,8 @@ // See LICENCE file at project root +// @FUSE_MPI #ifndef FMPI_HPP #define FMPI_HPP - #include <cstdio> #include <stdexcept> @@ -10,7 +10,7 @@ #ifndef SCALFMM_USE_MPI #error The MPI header is included while SCALFMM_USE_MPI is turned OFF #endif - +#include <mpi.h> #include "FNoCopyable.hpp" #include "FMath.hpp" @@ -27,7 +27,7 @@ ///////////////////////////////////////////////////////////////////////////////////////// -#include <mpi.h> + ///////////////////////////////////////////////////////////////////////////////////////// @@ -86,7 +86,7 @@ public: //////////////////////////////////////////////////////// // FComm to factorize MPI_Comm work //////////////////////////////////////////////////////// - + /** * \brief MPI comunicator abstraction * @@ -102,7 +102,7 @@ public: /// Updates current process rank and process count from mpi - void reset(){ + void updateMembers(){ 
FMpi::Assert( MPI_Comm_rank(communicator,&rank), __LINE__ ); FMpi::Assert( MPI_Comm_size(communicator,&nbProc), __LINE__ ); } @@ -113,7 +113,7 @@ public: FMpi::Assert( MPI_Comm_dup(inCommunicator, &communicator), __LINE__ , "comm dup"); FMpi::Assert( MPI_Comm_group(communicator, &group), __LINE__ , "comm group"); - reset(); + this->updateMembers(); } /// Constructor : duplicates the given communicator @@ -121,7 +121,7 @@ public: FMpi::Assert( MPI_Comm_dup(inCommunicator.communicator, &communicator), __LINE__ , "comm dup"); FMpi::Assert( MPI_Comm_group(communicator, &group), __LINE__ , "comm group"); - reset(); + this->updateMembers(); } FComm& operator=(const FComm& inCommunicator ) { @@ -131,7 +131,7 @@ public: FMpi::Assert( MPI_Comm_dup(inCommunicator.communicator, &communicator), __LINE__ , "comm dup"); FMpi::Assert( MPI_Comm_group(communicator, &group), __LINE__ , "comm group"); - reset(); + this->updateMembers(); return *this; } @@ -250,7 +250,7 @@ public: MPI_Comm_free(&previousComm); MPI_Group_free(&previousGroup); - reset(); + this->updateMembers(); delete[] procsIdArray ; } @@ -278,7 +278,7 @@ public: MPI_Comm_free(&previousComm); MPI_Group_free(&previousGroup); - reset(); + this->updateMembers(); FAssertLF(nbProc == counterNewGroup); delete[] procsIdArray ; } @@ -306,7 +306,7 @@ public: * [fourmi062:15896] [[13237,0],1]-[[13237,1],1] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) * [fourmi056:04597] [[13237,0],3]-[[13237,1],3] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) * [fourmi053:08571] [[13237,0],5]-[[13237,1],5] mca_oob_tcp_msg_recv: readv failed: Connection reset by peer (104) - * + * * Error for process 1: * * [[13237,1],1][btl_openib_component.c:3227:handle_wc] from fourmi062 to: fourmi056 error polling LP CQ with status LOCAL LENGTH ERROR status number 1 for wr_id 7134664 opcode 0 vendor error 105 qp_idx 3 @@ -326,6 +326,15 @@ public: communicator = new FComm(MPI_COMM_WORLD); } + FMpi(MPI_Comm comm):communicator(nullptr){ + if( instanceCount > 0) { + throw std::logic_error("FMpi should not be instanciatedmore than once."); + } else { + instanceCount++; + } + communicator = new FComm(comm); + } + /// Constructor FMpi(int inArgc, char ** inArgv ) : communicator(nullptr) { if( instanceCount > 0) { @@ -346,7 +355,7 @@ public: } /// Get the global communicator - const FComm& global() { + const FComm& global() const { return (*communicator); } @@ -511,12 +520,10 @@ public: private: /// The original communicator FComm* communicator; - + /// Counter to avoid several instanciations static int instanceCount; }; #endif //FMPI_HPP - - diff --git a/Src/Utils/FValidationAlgorithm.hpp b/Src/Utils/FValidationAlgorithm.hpp index d814edb56..9ac674d7e 100644 --- a/Src/Utils/FValidationAlgorithm.hpp +++ b/Src/Utils/FValidationAlgorithm.hpp @@ -1,10 +1,229 @@ #ifndef _VALIDATION_METHOD_ #define _VALIDATION_METHOD_ +#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp" +#include "../../Src/Components/FSimpleLeaf.hpp" +#include "../../Src/Kernels/Chebyshev/FChebCell.hpp" +#include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp" +#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp" +#include "../../Src/Containers/FOctree.hpp" +#include "../../Src/Utils/FMpi.hpp" +#include "../../Src/Kernels/Interpolation/FInterpMatrixKernel.hpp" +#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" + +#include "../../Src/Kernels/Uniform/FUnifCell.hpp" +#include "../../Src/Kernels/Uniform/FUnifKernel.hpp" + namespace validation_methods { +using 
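// ---- Illustrative sketch (not part of the original patch) ----
// Possible usage of the FMpi(MPI_Comm) constructor added in the FMpi.hpp hunk
// above, for an application that drives MPI itself and hands ScalFMM an existing
// communicator. The include path is an assumption about the source layout, and
// only members visible in this patch (global(), processCount()) are used; whether
// FMpi's destructor or the application calls MPI_Finalize depends on code outside
// this patch, so finalisation is deliberately not shown.
//
// #include <cstdio>
// #include <mpi.h>
// #include "Src/Utils/FMpi.hpp"   // adjust to your include paths (assumption)
//
// int main(int argc, char** argv) {
//     // The application owns the MPI lifetime and wraps an existing communicator.
//     MPI_Init(&argc, &argv);
//
//     FMpi scalfmm_mpi(MPI_COMM_WORLD);          // duplicates the given communicator
//     const FComm& comm = scalfmm_mpi.global();  // global() is now const-qualified
//     std::printf("ScalFMM sees %d MPI process(es)\n", comm.processCount());
//
//     // ... build the distributed group tree / run the algorithms with comm ...
//
//     // MPI_Finalize intentionally omitted, see the note above.
//     return 0;
// }
// ---- end of illustrative sketch ----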
FReal = double; + +template<class GroupCellSymbClass, + class GroupCellUpClass, + class GroupCellDownClass, + class GroupContainerClass, + class particle_t, + class GroupAlgorithm> +void validate_uniform_distributed( + FGroupTree<FReal,GroupCellSymbClass,GroupCellUpClass, GroupCellDownClass, GroupContainerClass,1,4, FReal>& groupedTree, + GroupAlgorithm& groupalgo, + int& operationsToProceed, + std::vector<particle_t>& allParticles, + FMpi& mpiComm +){ + static const int ORDER = 6; + using MatrixKernelClass = FInterpMatrixKernelR<FReal>; + using ContainerClass = FP2PParticleContainer<FReal>; + using LeafClass = FSimpleLeaf<FReal, ContainerClass>; + using CellClass = FUnifCell<FReal,ORDER> ; + using OctreeClass = FOctree<FReal, CellClass,ContainerClass,LeafClass> ; + using KernelClass = FUnifKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> ; + using FmmClass = FFmmAlgorithmThreadProc<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> ; + + OctreeClass treeCheck(groupedTree.getHeight(), + /*subTree*/2, + groupedTree.getBoxWidth(), + groupedTree.getBoxCenter()); + + for(FSize idxPart = 0 ; idxPart < (FSize)allParticles.size() ; ++idxPart){ + treeCheck.insert(allParticles[idxPart].pos, 0.1); + } + + MatrixKernelClass MatrixKernelValidation; + + KernelClass kernels(groupedTree.getHeight(), + groupedTree.getBoxWidth(), + groupedTree.getBoxCenter(), &MatrixKernelValidation); + FmmClass algorithm(mpiComm.global(),&treeCheck, &kernels); + algorithm.execute(operationsToProceed); + + validate_group_tree_distributed( + groupedTree, + groupalgo, + treeCheck + ); +} + +template<class GroupCellSymbClass, + class GroupCellUpClass, + class GroupCellDownClass, + class GroupContainerClass, + class particle_t, + class GroupAlgorithm> +void validate_chebyshev_distributed( + FGroupTree<FReal,GroupCellSymbClass,GroupCellUpClass, GroupCellDownClass, GroupContainerClass,1,4, FReal>& groupedTree, + GroupAlgorithm groupalgo, + int operationsToProceed, + std::vector<particle_t>& allParticles, + FMpi mpiComm +){ + static const int ORDER = 6; + using MatrixKernelClass = FInterpMatrixKernelR<FReal>; + using ContainerClass = FP2PParticleContainer<FReal>; + using LeafClass = FSimpleLeaf<FReal, ContainerClass>; + using CellClass = FChebCell<FReal,ORDER> ; + using OctreeClass = FOctree<FReal, CellClass,ContainerClass,LeafClass> ; + using KernelClass = FChebSymKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> ; + using FmmClass = FFmmAlgorithmThreadProc<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> ; + + OctreeClass treeCheck(groupedTree.getHeight(), + /*subTree*/2, + groupedTree.getBoxWidth(), + groupedTree.getBoxCenter()); + + for(FSize idxPart = 0 ; idxPart < (FSize)allParticles.size() ; ++idxPart){ + treeCheck.insert(allParticles[idxPart].pos, 0.1); + } + + MatrixKernelClass MatrixKernelValidation; + + KernelClass kernels(groupedTree.getHeight(), + groupedTree.getBoxWidth(), + groupedTree.getBoxCenter(), &MatrixKernelValidation); + FmmClass algorithm(mpiComm.global(),&treeCheck, &kernels); + algorithm.execute(operationsToProceed); + + validate_group_tree_distributed( + groupedTree, + groupalgo, + treeCheck + ); +} + +template<class GroupCellSymbClass, + class GroupCellUpClass, + class GroupCellDownClass, + class GroupContainerClass, + class GroupAlgorithm, + class CellClass, + class ContainerClass, + class LeafClass> +void validate_group_tree_distributed( + FGroupTree<FReal,GroupCellSymbClass,GroupCellUpClass, GroupCellDownClass, GroupContainerClass,1,4, FReal>& 
groupedTree, + GroupAlgorithm groupalgo, + FOctree<FReal, CellClass,ContainerClass,LeafClass> treeCheck +){ + const FReal epsi = 1E-10; + + groupedTree.forEachMyCellWithLevel( + [&](GroupCellSymbClass* gsymb , + GroupCellUpClass* gmul, + GroupCellDownClass* gloc, + const int level) + { + if(groupalgo.isDataOwnedBerenger(gsymb->getMortonIndex(), level)) + { + const CellClass* cell = treeCheck.getCell(gsymb->getMortonIndex(), level); + if(cell == nullptr){ + std::cout << "[Empty] Error cell should exist " << gsymb->getMortonIndex() << "\n"; + } else { + FMath::FAccurater<FReal> diffUp; + diffUp.add(cell->getMultipoleData().get(0), gmul->get(0), gmul->getVectorSize()); + if(diffUp.getRelativeInfNorm() > epsi || diffUp.getRelativeL2Norm() > epsi){ + std::cout << "[Up] Up is different at index " << gsymb->getMortonIndex() << " level " << level << " is " << diffUp << "\n"; + } + FMath::FAccurater<FReal> diffDown; + diffDown.add(cell->getLocalExpansionData().get(0), gloc->get(0), gloc->getVectorSize()); + if(diffDown.getRelativeInfNorm() > epsi || diffDown.getRelativeL2Norm() > epsi){ + std::cout << "[Down] Down is different at index " << gsymb->getMortonIndex() << " level " << level << " is " << diffDown << "\n"; + } + } + } + }); + + groupedTree.forEachCellMyLeaf( + [&](GroupCellSymbClass* gsymb , + GroupCellUpClass* /* gmul */, + GroupCellDownClass* /* gloc */, + FP2PGroupParticleContainer<FReal> * leafTarget) + { + if(groupalgo.isDataOwnedBerenger(gsymb->getMortonIndex(), groupedTree.getHeight()-1)) + { + const ContainerClass* targets = treeCheck.getLeafSrc(gsymb->getMortonIndex()); + if(targets == nullptr){ + std::cout << "[Empty] Error leaf should exist " << gsymb->getMortonIndex() << "\n"; + } else { + const FReal*const gposX = leafTarget->getPositions()[0]; + const FReal*const gposY = leafTarget->getPositions()[1]; + const FReal*const gposZ = leafTarget->getPositions()[2]; + const FSize gnbPartsInLeafTarget = leafTarget->getNbParticles(); + const FReal*const gforceX = leafTarget->getForcesX(); + const FReal*const gforceY = leafTarget->getForcesY(); + const FReal*const gforceZ = leafTarget->getForcesZ(); + const FReal*const gpotential = leafTarget->getPotentials(); + + const FReal*const posX = targets->getPositions()[0]; + const FReal*const posY = targets->getPositions()[1]; + const FReal*const posZ = targets->getPositions()[2]; + const FSize nbPartsInLeafTarget = targets->getNbParticles(); + const FReal*const forceX = targets->getForcesX(); + const FReal*const forceY = targets->getForcesY(); + const FReal*const forceZ = targets->getForcesZ(); + const FReal*const potential = targets->getPotentials(); + + if(gnbPartsInLeafTarget != nbPartsInLeafTarget){ + std::cout << "[Empty] Not the same number of particles at " << gsymb->getMortonIndex() + << " gnbPartsInLeafTarget " << gnbPartsInLeafTarget << " nbPartsInLeafTarget " << nbPartsInLeafTarget << "\n"; + }else{ + FMath::FAccurater<FReal> potentialDiff; + FMath::FAccurater<FReal> fx, fy, fz; + for(FSize idxPart = 0 ; idxPart < nbPartsInLeafTarget ; ++idxPart){ + if(gposX[idxPart] != posX[idxPart] || gposY[idxPart] != posY[idxPart] || gposZ[idxPart] != posZ[idxPart]){ + std::cout << "[Empty] Not the same particlea at " << gsymb->getMortonIndex() << " idx " << idxPart << " " + << gposX[idxPart] << " " << posX[idxPart] << " " << gposY[idxPart] << " " << posY[idxPart] + << " " << gposZ[idxPart] << " " << posZ[idxPart] << "\n"; + } else { + potentialDiff.add(potential[idxPart], gpotential[idxPart]); + fx.add(forceX[idxPart], gforceX[idxPart]); + 
fy.add(forceY[idxPart], gforceY[idxPart]); + fz.add(forceZ[idxPart], gforceZ[idxPart]); + } + } + if(potentialDiff.getRelativeInfNorm() > epsi || potentialDiff.getRelativeL2Norm() > epsi){ + std::cout << "[Up] potentialDiff is different at index " << gsymb->getMortonIndex() << " is " << potentialDiff << "\n"; + } + if(fx.getRelativeInfNorm() > epsi || fx.getRelativeL2Norm() > epsi){ + std::cout << "[Up] fx is different at index " << gsymb->getMortonIndex() << " is " << fx << "\n"; + } + if(fy.getRelativeInfNorm() > epsi || fy.getRelativeL2Norm() > epsi){ + std::cout << "[Up] fy is different at index " << gsymb->getMortonIndex() << " is " << fy << "\n"; + } + if(fz.getRelativeInfNorm() > epsi || fz.getRelativeL2Norm() > epsi){ + std::cout << "[Up] fz is different at index " << gsymb->getMortonIndex() << " is " << fz << "\n"; + } + } + } + } + }); + + +} + + + + /** * function to check the result of the groupTree Algo * @author benjamin.dufoyer@inria.fr diff --git a/Tests/CMakeLists.txt b/Tests/CMakeLists.txt index 9616ae310..ab40202cc 100644 --- a/Tests/CMakeLists.txt +++ b/Tests/CMakeLists.txt @@ -19,6 +19,7 @@ file( INCLUDE_DIRECTORIES( ${SCALFMM_BINARY_DIR}/Src ${SCALFMM_SOURCE_DIR}/Src + ${SCALFMM_SOURCE_DIR} ${SCALFMM_INCLUDES} ) diff --git a/Tests/GroupTree/testBlockedChebyshev.cpp b/Tests/GroupTree/testBlockedChebyshev.cpp index c2d18f002..78cc3f3c4 100644 --- a/Tests/GroupTree/testBlockedChebyshev.cpp +++ b/Tests/GroupTree/testBlockedChebyshev.cpp @@ -1,11 +1,11 @@ // ==== CMAKE ===== // @FUSE_BLAS +// // ================ // Keep in private GIT #include "../../Src/Utils/FGlobal.hpp" - #include "../../Src/GroupTree/Core/FGroupTree.hpp" #include "../../Src/Components/FSimpleLeaf.hpp" @@ -34,6 +34,7 @@ #include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp" #include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp" #endif + #include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" #include "../../Src/Utils/FParameterNames.hpp" diff --git a/Tests/GroupTree/testBlockedImplicitChebyshev.cpp b/Tests/GroupTree/testBlockedImplicitChebyshev.cpp index 476158d69..2ee5e754f 100644 --- a/Tests/GroupTree/testBlockedImplicitChebyshev.cpp +++ b/Tests/GroupTree/testBlockedImplicitChebyshev.cpp @@ -1,11 +1,14 @@ // @FUSE_BLAS // @FUSE_MPI // @FUSE_STARPU +// @FUSE_MPI // Keep in private GIT #include <iostream> #include <fstream> #include <vector> +#ifdef SCALFMM_USE_MPI #include <mpi.h> +#endif using namespace std; #include "../../Src/Utils/FGlobal.hpp" diff --git a/Tests/GroupTree/testBlockedImplicitUniform.cpp b/Tests/GroupTree/testBlockedImplicitUniform.cpp index a2ee1756d..9c2b9b185 100644 --- a/Tests/GroupTree/testBlockedImplicitUniform.cpp +++ b/Tests/GroupTree/testBlockedImplicitUniform.cpp @@ -154,9 +154,10 @@ int main(int argc, char* argv[]){ GroupAlgorithm groupalgo(&groupedTree,&groupkernel, distributedMortonIndex); mpiComm.global().barrier(); FTic timerExecute; + timerExecute.tic(); starpu_fxt_start_profiling(); groupalgo.execute(operationsToProceed); - groupedTree.printInfoBlocks(); + //groupedTree.printInfoBlocks(); mpiComm.global().barrier(); starpu_fxt_stop_profiling(); timerExecute.tac(); diff --git a/Tests/GroupTree/testBlockedMpiInterpolation.cpp b/Tests/GroupTree/testBlockedMpiInterpolation.cpp new file mode 100644 index 000000000..d85945bd3 --- /dev/null +++ b/Tests/GroupTree/testBlockedMpiInterpolation.cpp @@ -0,0 +1,68 @@ +// ==== CMAKE ===== +// @FUSE_BLAS +// ================ +// Keep in private GIT +// @FUSE_MPI +// @FUSE_STARPU + +#include 
"../../Src/Files/FBlockedMpiInterpolation.hpp" +#include "../../Src/Utils/FGlobal.hpp" + + +#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp" + +#include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp" +#include "../../Src/Kernels/Chebyshev/FChebCell.hpp" +#include "Kernels/Interpolation/FInterpMatrixKernel.hpp" + +#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp" +#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp" +#include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp" +#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" + +#include "../../Src/Components/FTestParticleContainer.hpp" +#include "../../Src/Components/FTestCell.hpp" +#include "../../Src/Components/FTestKernels.hpp" + +#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp" +#include "../../Src/Files/FMpiTreeBuilder.hpp" +#include "../../Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp" + +#include "../../Src/Files/FMpiFmaGenericLoader.hpp" +#include "../../Src/Containers/FCoordinateComputer.hpp" + +#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp" + +#include <memory> + + +int main(int argc, char *argv[]){ + const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"}; + const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"}; + FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.", + FParameterDefinitions::OctreeHeight,FParameterDefinitions::InputFile, + FParameterDefinitions::NbParticles, + LocalOptionBlocSize,LocalOptionNoValidate); + + using FReal = double; + static const int ORDER = 6; + using GroupContainerClass = FP2PGroupParticleContainer<FReal>; + using MatrixKernelClass = FInterpMatrixKernelR<FReal>; + using GroupCellClass = FChebCell<FReal, ORDER>; + using GroupCellUpClass = typename GroupCellClass::multipole_t; + using GroupCellDownClass = typename GroupCellClass::local_expansion_t; + using GroupCellSymbClass = FSymbolicData; + using kernelClass = FChebSymKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>; + + auto groupedTree = blockedMpiInterpolation::execute_algorithm< + GroupCellClass, + GroupCellUpClass, + GroupCellDownClass, + GroupCellSymbClass, + kernelClass, + MatrixKernelClass + >(argc,argv); + + // Validation + +} diff --git a/Tests/LET_STF_Implicit/testCompareGroupTree.cpp b/Tests/LET_STF_Implicit/testCompareGroupTree.cpp new file mode 100644 index 000000000..e1da9b739 --- /dev/null +++ b/Tests/LET_STF_Implicit/testCompareGroupTree.cpp @@ -0,0 +1,431 @@ +// See LICENCE file at project root + +// ==== CMAKE ===== +// @FUSE_MPI +// @FUSE_BLAS +// ================ + + +#include "../../Src/Utils/FGlobal.hpp" +//#include <mpi.h> +// include algo for linear tree +#include "inria/algorithm/distributed/mpi.hpp" +#include "inria/linear_tree/balance_tree.hpp" +// tree class +#include "../../Src/GroupTree/Core/FGroupTree.hpp" +// symbolic data +#include "../../Src/Components/FSymbolicData.hpp" +// cell class +#include "../../Src/Kernels/Chebyshev/FChebCell.hpp" +// parameter +#include "../../Src/Utils/FParameters.hpp" +#include "../../Src/Utils/FParameterNames.hpp" +// GroupParticleContianer +#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" +// file loader +#include "../../Src/Files/FMpiFmaGenericLoader.hpp" +// FBox +#include "Adaptive/FBox.hpp" +// Group linear tree +#include "../../Src/GroupTree/Core/FGroupLinearTree.hpp" +// Function for GroupLinearTree +#include 
"../../Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp" +#include "../../Src/Utils/FTic.hpp" +#include "../../Src/Files/FFmaGenericLoader.hpp" +#include "../../Src/Utils/FLeafBalance.hpp" + +#include "../../Contribs/json.hpp" + +#include <memory> + + +static const int ORDER = 6; +using FReal = double; +using GroupCellClass = FChebCell<FReal, ORDER>; +using GroupCellUpClass = typename GroupCellClass::multipole_t; +using GroupCellDownClass = typename GroupCellClass::local_expansion_t; +using GroupCellSymbClass = FSymbolicData; +using GroupContainerClass = FP2PGroupParticleContainer<FReal>; +using GroupOctreeClass = FGroupTree<FReal, + GroupCellSymbClass, + GroupCellUpClass, + GroupCellDownClass, GroupContainerClass, 1, 4, FReal>; + +// Structure for 1 particle +struct particle_t { + using position_t = FPoint<FReal>; + position_t pos; + FReal phi; + std::size_t morton_index; + const auto& position() const { + return pos; + } + const FPoint<FReal>& getPosition(){ + return pos; + } + const auto& physicalValue() const{ + return phi; + } + const auto& getPositions() const { + return pos; + } + int weight() const { return 1;} + friend constexpr auto morton_index(const particle_t& p) { + return p.morton_index; + } +}; +void sortParticle(FPoint<FReal> * allParticlesToSort, int treeHeight, int groupSize, std::vector<std::vector<int>> & sizeForEachGroup, std::vector<MortonIndex> & distributedMortonIndex, FFmaGenericLoader<FReal>& loader, int nproc); +void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight); +FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total); + +int main(int argc, char *argv[]) { + FTic time; + // Parameter definition + const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"}; + const FParameterNames LocalOptionTreeChoice{ {"-let"}, "Build the LET Group Tree"}; + // Parameter help + FHelpDescribeAndExit(argc, argv, + "Test the blocked tree created with linear tree." 
,FParameterDefinitions::OctreeHeight, + FParameterDefinitions::InputFile, + LocalOptionBlocSize, + LocalOptionTreeChoice); + // Get parameters + // Get the groupSize + const int groupSize = + FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250); + // Get the file input + const char* const filename = + FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + // Get the treeHeight + const unsigned int TreeHeight = + FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); + + const bool letTree = + FParameters::getValue(argc,argv,LocalOptionTreeChoice.options,false); + // The level is the level of the leaf + int level = TreeHeight-1; + // Init MPI communicator + // Initialisation MPI Berenger + FMpi FMpiComm(argc,argv); + int nproc = FMpiComm.global().processCount(); + // Initialisation MPI Quentin + inria::mpi::communicator mpi_comm(FMpiComm.global().getComm()); + + // Show job information + std::cout << "GroupTree building comparaison " << std::endl; + std::cout << "File name : " << filename << std::endl; + std::cout << "TreeHeight : " << TreeHeight << std::endl; + std::cout << "Block size : " << groupSize << std::endl; + std::cout << "------------------------------------------" << std::endl; + + + FMpiFmaGenericLoader<FReal> loaderParrallel(filename, FMpiComm.global()); + FFmaGenericLoader<FReal> loader(filename); + // vector to stock all particles + FTic timer_1; + + + if(letTree){ + std::vector<particle_t> myParticles(loaderParrallel.getMyNumberOfParticles()); + // define the max level to sort particle + const std::size_t max_level = sizeof(particle_t::morton_index) * 8 / 3; + // define a box, used in the sort + const FBox<FPoint<FReal>> box{loaderParrallel.getBoxWidth(),loaderParrallel.getCenterOfBox()}; + + // iterate on all of my particles + for(FSize idxPart = 0; idxPart <loaderParrallel.getMyNumberOfParticles();++idxPart){ + particle_t tmp; + // get the current particles + loaderParrallel.fillParticle(&tmp.pos,&tmp.phi); + // set the morton index of the current particle at the max_level + tmp.morton_index = inria::linear_tree::get_morton_index( + tmp.pos, box, max_level); + // set the weight of the particle + tmp.phi = 0.1; + // add the particle to my vector of particle + myParticles.at(idxPart) = tmp; + } + + // Now i have all of my particles in a vector, they all have a morton index + // now we will sort them + inria::sort(mpi_comm,myParticles, + [](const auto& p1, const auto& p2) { + return p1.morton_index < p2.morton_index; + }); + + // Now i want to create the the linear tree + // a linear tree is a tree, with only the leaf + auto linear_tree = + inria::linear_tree::create_balanced_linear_tree_at_level(mpi_comm, + level, + box, + myParticles); + // Now i need to create a blocked linear tree, it's just a linear tree with + // more information + // declaration of the group linear tree + FGroupLinearTree<decltype(linear_tree)::value_type>group_linear_tree{mpi_comm}; + group_linear_tree.create_local_group_linear_tree( + &linear_tree, + groupSize + ); + // now i will to redistribute the particle according to the linear tree + // Redistribution of particles + inria::linear_tree::redistribute_particles(mpi_comm, + linear_tree, + myParticles); + // Now we need to modify the morton index of of all particle to + // have the morton index at le treeHeight-1 + for(unsigned i = 0 ; i < myParticles.size(); ++i){ + myParticles.at(i).morton_index = inria::linear_tree::get_morton_index( + myParticles.at(i).pos, box, 
level);
+    }
+
+    // Now we need to share the particle distribution to build the GroupTree
+    group_linear_tree.set_index_particle_distribution(myParticles);
+
+    // Now I can declare my groupTree
+    // it is an empty instance of the FGroupTree
+    GroupOctreeClass localGroupTree =
+        GroupOctreeClass::template get_block_tree_instance<GroupCellSymbClass, GroupCellUpClass,
+                                                           GroupCellDownClass, GroupContainerClass>(TreeHeight,
+                                                                                                    groupSize,
+                                                                                                    loaderParrallel.getCenterOfBox(),
+                                                                                                    loaderParrallel.getBoxWidth());
+
+    // Now I can fill the localGroupTree
+    std::cout << "Start creating LET Blocked tree " << std::endl;
+    time.tic();
+    localGroupTree.create_tree(group_linear_tree,myParticles);
+    time.tac();
+    // Now I want to build a LET tree
+    // the LET is the Local Essential Tree:
+    // it is the same tree, but with blocks from other processes added;
+    // the added blocks are those needed to post the tasks for the
+    // P2P, M2M and M2L interactions
+
+    // The first step is to add a unique index to every group
+    //int nb_block = dstr_grp_tree_builder::set_cell_group_global_index(localGroupTree,mpi_comm);
+    //// now we can create the LET
+    timer_1.tic();
+    localGroupTree.create_LET(group_linear_tree);
+    timer_1.tac();
+    mpi_comm.barrier();
+    nlohmann::json result;
+    std::string name = std::to_string(TreeHeight);
+    name += "_" + std::to_string(groupSize)+"_"+std::to_string(loader.getNumberOfParticles()) + ".json";
+    std::ifstream file(name);
+    if(mpi_comm.rank() == 0){
+        auto tree = time.elapsed();
+        auto let = timer_1.elapsed();
+        if(file.fail()){
+            // the file does not exist yet
+            result["TreeHeight"] = TreeHeight;
+            result["GroupSize"] = groupSize;
+            result["Filename"] = filename;
+            result["NbParticle"] = loaderParrallel.getNumberOfParticles();
+            result["LET_Blocked_tree"]["tree"] = tree;
+            result["LET_Blocked_tree"]["let"] = let;
+            result["LET_Blocked_tree"]["total"] = let+tree;
+        } else {
+            file >> result;
+            result["LET_Blocked_tree"]["tree"] = tree;
+            result["LET_Blocked_tree"]["let"] = let;
+            result["LET_Blocked_tree"]["total"] = let+tree;
+        }
+        std::ofstream out(name, std::ios::out | std::ios::app);
+        out << result << std::endl;
+    }
+    return 0;
+  } else {
+
+    //FFmaGenericLoader<FReal> loader(filename);
+    const FSize NbParticles = loader.getNumberOfParticles();
+    FPoint<FReal> * allParticlesToSort = new FPoint<FReal>[NbParticles];
+    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+        FReal physicalValue = 0.1;
+        loader.fillParticle(&allParticlesToSort[idxPart], &physicalValue);//Same with file or not
+    }
+    std::vector<MortonIndex> distributedMortonIndex;
+    std::vector<std::vector<int>> sizeForEachGroup;
+    sortParticle(allParticlesToSort, TreeHeight, groupSize, sizeForEachGroup, distributedMortonIndex, loader, nproc);
+
+    FP2PParticleContainer<FReal> allParticles;
+    for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){
+        FReal physicalValue = 0.1;
+        allParticles.push(allParticlesToSort[idxPart], physicalValue);
+    }
+    // Put the data into the tree
+    time.tic();
+    GroupOctreeClass groupedTree(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, sizeForEachGroup, true);
+    time.tac();
+    nlohmann::json result;
+    std::string name = std::to_string(TreeHeight);
+    name += "_" + std::to_string(groupSize)+"_"+std::to_string(loader.getNumberOfParticles()) + ".json";
+    std::ifstream file(name);
+    if(mpi_comm.rank() == 0){
+        if(file.fail()){
+            // the file does not exist yet
+            result["TreeHeight"] = TreeHeight;
+            result["GroupSize"] = groupSize;
+            result["Filename"] = filename;
+            result["NbParticle"] = loaderParrallel.getNumberOfParticles();
+            result["Group_tree"]["time"] = time.elapsed();
+        } else {
+            file >> result;
+            result["Group_tree"]["time"] = time.elapsed();
+        }
+        std::ofstream out(name, std::ios::out | std::ios::app);
+        out << result << std::endl;
+    }
+    return 0;
+  }
+}
+
+
+void sortParticle(FPoint<FReal> * allParticles, int treeHeight, int groupSize, std::vector<std::vector<int>> & sizeForEachGroup, std::vector<MortonIndex> & distributedMortonIndex, FFmaGenericLoader<FReal>& loader, int nproc)
+{
+    // Structure used to sort the particles
+    struct ParticleSortingStruct{
+        FPoint<FReal> position;
+        MortonIndex mindex;
+    };
+    // Create an array of this structure for the sort, then fill it
+    const FSize nbParticles = loader.getNumberOfParticles();
+    ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles];
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
+        const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>(loader.getCenterOfBox(), loader.getBoxWidth(),
+                                                                                           treeHeight,
+                                                                                           allParticles[idxPart]);
+        const MortonIndex particleIndex = host.getMortonIndex();
+        particlesToSort[idxPart].mindex = particleIndex;
+        particlesToSort[idxPart].position = allParticles[idxPart];
+    }
+
+    // Sort the new array
+    FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
+        return v1.mindex <= v2.mindex;
+    });
+    // Copy everything back, in order, into the original array
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
+        allParticles[idxPart] = particlesToSort[idxPart].position;
+    }
+
+    // Count the number of leaves
+    sizeForEachGroup.resize(treeHeight);
+    MortonIndex previousLeaf = -1;
+    int numberOfLeaf = 0;
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
+    {
+        if(particlesToSort[idxPart].mindex != previousLeaf)
+        {
+            previousLeaf = particlesToSort[idxPart].mindex;
+            ++numberOfLeaf;
+        }
+    }
+
+    // Compute the group sizes at the leaf level
+    FLeafBalance balancer;
+    for(int processId = 0; processId < nproc; ++processId)
+    {
+        FSize size_last;
+        FSize countGroup;
+        FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
+        size_last = leafOnProcess%groupSize;
+        countGroup = (leafOnProcess - size_last)/groupSize;
+        for(int i = 0; i < countGroup; ++i)
+            sizeForEachGroup[treeHeight-1].push_back(groupSize);
+        if(size_last > 0)
+            sizeForEachGroup[treeHeight-1].push_back((int)size_last);
+    }
+
+    // Compute the working interval at the leaf level
+    previousLeaf = -1;
+    int countLeaf = 0;
+    int processId = 0;
+    FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, 0) - balancer.getLeft(numberOfLeaf, nproc, 0);
+    distributedMortonIndex.push_back(previousLeaf);
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
+    {
+        if(particlesToSort[idxPart].mindex != previousLeaf)
+        {
+            previousLeaf = particlesToSort[idxPart].mindex;
+            ++countLeaf;
+            if(countLeaf == leafOnProcess)
+            {
+                distributedMortonIndex.push_back(previousLeaf);
+                distributedMortonIndex.push_back(previousLeaf);
+                countLeaf = 0;
+                ++processId;
+                leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
+            }
+        }
+    }
+    distributedMortonIndex.push_back(particlesToSort[nbParticles - 1].mindex);
+
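+    // Note on the level arithmetic used below (illustrative values only, not
+    // taken from any input file): a parent cell's Morton index is its child's
+    // index shifted right by 3 bits, so the cell that contains a leaf at a
+    // coarser level idxLevel is
+    //     MortonIndex cell = leafIndex >> (3*(treeHeight - 1 - idxLevel));
+    // For instance, with treeHeight = 4 and leafIndex = 0b101011110 (level 3):
+    //     leafIndex >> 3 == 0b101011   // its cell at level 2
+    //     leafIndex >> 6 == 0b101      // its cell at level 1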
+    // Compute the working intervals at each level
+    std::vector<std::vector<std::vector<MortonIndex>>> nodeRepartition;
+    createNodeRepartition(distributedMortonIndex, nodeRepartition, nproc, treeHeight);
+
+    // For each level, compute the group sizes
+    for(int idxLevel = treeHeight - 2; idxLevel >= 0; --idxLevel)
+    {
+        processId = 0;
+        int countParticleInTheGroup = 0;
+        MortonIndex previousMortonCell = -1;
+
+        //cout << "Compute Level " << idxLevel << endl;
+        for(int idxPart = 0; idxPart < nbParticles; ++idxPart)
+        {
+            MortonIndex mortonCell = (particlesToSort[idxPart].mindex) >> (3*(treeHeight - 1 - idxLevel));
+            if(mortonCell <= nodeRepartition[idxLevel][processId][1]) // If the index lies inside the working interval
+            {
+                if(mortonCell != previousMortonCell) // If it is a new index
+                {
+                    ++countParticleInTheGroup; // count it in the current group
+                    previousMortonCell = mortonCell;
+                    if(countParticleInTheGroup == groupSize) // If the group is full, record its size
+                    {
+                        sizeForEachGroup[idxLevel].push_back(groupSize);
+                        countParticleInTheGroup = 0;
+                    }
+                }
+            }
+            else // When switching to the next process interval, record what has been counted so far
+            {
+                if(countParticleInTheGroup > 0)
+                    sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup);
+                countParticleInTheGroup = 1;
+                previousMortonCell = mortonCell;
+                ++processId;
+            }
+        }
+        if(countParticleInTheGroup > 0)
+            sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup);
+    }
+}
+
+void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight) {
+    nodeRepartition.resize(treeHeight, std::vector<std::vector<MortonIndex>>(nproc, std::vector<MortonIndex>(2)));
+    for(int node_id = 0; node_id < nproc; ++node_id){
+        nodeRepartition[treeHeight-1][node_id][0] = distributedMortonIndex[node_id*2];
+        nodeRepartition[treeHeight-1][node_id][1] = distributedMortonIndex[node_id*2+1];
+    }
+    for(int idxLevel = treeHeight - 2; idxLevel >= 0 ; --idxLevel){
+        nodeRepartition[idxLevel][0][0] = nodeRepartition[idxLevel+1][0][0] >> 3;
+        nodeRepartition[idxLevel][0][1] = nodeRepartition[idxLevel+1][0][1] >> 3;
+        for(int node_id = 1; node_id < nproc; ++node_id){
+            nodeRepartition[idxLevel][node_id][0] = FMath::Max(nodeRepartition[idxLevel+1][node_id][0] >> 3, nodeRepartition[idxLevel][node_id-1][0]+1); // Berenger's PhD :)
+            nodeRepartition[idxLevel][node_id][1] = nodeRepartition[idxLevel+1][node_id][1] >> 3;
+        }
+    }
+}
+
+FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){
+    if(mpi_rank < (total%mpi_count))
+        return ((total - (total%mpi_count))/mpi_count)+1;
+    return ((total - (total%mpi_count))/mpi_count);
+}
diff --git a/Tests/LET_STF_Implicit/testFMMInterpolationStarPU.hpp b/Tests/LET_STF_Implicit/testFMMInterpolationStarPU.hpp
new file mode 100644
index 000000000..ee4031fa3
--- /dev/null
+++ b/Tests/LET_STF_Implicit/testFMMInterpolationStarPU.hpp
@@ -0,0 +1,284 @@
+// -*-c++-*-
+// ==== CMAKE =====
+// @FUSE_BLAS
+// @FUSE_MPI
+// @FUSE_STARPU
+// ================
+//
+#include "Utils/FGlobal.hpp"
+// parameters
+#include "Utils/FParameters.hpp"
+#include "Utils/FParameterNames.hpp"
+
+// include algo for linear tree
+#include "inria/algorithm/distributed/mpi.hpp"
+#include "inria/linear_tree/balance_tree.hpp"
+// tree class
+#include "GroupTree/Core/FGroupTree.hpp"
+// symbolic data
+#include "Components/FSymbolicData.hpp"
+//
+
+// GroupParticleContainer
+#include "GroupTree/Core/FP2PGroupParticleContainer.hpp"
+// file
loader +#include "Files/FMpiFmaGenericLoader.hpp" +//#include "Files/FmaGenericLoader.hpp" +// FBox +#include "Adaptive/FBox.hpp" +// Group linear tree +#include "GroupTree/Core/FGroupLinearTree.hpp" +// Function for GroupLinearTree +#include "GroupTree/Core/FDistributedGroupTreeBuilder.hpp" +// +// Algorithm include +#include "GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp" +#include "GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp" +#include "GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp" +// +// To construct either the duplicated Octree or the LET +// +#include "GroupTree/Core/FBuildGroupTree.hpp" +//For validation +#include "GroupTree/Core/FGroupTools.hpp" +#include "GroupTree/Core/FCheckResults.hpp" +#include "Components/FSimpleLeaf.hpp" +#include "Core/FFmmAlgorithm.hpp" +// Four output +#include "Contribs/json.hpp" + + +static const int ORDER = 6; +using FReal = double; +// +// 1/r kernel +using MatrixKernelClass = FInterpMatrixKernelR<FReal> ; +// +// definition of the common tree structure +using CellClass = FInterpolationCell<FReal, ORDER>; +using GroupCellUpClass = typename CellClass::multipole_t; +using GroupCellDownClass = typename CellClass::local_expansion_t; +using GroupCellSymbClass = FSymbolicData; +using GroupContainerClass = FP2PGroupParticleContainer<FReal>; +using GroupOctreeClass = FGroupTree<FReal,GroupCellSymbClass, +GroupCellUpClass, +GroupCellDownClass, GroupContainerClass, 1, 4, FReal>; + +// definition of algorithm structure +using GroupKernelClass = FStarPUAllCpuCapacities<FInterpolationKernel +<FReal, CellClass,GroupContainerClass,MatrixKernelClass,ORDER>>; + +using GroupCpuWrapper = FStarPUCpuWrapper< +typename GroupOctreeClass::CellGroupClass, CellClass, GroupKernelClass, +typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> ; +// +using GroupAlgorithm = FGroupTaskStarPUImplicitAlgorithm<GroupOctreeClass, +typename GroupOctreeClass::CellGroupClass,GroupKernelClass, +typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper>; +////////////////////////////////////////////////////////////////// + + +int main(int argc, char *argv[]) { + // Parameter definition + const FParameterNames LocalOptionBlocSize { {"-bs"},"The size of the block of the blocked tree"}; + const FParameterNames LocalOptionValidate { {"-check-result"}, "To compare with direct computation"}; + const FParameterNames LocalOptionBuildTree { {"-tree"}, "To compare with direct computation 0 let, 1 duplicate tree (let distribution) 2 duplicate tree "}; + const std::string TreeBuilderOption[3]={"Let tree", "Duplicated tree with Let distribution", "Duplicated tree"}; + // Parameter help + FHelpDescribeAndExit(argc, argv, + "Test the blocked tree created with linear tree." 
, + FParameterDefinitions::OctreeHeight, + FParameterDefinitions::InputFile, + LocalOptionBlocSize, + LocalOptionValidate, LocalOptionBuildTree); + + // Get parameters + // Get the groupSize + const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250); + // Get the file input + const char* const filename = + FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + // Get the treeHeight + const unsigned int TreeHeight = + FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); + const int optionBuildTree = FParameters::getValue(argc, argv, LocalOptionBuildTree.options,0) ; + + + // Init MPI communicator + // Initialisation MPI Berenger + FMpi FMpiComm; + // Initialisation MPI Quentin + inria::mpi::communicator mpi_comm(FMpiComm.global().getComm()); + // Show job information + std::cout << "JOB INFORMATION " << std::endl; + std::cout << "File name: " << filename << std::endl; + std::cout << "TreeHeight: " << TreeHeight << std::endl; + std::cout << "Block size: " << groupSize << std::endl; + std::cout << "Tree type: " << TreeBuilderOption[optionBuildTree] << std::endl; + std::cout << "------------------------------------------" << std::endl; + FTic time; + // Use FMpiFmaGenericLoader to read the box simulation size + std::cout << "Opening : " << filename << " ..."; + FMpiFmaGenericLoader<FReal> loader(filename, FMpiComm.global()); + std::cout << " done." << std::endl; + + // define a box, used in the sort + const FBox<FPoint<FReal>> box{loader.getBoxWidth(),loader.getCenterOfBox()}; + FReal width = std::max(box.width(0) , std::max(box.width(1) ,box.width(2) )) ; + // + // The group tree used for the computation + GroupOctreeClass * computeOnGroupTree = nullptr ; + // + /////////////////////////////////////////////////////////////////////////////// + // Build Let or duplicated tree + /////////////////////////////////////////////////////////////////////////////// + // + std::string title ; + int nb_block ; + std::vector<MortonIndex> mortonCellDistribution ; + // vector to stock all particles + std::vector<groupTree::particle_t<FReal> > myParticles ; + // + // define the max level to sort particle + + std::string octreeType; + if(optionBuildTree <= 1 ){ + title = "Distribution LETGroupTree in "; + octreeType = "Let group" ; + // + GroupOctreeClass *localGroupTree = nullptr; + // + groupTree::buildLetTree(mpi_comm, loader, myParticles, + box, TreeHeight, groupSize, localGroupTree, mortonCellDistribution ,nb_block); + computeOnGroupTree = localGroupTree ; + } + // + if(optionBuildTree > 0 ){ + title ="duplicate GroupTree in "; + octreeType = "duplicate group" ; + // + GroupOctreeClass *fullGroupTree = nullptr; + // + groupTree::buildDuplicatedTree( FMpiComm, optionBuildTree, filename, myParticles, box, TreeHeight, + groupSize, fullGroupTree,mortonCellDistribution,nb_block); + computeOnGroupTree = fullGroupTree ; + nb_block =0; + } + time.tac(); + computeOnGroupTree->printInfoBlocks(); + std::cout << title << mortonCellDistribution.size() << std::endl; + for ( auto v : mortonCellDistribution) + std::cout << " " << v ; + std::cout << std::endl; + std::cout << " nb_block: " << nb_block <<std::endl; + std::cout << " Creating GroupTree in " << time.elapsed() << "s)." 
<< std::endl; + // + /////////////////////////////////////////////////////////////////////////////// + // Computation part + /////////////////////////////////////////////////////////////////////////////// + // + // define the operation to proceed + // FFmmNearField only Near field + // FFmmFarField only Far field + // FFmmNearAndFarFields full FMM + // By operator FFmmP2P| FFmmP2M | | FFmmM2M FFmmM2L | FFmmL2L | FFmmL2P + const unsigned int operationsToProceed = FFmmP2M | FFmmM2M | FFmmM2L | FFmmL2L | FFmmL2P ; + + const MatrixKernelClass MatrixKernel; + GroupKernelClass groupkernel(TreeHeight, width, box.center() , &MatrixKernel); + std::cout << " end GroupKernelClass " << std::endl ; + GroupAlgorithm groupalgo(computeOnGroupTree,&groupkernel, mortonCellDistribution,nb_block); + std::cout << " end GroupAlgorithm " << std::endl ; + // wait all proc + FTic timerExecute; + FMpiComm.global().barrier(); // Synchronization for timer + // start new timer + timerExecute.tic(); + // starpu_fxt_start_profiling(); + std::cout << " start groupalgo.execute " << std::endl ; + groupalgo.execute(operationsToProceed); + std::cout << " end groupalgo.execute " << std::endl ; + computeOnGroupTree->printInfoBlocks(); + + FMpiComm.global().barrier(); + // starpu_fxt_stop_profiling(); + timerExecute.tac(); + auto timeElapsed = timerExecute.elapsed(); + // print times + double minTime,maxTime,meanTime ; + groupTree::timeAverage(FMpiComm, timeElapsed, minTime, maxTime, meanTime) ; + std::cout << " time (in sec.) on node: " << timeElapsed + << " min " << minTime << " max " << maxTime + << " mean " << meanTime << std::endl; + // + /////////////////////////////////////////////////////////////////////////////// + // Extraction des resultats + /////////////////////////////////////////////////////////////////////////////// + // + nlohmann::json result; + std::string name = std::to_string(TreeHeight); + name += "_" + std::to_string(groupSize)+"_"+std::to_string(loader.getNumberOfParticles()) + ".json"; + result["TreeHeight"] = TreeHeight; + result["GroupSize"] = groupSize; + result["Filename"] = filename; + result["NbParticle"] = loader.getNumberOfParticles(); + result["Octree"] = octreeType; + result["Algorithm"]["time"] = timeElapsed; + result["Algorithm"]["mean"] = meanTime; + result["Algorithm"]["min"] = minTime; + result["Algorithm"]["max"] = maxTime; + result["kernel"] = interpolationKernel ; + std::ofstream out(name); + out << result << std::endl; + // + /////////////////////////////////////////////////////////////////////////////// + // Validation + /////////////////////////////////////////////////////////////////////////////// + // + // Validate the result + if(FParameters::existParameter(argc, argv, LocalOptionValidate.options) == true){ + // Check the result with a previous computation + // The resuls are stored in the files + using CellClass1 = FInterpolationCell<FReal, ORDER>; + + typedef FP2PParticleContainer<FReal> ContainerClass; + typedef FSimpleLeaf<FReal, ContainerClass > LeafClass; + using OctreeClass = FOctree<FReal, CellClass1,ContainerClass,LeafClass> ; + using KernelClass = FInterpolationKernel<FReal,CellClass1,ContainerClass,MatrixKernelClass,ORDER> ; + using FmmClass = FFmmAlgorithm<OctreeClass,CellClass1,ContainerClass,KernelClass,LeafClass> ; + const int SubTreeHeight=3; + OctreeClass treeCheck(TreeHeight, SubTreeHeight,width,box.center()); + const FReal epsilon = 1E-10; + KernelClass kernels(TreeHeight, width, box.center(), &MatrixKernel); + if(optionBuildTree > 0 ){ + for(std::size_t 
idxPart = 0 ; idxPart < myParticles.size() ; ++idxPart){ + // put in tree + treeCheck.insert(myParticles[idxPart].getPosition(), + myParticles[idxPart].physicalValue()); + // std::cout << idxPart<< " " << myParticles[idxPart].getPosition() << " " << myParticles[idxPart].physicalValue() <<std::endl; + } + FmmClass algorithm(&treeCheck, &kernels); + + + checkWithDuplicatedTree(FMpiComm.global().processId(), myParticles,treeCheck, algorithm, + *computeOnGroupTree,groupalgo,operationsToProceed,epsilon ); + } + else { + std::cerr << " check Not yet implemented " << std::endl ; + FFmaGenericLoader<FReal> seqLoader(filename); + readAndSortAllParticle(seqLoader, box, myParticles, TreeHeight ) ; + for(std::size_t idxPart = 0 ; idxPart < myParticles.size() ; ++idxPart){ + // put in tree + treeCheck.insert(myParticles[idxPart].getPosition(), + myParticles[idxPart].physicalValue()); + } + FmmClass algorithm(&treeCheck, &kernels); + + + checkWithDuplicatedTree(FMpiComm.global().processId(), myParticles,treeCheck, algorithm, + *computeOnGroupTree,groupalgo,operationsToProceed,epsilon ); + } + + } + return 0; +} diff --git a/Tests/LET_STF_Implicit/testGroupTreeFromLinearTree.cpp b/Tests/LET_STF_Implicit/testGroupTreeFromLinearTree.cpp new file mode 100644 index 000000000..dfe23d8ba --- /dev/null +++ b/Tests/LET_STF_Implicit/testGroupTreeFromLinearTree.cpp @@ -0,0 +1,218 @@ +// ==== CMAKE ===== +// @FUSE_BLAS +// ================ +// Keep in private GIT +// @FUSE_MPI + +#include "../../Src/Utils/FGlobal.hpp" +// include algo for linear tree +#include "inria/algorithm/distributed/mpi.hpp" +#include "inria/linear_tree/balance_tree.hpp" +// tree class +#include "../../Src/GroupTree/Core/FGroupTree.hpp" +// symbolic data +#include "../../Src/Components/FSymbolicData.hpp" +// cell class +#include "../../Src/Kernels/Chebyshev/FChebCell.hpp" +// parameter +#include "../../Src/Utils/FParameters.hpp" +#include "../../Src/Utils/FParameterNames.hpp" +// GroupParticleContianer +#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" +// file loader +#include "../../Src/Files/FMpiFmaGenericLoader.hpp" +// FBox +#include "Adaptive/FBox.hpp" +// Group linear tree +#include "../../Src/GroupTree/Core/FGroupLinearTree.hpp" +// Function for GroupLinearTree +#include "../../Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp" + +#include <memory> + +static const int ORDER = 6; +using FReal = double; +using GroupCellClass = FChebCell<FReal, ORDER>; +using GroupCellUpClass = typename GroupCellClass::multipole_t; +using GroupCellDownClass = typename GroupCellClass::local_expansion_t; +using GroupCellSymbClass = FSymbolicData; +using GroupContainerClass = FP2PGroupParticleContainer<FReal>; +using GroupOctreeClass = FGroupTree<FReal, + GroupCellSymbClass, + GroupCellUpClass, + GroupCellDownClass, GroupContainerClass, 1, 4, FReal>; + +// Structure for 1 particle +struct particle_t { + using position_t = FPoint<FReal>; + position_t pos; + FReal phi; + MortonIndex morton_index; + const auto& position() const { + return pos; + } + const FPoint<FReal>& getPosition(){ + return pos; + } + const auto& physicalValue() const{ + return phi; + } + const auto& getPositions() const { + return pos; + } + int weight() const { return 1;} + friend constexpr auto morton_index(const particle_t& p) { + return p.morton_index; + } +}; + +int main(int argc, char *argv[]) { + // Parameter definition + const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"}; + // Parameter help + 
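+    // Example invocation (illustrative only: -bs is the option declared above,
+    // while the executable name, the height and input-file option strings come
+    // from the build setup and FParameterDefinitions; the input defaults to
+    // ../Data/test20k.fma with a tree height of 5):
+    //   mpirun -np 4 ./testGroupTreeFromLinearTree -bs 128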
FHelpDescribeAndExit(argc, argv, + "Test the blocked tree created with linear tree." ,FParameterDefinitions::OctreeHeight, + FParameterDefinitions::InputFile, + LocalOptionBlocSize); + // Get parameters + // Get the groupSize + const int groupSize = + FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250); + // Get the file input + const char* const filename = + FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + // Get the treeHeight + const unsigned int TreeHeight = + FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); + // The level is the level of the leaf + int level = TreeHeight-1; + // Init MPI communicator + // Initialisation MPI Berenger + FMpi FMpiComm(argc,argv); + // Initialisation MPI Quentin + inria::mpi::communicator mpi_comm(FMpiComm.global().getComm()); + + // Show job information + std::cout << "JOB INFORMATION " << std::endl; + std::cout << "File name : " << filename << std::endl; + std::cout << "TreeHeight : " << TreeHeight << std::endl; + std::cout << "Block size : " << groupSize << std::endl; + std::cout << "------------------------------------------" << std::endl; + + std::cout << "Opening : " << filename << " ..."; + FMpiFmaGenericLoader<FReal> loader(filename, FMpiComm.global()); + std::cout << " done." << std::endl; + + // vector to stock all particles + std::vector<particle_t> myParticles(loader.getMyNumberOfParticles()); + // define the max level to sort particle + const std::size_t max_level = sizeof(particle_t::morton_index) * 8 / 3; + // define a box, used in the sort + const FBox<FPoint<FReal>> box{loader.getBoxWidth(),loader.getCenterOfBox()}; + + // iterate on all of my particles + for(FSize idxPart = 0; idxPart <loader.getMyNumberOfParticles();++idxPart){ + particle_t tmp; + // get the current particles + loader.fillParticle(&tmp.pos,&tmp.phi); + // set the morton index of the current particle at the max_level + tmp.morton_index = inria::linear_tree::get_morton_index( + tmp.pos, box, max_level); + // set the weight of the particle + tmp.phi = 0.1; + // add the particle to my vector of particle + myParticles.at(idxPart) = tmp; + } + + // Now i have all of my particles in a vector, they all have a morton index + // now we will sort them + std::cout << "Sorting particles ..."; + inria::sort(mpi_comm,myParticles, + [](const auto& p1, const auto& p2) { + return p1.morton_index < p2.morton_index; + }); + std::cout << " Done" << std::endl; + + std::cout << " I have " << loader.getMyNumberOfParticles() << " particles ..." << std::endl; + std::cout << "For a total of " << loader.getNumberOfParticles() << " particles ..." << std::endl; + + // Now i want to create the the linear tree + // a linear tree is a tree, with only the leaf + std::cout << "Create linear tree at level " << level << " ..."; + auto linear_tree = + inria::linear_tree::create_balanced_linear_tree_at_level(mpi_comm, + level, + box, + myParticles); + std::cout << " done." 
<< std::endl; + + // Now i need to create a blocked linear tree, it's just a linear tree with + // more information + std::cout << "Creating blocked linear tree ..."; + // declaration of the group linear tree + FGroupLinearTree<decltype(linear_tree)::value_type>group_linear_tree{mpi_comm}; + // now i will fill it + // i can apply 2 methods + // - create_local_group_linear_tree + // - create_global_group_linear_tree + // the first function balance the linear tree according to particle (so he + // don't touch to the distribution ) + // the second function balance the linear tree according to the number of + // block, he try to have group full on the left + // + // with a groupSize at 128, 890 leaf and 3 proc the distribution will be + // ___________________________________ + // | Proc 0 | Proc 1 | Proc 2 | + // | | | | + // | 128 128 | 128 128 | 128 128 122 | + // |___________|_________|_____________| + group_linear_tree.create_local_group_linear_tree( + &linear_tree, + groupSize + ); + std::cout << " Done" << std::endl; + + // now i will to redistribute the particle according to the linear tree + std::cout << "Redistribute ..."; + // Redistribution of particles + inria::linear_tree::redistribute_particles(mpi_comm, + linear_tree, + myParticles); + std::cout << " Done" << std::endl; + + // Now we need to modify the morton index of of all particle to + // have the morton index at le treeHeight-1 + MortonIndex minMidx=1,maxMidx=0; + for (int i =0 ; i< level ; ++i){ + minMidx= minMidx <<3 ; + } + for(unsigned i = 0 ; i < myParticles.size(); ++i){ + myParticles.at(i).morton_index = inria::linear_tree::get_morton_index( + myParticles.at(i).pos, box, level); + maxMidx =std::max(maxMidx, myParticles.at(i).morton_index); + minMidx =std::min(minMidx, myParticles.at(i).morton_index); + } + + std::cout << " MinIndex: " << minMidx << " MaxIndex: " << maxMidx<<std::endl; + // Now we need to share the particle distribution to build the GroupTree + std::cout << "Share my particle distribution ..."; + group_linear_tree.set_index_particle_distribution(myParticles); + std::cout << " done"<< std::endl; + + // Now i can declare my groupTree + // it's a empty instance of the FGroupTree + GroupOctreeClass localGroupTree = GroupOctreeClass::template get_block_tree_instance<GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass>(TreeHeight, + groupSize, + loader.getCenterOfBox(), + loader.getBoxWidth()); + + // Now i can fill the localGroupTree + std::cout << "Creating tree ..." ; + localGroupTree.create_tree(group_linear_tree,myParticles); + std::cout << " done." 
<< std::endl; + + // now we can show the groupTree + localGroupTree.printInfoBlocks(); + + return 0; +} diff --git a/Tests/LET_STF_Implicit/testLETGroupTree.cpp b/Tests/LET_STF_Implicit/testLETGroupTree.cpp new file mode 100644 index 000000000..19c940fc9 --- /dev/null +++ b/Tests/LET_STF_Implicit/testLETGroupTree.cpp @@ -0,0 +1,254 @@ +// ==== CMAKE ===== +// @FUSE_BLAS +// @FUSE_MPI +// ================ +// + +#include "../../Src/Utils/FGlobal.hpp" +// include algo for linear tree +#include "inria/algorithm/distributed/mpi.hpp" +#include "inria/linear_tree/balance_tree.hpp" +// tree class +#include "../../Src/GroupTree/Core/FGroupTree.hpp" +// symbolic data +#include "../../Src/Components/FSymbolicData.hpp" +// cell class +#include "../../Src/Kernels/Chebyshev/FChebCell.hpp" +// parameter +#include "../../Src/Utils/FParameters.hpp" +#include "../../Src/Utils/FParameterNames.hpp" +// GroupParticleContianer +#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" +// file loader +#include "../../Src/Files/FMpiFmaGenericLoader.hpp" +// FBox +#include "Adaptive/FBox.hpp" +// Group linear tree +#include "../../Src/GroupTree/Core/FGroupLinearTree.hpp" +// Function for GroupLinearTree +#include "../../Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp" +// Include validation Function +#include "../../Src/GroupTree/Core/FDistributedLETGroupTreeValidator.hpp" +#include <memory> + + +static const int ORDER = 6; +using FReal = double; +using GroupCellClass = FChebCell<FReal, ORDER>; +using GroupCellUpClass = typename GroupCellClass::multipole_t; +using GroupCellDownClass = typename GroupCellClass::local_expansion_t; +using GroupCellSymbClass = FSymbolicData; +using GroupContainerClass = FP2PGroupParticleContainer<FReal>; +using GroupOctreeClass = FGroupTree<FReal, + GroupCellSymbClass, + GroupCellUpClass, + GroupCellDownClass, GroupContainerClass, 1, 4, FReal>; + +// Structure for 1 particle +struct particle_t { + using position_t = FPoint<FReal>; + position_t pos; + FReal phi; + std::size_t morton_index; + const auto& position() const { + return pos; + } + const FPoint<FReal>& getPosition(){ + return pos; + } + const auto& physicalValue() const{ + return phi; + } + const auto& getPositions() const { + return pos; + } + int weight() const { return 1;} + friend constexpr auto morton_index(const particle_t& p) { + return p.morton_index; + } +}; + +int main(int argc, char *argv[]) { + // Parameter definition + const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"}; + // Parameter help + FHelpDescribeAndExit(argc, argv, + "Test the blocked tree created with linear tree." 
,FParameterDefinitions::OctreeHeight, + FParameterDefinitions::InputFile, + LocalOptionBlocSize); + std::string comments("==================================================================================="); + // Get parameters + // Get the groupSize + const int groupSize = + FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250); + // Get the file input + const char* const filename = + FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + // Get the treeHeight + const unsigned int TreeHeight = + FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); + // The level is the level of the leaf + int level = TreeHeight-1; + // Init MPI communicator + // Initialisation MPI Berenger + FMpi FMpiComm(argc,argv); + // Initialisation MPI Quentin + inria::mpi::communicator mpi_comm(FMpiComm.global().getComm()); + + // Show job information + std::cout << "JOB INFORMATION " << std::endl; + std::cout << "File name : " << filename << std::endl; + std::cout << "TreeHeight : " << TreeHeight << std::endl; + std::cout << "Block size : " << groupSize << std::endl; + std::cout << "------------------------------------------" << std::endl; + + std::cout << "Opening : " << filename << " ..."; + FMpiFmaGenericLoader<FReal> loader(filename, FMpiComm.global()); + std::cout << " done." << std::endl; + + // vector to stock all particles + std::vector<particle_t> myParticles(loader.getMyNumberOfParticles()); + // define the max level to sort particle + const std::size_t max_level = sizeof(particle_t::morton_index) * 8 / 3; + // define a box, used in the sort + const FBox<FPoint<FReal>> box{loader.getBoxWidth(),loader.getCenterOfBox()}; + + // iterate on all of my particles + for(FSize idxPart = 0; idxPart <loader.getMyNumberOfParticles();++idxPart){ + particle_t tmp; + // get the current particles + loader.fillParticle(&tmp.pos,&tmp.phi); + // set the morton index of the current particle at the max_level + tmp.morton_index = inria::linear_tree::get_morton_index( + tmp.pos, box, max_level); + // set the weight of the particle + tmp.phi = 0.1; + // add the particle to my vector of particle + myParticles.at(idxPart) = tmp; + } + + // Now i have all of my particles in a vector, they all have a morton index + // now we will sort them + std::cout << "Sorting particles ..."; + inria::sort(mpi_comm,myParticles, + [](const auto& p1, const auto& p2) { + return p1.morton_index < p2.morton_index; + }); + std::cout << " Done" << std::endl; + + std::cout << " I have " << loader.getMyNumberOfParticles() << " particles ..." << std::endl; + std::cout << "For a total of " << loader.getNumberOfParticles() << " particles ..." << std::endl; + + // Now i want to create the the linear tree + // a linear tree is a tree, with only the leaf + std::cout << "Create linear tree at level " << level << " ..."; + auto linear_tree = + inria::linear_tree::create_balanced_linear_tree_at_level(mpi_comm, + level, + box, + myParticles); + std::cout << " done." 
<< std::endl; + + // Now i need to create a blocked linear tree, it's just a linear tree with + // more information + std::cout << "Creating blocked linear tree ..."; + // declaration of the group linear tree + FGroupLinearTree<decltype(linear_tree)::value_type>group_linear_tree{mpi_comm}; + // now i will fill it + // i can apply 2 methods + // - create_local_group_linear_tree + // - create_global_group_linear_tree + // the first function balance the linear tree according to particle (so he + // don't touch to the distribution ) + // the second function balance the linear tree according to the number of + // block, he try to have group full on the left + // + // with a groupSize at 128, 890 leaf and 3 proc the distribution will be + // ___________________________________ + // | Proc 0 | Proc 1 | Proc 2 | + // | | | | + // | 128 128 | 128 128 | 128 128 122 | + // |___________|_________|_____________| + group_linear_tree.create_local_group_linear_tree( + &linear_tree, + groupSize + ); + std::cout << " Done" << std::endl; + + // now i will to redistribute the particle according to the linear tree + std::cout << "Redistribute ..."; + // Redistribution of particles + inria::linear_tree::redistribute_particles(mpi_comm, + linear_tree, + myParticles); + std::cout << " Done" << std::endl; + + // Now we need to modify the morton index of of all particle to + // have the morton index at le treeHeight-1 + MortonIndex minMidx=1,maxMidx=0; + for (int i =0 ; i< level ; ++i){ + minMidx= minMidx <<3 ; + } + for(unsigned i = 0 ; i < myParticles.size(); ++i){ + myParticles.at(i).morton_index = inria::linear_tree::get_morton_index( + myParticles.at(i).pos, box, level); + } + std::cout << " MinIndex: " << minMidx << " MaxIndex: " << maxMidx<<std::endl; + + // Now we need to share the particle distribution to build the GroupTree + std::cout << "Share my particle distribution ..."; + group_linear_tree.set_index_particle_distribution(myParticles); + std::cout << " done"<< std::endl; + // + ////////////////////////////////////////////////////////////////////////////////////////////////////// + // + // Now i can declare my groupTree + // it's a empty instance of the FGroupTree + GroupOctreeClass localGroupTree = GroupOctreeClass::template get_block_tree_instance<GroupCellSymbClass, + GroupCellUpClass, + GroupCellDownClass, + GroupContainerClass>(TreeHeight, + groupSize, + loader.getCenterOfBox(), + loader.getBoxWidth()); + // + // Now I fill the localGroupTree + // + std::cout << "Creating tree ..." ; + localGroupTree.create_tree(group_linear_tree,myParticles); + std::cout << " done." << std::endl; + // + // now we can show the groupTree + std::cout << comments <<std::endl << " Local Tree " <<std::endl; + + localGroupTree.printInfoBlocks(); + // + std::cout << comments<<std::endl; + // Now i want to build a LET tree + // the LET tree is the Local Essential Tree + // It's a the same tree but we add block from other proc + // the block added are the blocked needed to post task for the P2P M2M and M2L interactions + // + // The first step is to add a unique index at all of group + std::cout << " Add a global index on every block .."; + int nb_block = dstr_grp_tree_builder::set_cell_group_global_index(localGroupTree,mpi_comm); + std::cout << ". done."; + std::cout << "We have a total of " << nb_block << " on every proc." << std::endl; + // + // now we can create LET + // + std::cout << "Creating let ..."; + localGroupTree.create_LET(group_linear_tree); + std::cout << "done." 
<< std::endl; + + // now we can show the groupTree with LET + // to show where is our block, the second argument of every block + // show if the block is local + + bool flag = dstr_grp_tree_vldr::validate_group_tree(localGroupTree,mpi_comm); + std::cout << comments <<std::endl << " LET " <<std::endl; + localGroupTree.printInfoBlocks(); + std::cout << comments <<std::endl; + + return 0; +} diff --git a/Tests/LET_STF_Implicit/testLetImplicitChebyshev.cpp b/Tests/LET_STF_Implicit/testLetImplicitChebyshev.cpp new file mode 100644 index 000000000..0f3af149a --- /dev/null +++ b/Tests/LET_STF_Implicit/testLetImplicitChebyshev.cpp @@ -0,0 +1,30 @@ +// ==== CMAKE ===== +// @FUSE_BLAS +// ================ +// Keep in private GIT +// @FUSE_MPI +// @FUSE_STARPU +// +// +#include <string> +// +// Chebychev cell class +#include "Kernels/Chebyshev/FChebCell.hpp" +#include "Kernels/Chebyshev/FChebSymKernel.hpp" +// +template<typename FReal, int ORDER> +using FInterpolationCell = FChebCell<FReal, ORDER>; + +template<typename FReal, typename GroupCellClass, + typename GroupContainerClass, + typename MatrixKernelClass, int ORDER> + +using FInterpolationKernel = FChebSymKernel<FReal, + GroupCellClass, + GroupContainerClass, + MatrixKernelClass, + ORDER> ; + +static std::string interpolationKernel("Chebyshev"); +#include "testFMMInterpolationStarPU.hpp" + diff --git a/Tests/LET_STF_Implicit/testLetImplicitUniform.cpp b/Tests/LET_STF_Implicit/testLetImplicitUniform.cpp new file mode 100644 index 000000000..572d1348e --- /dev/null +++ b/Tests/LET_STF_Implicit/testLetImplicitUniform.cpp @@ -0,0 +1,29 @@ +// ==== CMAKE ===== +// @FUSE_BLAS +// ================ +// Keep in private GIT +// @FUSE_MPI +// @FUSE_STARPU +// +#include <string> +// +// Uniform Grid points cell class +#include "Kernels/Uniform/FUnifCell.hpp" +#include "Kernels/Uniform/FUnifKernel.hpp" +// +template<typename FReal, int ORDER> +using FInterpolationCell = FUnifCell<FReal, ORDER>; + +template<typename FReal, typename GroupCellClass, + typename GroupContainerClass, + typename MatrixKernelClass, int ORDER> +using FInterpolationKernel = FUnifKernel<FReal, + GroupCellClass, + GroupContainerClass, + MatrixKernelClass, + ORDER> ; + +static std::string interpolationKernel("Uniform"); + +#include "testFMMInterpolationStarPU.hpp" + diff --git a/Tests/LET_STF_Implicit/testSizeGroupTree.cpp b/Tests/LET_STF_Implicit/testSizeGroupTree.cpp new file mode 100644 index 000000000..62457891e --- /dev/null +++ b/Tests/LET_STF_Implicit/testSizeGroupTree.cpp @@ -0,0 +1,356 @@ +// ==== CMAKE ===== +// @FUSE_BLAS +// ================ +// Keep in private GIT +// @FUSE_MPI +// @FUSE_STARPU + +#include "../../Src/Utils/FGlobal.hpp" +// include algo for linear tree +#include "inria/algorithm/distributed/mpi.hpp" +#include "inria/linear_tree/balance_tree.hpp" +// tree class +#include "../../Src/GroupTree/Core/FGroupTree.hpp" +// symbolic data +#include "../../Src/Components/FSymbolicData.hpp" +// cell class +#include "../../Src/Kernels/Chebyshev/FChebCell.hpp" +// parameter +#include "../../Src/Utils/FParameters.hpp" +#include "../../Src/Utils/FParameterNames.hpp" +// GroupParticleContianer +#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" +// file loader +#include "../../Src/Files/FMpiFmaGenericLoader.hpp" +// FBox +#include "Adaptive/FBox.hpp" +// Group linear tree +#include "../../Src/GroupTree/Core/FGroupLinearTree.hpp" +// Function for GroupLinearTree +#include "../../Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp" +#include 
"../../Src/Utils/FTic.hpp" +#include "../../Src/Files/FFmaGenericLoader.hpp" +#include "../../Src/Utils/FLeafBalance.hpp" + +#include "../../Contribs/json.hpp" + +#include <memory> + + +static const int ORDER = 6; +using FReal = double; +using GroupCellClass = FChebCell<FReal, ORDER>; +using GroupCellUpClass = typename GroupCellClass::multipole_t; +using GroupCellDownClass = typename GroupCellClass::local_expansion_t; +using GroupCellSymbClass = FSymbolicData; +using GroupContainerClass = FP2PGroupParticleContainer<FReal>; +using GroupOctreeClass = FGroupTree<FReal, + GroupCellSymbClass, + GroupCellUpClass, + GroupCellDownClass, GroupContainerClass, 1, 4, FReal>; + +// Structure for 1 particle +struct particle_t { + using position_t = FPoint<FReal>; + position_t pos; + FReal phi; + std::size_t morton_index; + const auto& position() const { + return pos; + } + const FPoint<FReal>& getPosition(){ + return pos; + } + const auto& physicalValue() const{ + return phi; + } + const auto& getPositions() const { + return pos; + } + int weight() const { return 1;} + friend constexpr auto morton_index(const particle_t& p) { + return p.morton_index; + } +}; +void sortParticle(FPoint<FReal> * allParticlesToSort, int treeHeight, int groupSize, std::vector<std::vector<int>> & sizeForEachGroup, std::vector<MortonIndex> & distributedMortonIndex, FFmaGenericLoader<FReal>& loader, int nproc); +void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight); +FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total); + +int main(int argc, char *argv[]) { + FTic time; + // Parameter definition + const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"}; + // Parameter help + FHelpDescribeAndExit(argc, argv, + "Test the blocked tree created with linear tree." 
,FParameterDefinitions::OctreeHeight, + FParameterDefinitions::InputFile, + LocalOptionBlocSize); + // Get parameters + // Get the groupSize + const int groupSize = + FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250); + // Get the file input + const char* const filename = + FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + // Get the treeHeight + const unsigned int TreeHeight = + FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); + // The level is the level of the leaf + int level = TreeHeight-1; + // Init MPI communicator + // Initialisation MPI Berenger + FMpi FMpiComm(argc,argv); + int nproc = FMpiComm.global().processCount(); + // Initialisation MPI Quentin + inria::mpi::communicator mpi_comm(FMpiComm.global().getComm()); + + // Show job information + std::cout << "GroupTree building comparaison " << std::endl; + std::cout << "File name : " << filename << std::endl; + std::cout << "TreeHeight : " << TreeHeight << std::endl; + std::cout << "Block size : " << groupSize << std::endl; + std::cout << "------------------------------------------" << std::endl; + + FFmaGenericLoader<FReal> loader(filename); + const FSize NbParticles = loader.getNumberOfParticles(); + FPoint<FReal> * allParticlesToSort = new FPoint<FReal>[NbParticles]; + FSize idxPart = 0; + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + FReal physicalValue = 0.1; + loader.fillParticle(&allParticlesToSort[idxPart], &physicalValue);//Same with file or not + } + std::vector<MortonIndex> distributedMortonIndex; + std::vector<std::vector<int>> sizeForEachGroup; + sortParticle(allParticlesToSort, TreeHeight, groupSize, sizeForEachGroup, distributedMortonIndex, loader, nproc); + + FP2PParticleContainer<FReal> allParticles; + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + FReal physicalValue = 0.1; + allParticles.push(allParticlesToSort[idxPart], physicalValue); + } + // Put the data into the tree + FTic time2; + time2.tic(); + GroupOctreeClass groupedTree(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, sizeForEachGroup, true); + time2.tac(); + + std::size_t particle_blocks = 0; + std::size_t total_size = 0; + std::size_t cell_blocks = 0; + int old_idx = 0; + for(int i = 0; i < groupedTree.getNbParticleGroup();++i){ + auto* container = groupedTree.getParticleGroup(i); + total_size += sizeof(*container); + total_size += container->getBufferSizeInByte(); + } + particle_blocks = total_size; + + for(int i = groupedTree.getHeight()-1 ; i > 0 ; --i ){ + for(int j = 0 ; j < groupedTree.getNbCellGroupAtLevel(i) ; ++j){ + auto* container = groupedTree.getCellGroup(i,j); + total_size += sizeof(*container); + total_size += container->getBufferSizeInByte(); + } + } + cell_blocks = total_size - particle_blocks; + total_size += sizeof(groupedTree); + mpi_comm.barrier(); + + int nb_proc = mpi_comm.size(); + int my_rank = mpi_comm.rank(); + std::vector<std::size_t> vect_result(3); + vect_result[0] = particle_blocks; + vect_result[1] = total_size; + vect_result[2] = cell_blocks; + std::vector<std::size_t> vect_recv(0,0); + if(my_rank == 0){ + vect_recv.resize(nb_proc*3); + } + mpi_comm.gather( + &vect_result[0], + 3*sizeof(std::size_t), + MPI_CHAR, + &vect_recv[0], + 3*sizeof(std::size_t), + MPI_CHAR, + 0 + ); + + if(my_rank == 0 ){ + std::size_t partavg = 0; + std::size_t cellavg = 0; + std::size_t totalavg = 0; + for(int i = 0 ; i < vect_recv.size() ; 
i+= 3){
+            partavg += vect_recv[i];
+            cellavg += vect_recv[i+2];
+            totalavg += vect_recv[i+1];
+        }
+
+        partavg /= nb_proc;
+        cellavg /= nb_proc;
+        totalavg /= nb_proc;
+
+        std::cout << "Particle blocks : " << partavg << std::endl;
+        std::cout << "Cell blocks : " << cellavg << std::endl;
+        std::cout << "Total size : " << totalavg << " bytes " << (totalavg)/1000 << " Kb "<< ((totalavg)/1000)/1000 << " Mb " << std::endl;
+
+        // read a JSON file
+        std::string name = "out"+std::to_string(TreeHeight);
+        name += "_" + std::to_string(groupSize)+"_"+std::to_string(loader.getNumberOfParticles()) + ".json";
+        std::ifstream ii(name);
+        nlohmann::json j;
+        // ii >> j;
+        j["GroupTree"]["ParticlesBlocks"] = partavg;
+        j["GroupTree"]["CellsBlocks"] = cellavg;
+        j["GroupTree"]["TotalSize"] = totalavg;
+        std::ofstream out(name);
+        out << j << std::endl;
+    }
+
+    return 0;
+}
+
+
+void sortParticle(FPoint<FReal> * allParticles, int treeHeight, int groupSize, std::vector<std::vector<int>> & sizeForEachGroup, std::vector<MortonIndex> & distributedMortonIndex, FFmaGenericLoader<FReal>& loader, int nproc)
+{
+    // Structure used to sort the particles
+    struct ParticleSortingStruct{
+        FPoint<FReal> position;
+        MortonIndex mindex;
+    };
+    // Create an array of this structure for the sort, then fill it
+    const FSize nbParticles = loader.getNumberOfParticles();
+    ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles];
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
+        const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>(loader.getCenterOfBox(), loader.getBoxWidth(),
+                                                                                           treeHeight,
+                                                                                           allParticles[idxPart]);
+        const MortonIndex particleIndex = host.getMortonIndex();
+        particlesToSort[idxPart].mindex = particleIndex;
+        particlesToSort[idxPart].position = allParticles[idxPart];
+    }
+
+    // Sort the new array
+    FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){
+        return v1.mindex <= v2.mindex;
+    });
+    // Copy everything back, in order, into the original array
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){
+        allParticles[idxPart] = particlesToSort[idxPart].position;
+    }
+
+    // Count the number of leaves
+    sizeForEachGroup.resize(treeHeight);
+    MortonIndex previousLeaf = -1;
+    int numberOfLeaf = 0;
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
+    {
+        if(particlesToSort[idxPart].mindex != previousLeaf)
+        {
+            previousLeaf = particlesToSort[idxPart].mindex;
+            ++numberOfLeaf;
+        }
+    }
+
+    // Compute the group sizes at the leaf level
+    FLeafBalance balancer;
+    for(int processId = 0; processId < nproc; ++processId)
+    {
+        FSize size_last;
+        FSize countGroup;
+        FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId);
+        size_last = leafOnProcess%groupSize;
+        countGroup = (leafOnProcess - size_last)/groupSize;
+        for(int i = 0; i < countGroup; ++i)
+            sizeForEachGroup[treeHeight-1].push_back(groupSize);
+        if(size_last > 0)
+            sizeForEachGroup[treeHeight-1].push_back((int)size_last);
+    }
+
+    // Compute the working interval at the leaf level
+    previousLeaf = -1;
+    int countLeaf = 0;
+    int processId = 0;
+    FSize leafOnProcess = balancer.getRight(numberOfLeaf, nproc, 0) - balancer.getLeft(numberOfLeaf, nproc, 0);
+    distributedMortonIndex.push_back(previousLeaf);
+    for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart)
+    {
+        if(particlesToSort[idxPart].mindex != previousLeaf)
+        {
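+            // A new leaf starts here: count it, and once this process has
+            // received its share of leaves (leafOnProcess), push the current
+            // Morton index twice - closing this process's working interval and
+            // opening the next one - before moving on to the next process.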
+ previousLeaf = particlesToSort[idxPart].mindex; + ++countLeaf; + if(countLeaf == leafOnProcess) + { + distributedMortonIndex.push_back(previousLeaf); + distributedMortonIndex.push_back(previousLeaf); + countLeaf = 0; + ++processId; + leafOnProcess = balancer.getRight(numberOfLeaf, nproc, processId) - balancer.getLeft(numberOfLeaf, nproc, processId); + } + } + } + distributedMortonIndex.push_back(particlesToSort[nbParticles - 1].mindex); + + //Calcul des working interval à chaque niveau + std::vector<std::vector<std::vector<MortonIndex>>> nodeRepartition; + createNodeRepartition(distributedMortonIndex, nodeRepartition, nproc, treeHeight); + + //Pour chaque niveau calcul de la taille des groupe + for(int idxLevel = treeHeight - 2; idxLevel >= 0; --idxLevel) + { + processId = 0; + int countParticleInTheGroup = 0; + MortonIndex previousMortonCell = -1; + + //cout << "Compute Level " << idxLevel << endl; + for(int idxPart = 0; idxPart < nbParticles; ++idxPart) + { + MortonIndex mortonCell = (particlesToSort[idxPart].mindex) >> (3*(treeHeight - 1 - idxLevel)); + if(mortonCell <= nodeRepartition[idxLevel][processId][1]) //Si l'indice est dans le working interval + { + if(mortonCell != previousMortonCell) //Si c'est un nouvelle indice + { + ++countParticleInTheGroup; //On le compte dans le groupe + previousMortonCell = mortonCell; + if(countParticleInTheGroup == groupSize) //Si le groupe est plein on ajoute le compte + { + sizeForEachGroup[idxLevel].push_back(groupSize); + countParticleInTheGroup = 0; + } + } + } + else //Si l'on change d'interval de process on ajoute ce que l'on a compté + { + if(countParticleInTheGroup > 0) + sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup); + countParticleInTheGroup = 1; + previousMortonCell = mortonCell; + ++processId; + } + } + if(countParticleInTheGroup > 0) + sizeForEachGroup[idxLevel].push_back(countParticleInTheGroup); + } +} + +void createNodeRepartition(std::vector<MortonIndex> distributedMortonIndex, std::vector<std::vector<std::vector<MortonIndex>>>& nodeRepartition, int nproc, int treeHeight) { + nodeRepartition.resize(treeHeight, std::vector<std::vector<MortonIndex>>(nproc, std::vector<MortonIndex>(2))); + for(int node_id = 0; node_id < nproc; ++node_id){ + nodeRepartition[treeHeight-1][node_id][0] = distributedMortonIndex[node_id*2]; + nodeRepartition[treeHeight-1][node_id][1] = distributedMortonIndex[node_id*2+1]; + } + for(int idxLevel = treeHeight - 2; idxLevel >= 0 ; --idxLevel){ + nodeRepartition[idxLevel][0][0] = nodeRepartition[idxLevel+1][0][0] >> 3; + nodeRepartition[idxLevel][0][1] = nodeRepartition[idxLevel+1][0][1] >> 3; + for(int node_id = 1; node_id < nproc; ++node_id){ + nodeRepartition[idxLevel][node_id][0] = FMath::Max(nodeRepartition[idxLevel+1][node_id][0] >> 3, nodeRepartition[idxLevel][node_id-1][0]+1); //Berenger phd :) + nodeRepartition[idxLevel][node_id][1] = nodeRepartition[idxLevel+1][node_id][1] >> 3; + } + } +} + +FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){ + if(mpi_rank < (total%mpi_count)) + return ((total - (total%mpi_count))/mpi_count)+1; + return ((total - (total%mpi_count))/mpi_count); +} diff --git a/Tests/LET_STF_Implicit/testSizeLETGroupTree.cpp b/Tests/LET_STF_Implicit/testSizeLETGroupTree.cpp new file mode 100644 index 000000000..c2b2570e7 --- /dev/null +++ b/Tests/LET_STF_Implicit/testSizeLETGroupTree.cpp @@ -0,0 +1,267 @@ +// ==== CMAKE ===== +// @FUSE_BLAS +// ================ +// Keep in private GIT +// @FUSE_MPI + +#include "../../Src/Utils/FGlobal.hpp" +// 
include algo for linear tree +#include "inria/algorithm/distributed/mpi.hpp" +#include "inria/linear_tree/balance_tree.hpp" +// tree class +#include "../../Src/GroupTree/Core/FGroupTree.hpp" +// symbolic data +#include "../../Src/Components/FSymbolicData.hpp" +// cell class +#include "../../Src/Kernels/Chebyshev/FChebCell.hpp" +// parameter +#include "../../Src/Utils/FParameters.hpp" +#include "../../Src/Utils/FParameterNames.hpp" +// GroupParticleContianer +#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" +// file loader +#include "../../Src/Files/FMpiFmaGenericLoader.hpp" +// FBox +#include "Adaptive/FBox.hpp" +// Group linear tree +#include "../../Src/GroupTree/Core/FGroupLinearTree.hpp" +// Function for GroupLinearTree +#include "../../Src/GroupTree/Core/FDistributedGroupTreeBuilder.hpp" + +#include "../../Contribs/json.hpp" + +#include <memory> + + +static const int ORDER = 6; +using FReal = double; +using GroupCellClass = FChebCell<FReal, ORDER>; +using GroupCellUpClass = typename GroupCellClass::multipole_t; +using GroupCellDownClass = typename GroupCellClass::local_expansion_t; +using GroupCellSymbClass = FSymbolicData; +using GroupContainerClass = FP2PGroupParticleContainer<FReal>; +using GroupOctreeClass = FGroupTree<FReal, + GroupCellSymbClass, + GroupCellUpClass, + GroupCellDownClass, GroupContainerClass, 1, 4, FReal>; + +// Structure for 1 particle +struct particle_t { + using position_t = FPoint<FReal>; + position_t pos; + FReal phi; + std::size_t morton_index; + const auto& position() const { + return pos; + } + const FPoint<FReal>& getPosition(){ + return pos; + } + const auto& physicalValue() const{ + return phi; + } + const auto& getPositions() const { + return pos; + } + int weight() const { return 1;} + friend constexpr auto morton_index(const particle_t& p) { + return p.morton_index; + } +}; + +int main(int argc, char *argv[]) { + // Parameter definition + const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"}; + // Parameter help + FHelpDescribeAndExit(argc, argv, + "Test the blocked tree created with linear tree." ,FParameterDefinitions::OctreeHeight, + FParameterDefinitions::InputFile, + LocalOptionBlocSize); + // Get parameters + // Get the groupSize + const int groupSize = + FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250); + // Get the file input + const char* const filename = + FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + // Get the treeHeight + const unsigned int TreeHeight = + FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); + // The level is the level of the leaf + int level = TreeHeight-1; + // Init MPI communicator + // Initialisation MPI Berenger + FMpi FMpiComm(argc,argv); + // Initialisation MPI Quentin + inria::mpi::communicator mpi_comm(FMpiComm.global().getComm()); + + // Show job information + std::cout << "JOB INFORMATION " << std::endl; + std::cout << "File name : " << filename << std::endl; + std::cout << "TreeHeight : " << TreeHeight << std::endl; + std::cout << "Block size : " << groupSize << std::endl; + std::cout << "------------------------------------------" << std::endl; + + std::cout << "Opening : " << filename << " ..."; + FMpiFmaGenericLoader<FReal> loader(filename, FMpiComm.global()); + std::cout << " done." 
+
+ // vector to store all the particles
+ std::vector<particle_t> myParticles(loader.getMyNumberOfParticles());
+ // define the max level to sort particles
+ const std::size_t max_level = sizeof(particle_t::morton_index) * 8 / 3;
+ // define a box, used in the sort
+ const FBox<FPoint<FReal>> box{loader.getBoxWidth(),loader.getCenterOfBox()};
+
+ // iterate on all of my particles
+ for(FSize idxPart = 0; idxPart <loader.getMyNumberOfParticles();++idxPart){
+ particle_t tmp;
+ // get the current particle
+ loader.fillParticle(&tmp.pos,&tmp.phi);
+ // set the morton index of the current particle at the max_level
+ tmp.morton_index = inria::linear_tree::get_morton_index(
+ tmp.pos, box, max_level);
+ // set the weight of the particle
+ tmp.phi = 0.1;
+ // add the particle to my vector of particles
+ myParticles.at(idxPart) = tmp;
+ }
+
+ // Now I have all of my particles in a vector and they all have a morton index,
+ // so we can sort them
+ inria::sort(mpi_comm,myParticles,
+ [](const auto& p1, const auto& p2) {
+ return p1.morton_index < p2.morton_index;
+ });
+
+ std::cout << " I have " << loader.getMyNumberOfParticles() << " particles ..." << std::endl;
+ std::cout << "For a total of " << loader.getNumberOfParticles() << " particles ..." << std::endl;
+
+ // Now I want to create the linear tree:
+ // a linear tree is a tree that contains only the leaves
+ auto linear_tree =
+ inria::linear_tree::create_balanced_linear_tree_at_level(mpi_comm,
+ level,
+ box,
+ myParticles);
+ // Now I need to create a blocked linear tree, which is just a linear tree with
+ // more information
+ // declaration of the group linear tree
+ FGroupLinearTree<decltype(linear_tree)::value_type>group_linear_tree{mpi_comm};
+ group_linear_tree.create_local_group_linear_tree(
+ &linear_tree,
+ groupSize
+ );
+
+ // Now I redistribute the particles according to the linear tree
+ // Redistribution of particles
+ inria::linear_tree::redistribute_particles(mpi_comm,
+ linear_tree,
+ myParticles);
+
+ // Now we need to modify the morton index of all particles to
+ // have the morton index at the level treeHeight-1
+ for(unsigned i = 0 ; i < myParticles.size(); ++i){
+ myParticles.at(i).morton_index = inria::linear_tree::get_morton_index(
+ myParticles.at(i).pos, box, level);
+ }
+
+ // Now we need to share the particle distribution to build the GroupTree
+ group_linear_tree.set_index_particle_distribution(myParticles);
+
+ // Now I can declare my groupTree,
+ // an empty instance of the FGroupTree
+ GroupOctreeClass localGroupTree = GroupOctreeClass::template get_block_tree_instance<GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass>(TreeHeight,
+ groupSize,
+ loader.getCenterOfBox(),
+ loader.getBoxWidth());
+
+ // Now I can fill the localGroupTree
+ localGroupTree.create_tree(group_linear_tree,myParticles);
+
+ int nb_block = dstr_grp_tree_builder::set_cell_group_global_index(localGroupTree,mpi_comm);
+
+ // now we can create the LET
+ localGroupTree.create_LET(group_linear_tree);
+
+ std::size_t particle_blocks = 0;
+ std::size_t total_size = 0;
+ std::size_t cell_blocks = 0;
+
+ /// Validation of the blocks: check that there are no duplicate blocks
+ int old_idx = 0;
+ for(int i = 0; i < localGroupTree.getNbParticleGroup();++i){
+ auto* container = localGroupTree.getParticleGroup(i);
+ total_size += sizeof(*container);
+ total_size += container->getBufferSizeInByte();
+ }
+ particle_blocks = total_size;
+
+ for(int i = localGroupTree.getHeight()-1 ; i > 0 ; --i ){
+ for(int j = 0 ; j < localGroupTree.getNbCellGroupAtLevel(i) ; ++j){
+ auto* container = localGroupTree.getCellGroup(i,j);
+ total_size += sizeof(*container);
+ total_size += container->getBufferSizeInByte();
+ }
+ }
+ cell_blocks = total_size - particle_blocks;
+ total_size += sizeof(localGroupTree);
+ mpi_comm.barrier();
+
+ int nb_proc = mpi_comm.size();
+ int my_rank = mpi_comm.rank();
+ std::vector<std::size_t> vect_result(3);
+ vect_result[0] = particle_blocks;
+ vect_result[1] = total_size;
+ vect_result[2] = cell_blocks;
+ std::vector<std::size_t> vect_recv(0,0);
+ if(my_rank == 0){
+ vect_recv.resize(nb_proc*3);
+ }
+ mpi_comm.gather(
+ &vect_result[0],
+ 3*sizeof(std::size_t),
+ MPI_CHAR,
+ &vect_recv[0],
+ 3*sizeof(std::size_t),
+ MPI_CHAR,
+ 0
+ );
+
+ if(my_rank == 0 ){
+ std::size_t partavg = 0;
+ std::size_t cellavg = 0;
+ std::size_t totalavg = 0;
+ for(std::size_t i = 0 ; i < vect_recv.size() ; i+= 3){
+ partavg += vect_recv[i];
+ cellavg += vect_recv[i+2];
+ totalavg += vect_recv[i+1];
+ }
+
+ partavg /= nb_proc;
+ cellavg /= nb_proc;
+ totalavg /= nb_proc;
+
+ std::cout << "Particle blocks : " << partavg << std::endl;
+ std::cout << "Cell blocks : " << cellavg << std::endl;
+ std::cout << "Total size : " << totalavg << " bytes " << (totalavg)/1000 << " KB "<< ((totalavg)/1000)/1000 << " MB " << std::endl;
+
+ nlohmann::json result;
+ std::string name = std::to_string(TreeHeight);
+ name += "_" + std::to_string(groupSize)+"_"+std::to_string(loader.getNumberOfParticles()) + ".json";
+
+ result["TreeHeight"] = TreeHeight;
+ result["GroupSize"] = groupSize;
+ result["Filename"] = filename;
+ result["NbParticle"] = loader.getNumberOfParticles();
+ result["LETGroupTree"]["ParticlesBlocks"] = partavg;
+ result["LETGroupTree"]["CellsBlocks"] = cellavg;
+ result["LETGroupTree"]["TotalSize"] = totalavg;
+
+ std::ofstream out(name);
+ out << result << std::endl;
+
+ }
+
+ return 0;
+}
diff --git a/UTests/utestLetTree.cpp b/UTests/utestLetTree.cpp
new file mode 100644
index 000000000..d50b8e421
--- /dev/null
+++ b/UTests/utestLetTree.cpp
@@ -0,0 +1,213 @@
+// See LICENCE file at project root
+
+// ==== CMAKE =====
+// @FUSE_BLAS
+// @FUSE_MPI
+// ================
+
+#include "../../Src/Utils/FGlobal.hpp"
+// include algo for linear tree
+#include "inria/algorithm/distributed/mpi.hpp"
+#include "inria/linear_tree/balance_tree.hpp"
+// tree class
+#include "GroupTree/Core/FGroupTree.hpp"
+// symbolic data
+#include "Components/FSymbolicData.hpp"
+// cell class
+#include "Kernels/Chebyshev/FChebCell.hpp"
+// parameter
+#include "Utils/FParameters.hpp"
+#include "Utils/FParameterNames.hpp"
+// GroupParticleContainer
+#include "GroupTree/Core/FP2PGroupParticleContainer.hpp"
+// file loader
+#include "Files/FMpiFmaGenericLoader.hpp"
+// FBox
+#include "Adaptive/FBox.hpp"
+// Group linear tree
+#include "GroupTree/Core/FGroupLinearTree.hpp"
+// Function for GroupLinearTree
+#include "GroupTree/Core/FDistributedGroupTreeBuilder.hpp"
+#include "GroupTree/Core/FDistributedLETGroupTreeValidator.hpp"
+#include <memory>
+
+#include "FUTester.hpp"
+
+static const int ORDER = 6;
+using FReal = double;
+
+struct particle_t {
+ using position_t = FPoint<FReal>;
+ position_t pos;
+ FReal phi;
+
+ std::size_t morton_index;
+ const auto& position() const {
+ return pos;
+ }
+ const FPoint<FReal>& getPosition(){
+ return pos;
+ }
+ const auto& physicalValue() const{
+ return phi;
+ }
+ const auto& getPositions() const {
+ return pos;
+ }
+ int weight() const { return 1;}
+ friend constexpr std::size_t morton_index(const particle_t& p) {
+ return p.morton_index;
+ }
+};
+
+class TestLetGroupTree : public FUTesterMpi<TestLetGroupTree>{
+
+
+ template<class GroupCellClass
+ ,class GroupCellUpClass
+ ,class GroupCellDownClass
+ ,class GroupCellSymbClass
+ ,class GroupContainerClass
+ ,class GroupOctreeClass>
+ void RunTest(){
+ const int TreeHeight = 5;
+ const int level = TreeHeight-1;
+ const int groupSize = 32;
+ // Definition of the particle type
+
+
+ // Load Quentin's MPI communicator
+ inria::mpi::communicator mpi_comm(app.global().getComm());
+
+ // Selection of the file
+ const std::string parFile( (sizeof(FReal) == sizeof(float))?
+ "Test/DirectFloatbfma":
+ "test20k.fma");
+ std::string filename(SCALFMMDataPath+parFile);
+
+ // Load the file
+ FMpiFmaGenericLoader<FReal> loader(filename, app.global());
+
+ // declare a vector to store the particles
+ std::vector<particle_t> myParticles(loader.getMyNumberOfParticles());
+
+
+ const std::size_t max_level = sizeof(particle_t::morton_index) * 8 / 3;
+ // define a box, used in the sort
+ const FBox<FPoint<FReal>> box{loader.getBoxWidth(),loader.getCenterOfBox()};
+
+ // iterate on all of my particles
+ for(FSize idxPart = 0; idxPart <loader.getMyNumberOfParticles();++idxPart){
+ particle_t tmp;
+ // get the current particle
+ loader.fillParticle(&tmp.pos,&tmp.phi);
+ // set the morton index of the current particle at the max_level
+ tmp.morton_index = inria::linear_tree::get_morton_index(
+ tmp.pos, box, max_level);
+ // set the weight of the particle
+ tmp.phi = 0.1;
+ // add the particle to my vector of particles
+ myParticles[idxPart] = tmp;
+ }
+ // Sort the particles
+ inria::sort(mpi_comm,myParticles,
+ [](const auto& p1, const auto& p2) {
+ return p1.morton_index < p2.morton_index;
+ });
+ // Create the linear tree
+ auto linear_tree = inria::linear_tree::create_balanced_linear_tree_at_level(
+ mpi_comm,
+ level,
+ box, myParticles);
+ // Create an empty instance of the group linear tree
+ FGroupLinearTree<decltype(linear_tree)::value_type>group_linear_tree{mpi_comm};
+
+
+ // Fill the group linear tree
+ group_linear_tree.create_local_group_linear_tree(
+ &linear_tree,
+ groupSize);
+
+ // Redistribute the particles according to the linear tree
+ inria::linear_tree::redistribute_particles(mpi_comm,
+ linear_tree,
+ myParticles);
+
+
+ // Recompute the Morton index of each particle to match the leaf level
+ for(unsigned i = 0 ; i < myParticles.size(); ++i){
+ myParticles.at(i).morton_index = inria::linear_tree::get_morton_index(
+ myParticles.at(i).pos, box, level);
+ }
+
+ group_linear_tree.set_index_particle_distribution(myParticles);
+
+
+ GroupOctreeClass localGroupTree = GroupOctreeClass::template get_block_tree_instance<GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass>(TreeHeight,
+ groupSize,
+ loader.getCenterOfBox(),
+ loader.getBoxWidth());
+ std::cout << " Creating Tree" << std::endl;
+
+ localGroupTree.create_tree(group_linear_tree,myParticles);
+ std::cout << "Tree built" << std::endl;
+ int nb_block = dstr_grp_tree_builder::set_cell_group_global_index(localGroupTree,mpi_comm);
+ // Check that there is no duplication of global indices
+ std::vector<bool> group_index_checker(nb_block,false);
+ // Check the particle groups
+ for(int i = 0 ; i < localGroupTree.getNbParticleGroup() ; ++i){
+ auto* container = localGroupTree.getParticleGroup(i);
+ int idx_global = container->getIdxGlobal();
+ uassert(group_index_checker[idx_global] == false);
+ group_index_checker[idx_global] = true;
+ }
+ // Check the cell groups
+ for(int j = 0 ; j < localGroupTree.getHeight() ; ++j){
+ for(int i = 0 ; i < localGroupTree.getNbCellGroupAtLevel(j); ++i){
+ auto* container = localGroupTree.getCellGroup(j,i);
+ int idx_global = container->getIdxGlobal();
+ uassert(group_index_checker[idx_global] == false);
+ group_index_checker[idx_global] = true;
+ }
+ }
+ // Create LET
+ localGroupTree.create_LET(group_linear_tree);
+ // launch the LET checker
+ bool flag = dstr_grp_tree_vldr::validate_group_tree(localGroupTree,mpi_comm);
+ // if the LET is correct, flag is true
+ uassert(flag);
+ }
+
+
+
+ void TestLet(){
+ using GroupCellClass = FChebCell<FReal, ORDER>;
+ using GroupCellUpClass = typename GroupCellClass::multipole_t;
+ using GroupCellDownClass = typename GroupCellClass::local_expansion_t;
+ using GroupCellSymbClass = FSymbolicData;
+ using GroupContainerClass = FP2PGroupParticleContainer<FReal>;
+ using GroupOctreeClass = FGroupTree<FReal,
+ GroupCellSymbClass,
+ GroupCellUpClass,
+ GroupCellDownClass, GroupContainerClass, 1, 4, FReal>;
+
+ RunTest<GroupCellClass,
+ GroupCellUpClass,
+ GroupCellDownClass,
+ GroupCellSymbClass,
+ GroupContainerClass,
+ GroupOctreeClass>();
+ }
+
+ void SetTests(){
+ AddTest(&TestLetGroupTree::TestLet,"Test the building of the LET ");
+ }
+
+public:
+ TestLetGroupTree(int argc, char ** argv):
+ FUTesterMpi(argc,argv){
+ }
+
+};
+
+TestClassMpi(TestLetGroupTree);
diff --git a/Utils/stdComplex.hpp b/Utils/stdComplex.hpp
new file mode 100644
index 000000000..4811beb46
--- /dev/null
+++ b/Utils/stdComplex.hpp
@@ -0,0 +1,26 @@
+// ===================================================================================
+// olivier.coulaud@inria.fr, berenger.bramas@inria.fr
+// This software is a computer program whose purpose is to compute the FMM.
+//
+// This software is governed by the CeCILL-C and LGPL licenses and
+// abiding by the rules of distribution of free software.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public and CeCILL-C Licenses for more details.
+// "http://www.cecill.info".
+// "http://www.gnu.org/licenses".
+// ===================================================================================
+#ifndef STDCOMPLEXE_HPP
+#define STDCOMPLEXE_HPP
+
+#include <complex>
+template<typename T>
+using stdComplex = std::complex<T> ;
+
+
+
+#endif //STDCOMPLEXE_HPP
+
+
--
GitLab