diff --git a/CMakeLists.txt b/CMakeLists.txt index 0b6bc578e59766ad48fcd2ae1825b8c3d952e27b..e19f68eeae38e8bd7557cfa270255b08f7b280dc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -122,6 +122,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/ CMAKE_DEPENDENT_OPTION(SCALFMM_STARPU_USE_REDUX "Set to ON to enable redux with StarPU" OFF "SCALFMM_USE_STARPU" OFF) CMAKE_DEPENDENT_OPTION(SCALFMM_STARPU_USE_PRIO "Set to ON to enable priority with StarPU" ON "SCALFMM_USE_STARPU" OFF) CMAKE_DEPENDENT_OPTION(SCALFMM_STARPU_FORCE_NO_SCHEDULER "Set to ON to disable heteroprio even if supported" OFF "SCALFMM_USE_STARPU" OFF) + CMAKE_DEPENDENT_OPTION(SCALFMM_USE_STARPU_EXTRACT "Set to ON to enable extract with StarPU mpi implicit" ON "SCALFMM_USE_STARPU" OFF) endif() message(STATUS "AVANT ${CMAKE_CXX_COMPILER_ID}" ) # @@ -460,63 +461,14 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/CMakeModules/morse/ find_package(FFTW COMPONENTS SIMPLE) endif() - message(STATUS " SCALFMM USE MKL ") - - if( SCALFMM_USE_MKL_AS_BLAS ) - - unset(FFT_LIBRARIES CACHE) - message(STATUS " SCALFMM USE MKL already defined") - set(FFT_INCLUDES "$ENV{MKLROOT}/include/fftw" CACHE STRING "Set your MKL flags") - if (BLAS_FOUND) - set(FFTW_FOUND ON) - endif() - - else(SCALFMM_USE_MKL_AS_BLAS) - - # The package can be used with the following COMPONENTS: - # MKL, THREADS|OMP and/or SIMPLE|DOUBLE|LONG|QUAD - # Default is DOUBLE and without THREADS|OMP - find_package(FFTW COMPONENTS MKL) # not REQUIRED - if (FFTW_LIBRARY_DIRS_DEP) - set(FFT_LIBRARIES "-L${FFTW_LIBRARY_DIRS_DEP};" CACHE STRING "Set your MKL flags") - endif() - if (FFTW_LIBRARIES_DEP) - foreach (fft_lib ${FFTW_LIBRARIES_DEP}) - set(FFT_LIBRARIES "${FFT_LIBRARIES};${fft_lib};") - endforeach() - endif() - set(FFT_INCLUDES "${FFTW_INCLUDE_DIRS_DEP}" ) - if (FFT_LIBRARIES) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${FFT_LIBRARIES}") - endif() - - endif(SCALFMM_USE_MKL_AS_BLAS) - - else(SCALFMM_USE_MKL_AS_FFTW) - - message(STATUS " SCALFMM USE FFTW ") - # The package can be used with the following COMPONENTS: - # MKL, THREADS|OMP and/or SIMPLE|DOUBLE|LONG|QUAD - # Default is DOUBLE and without THREADS|OMP - find_package(FFTW COMPONENTS SIMPLE) # not REQUIRED - #message(FATAL_ERROR ${FFTW_FOUND} ${FFTW_LIBRARY_DIRS_DEP} ${FFTW_LIBRARIES_DEP}) - if (FFTW_LIBRARY_DIRS_DEP) - set(FFT_LIBRARIES "-L${FFTW_LIBRARY_DIRS_DEP};" CACHE STRING "Set your FFTW path") - endif() - if (FFTW_LIBRARIES_DEP) - foreach (fft_lib ${FFTW_LIBRARIES_DEP}) - set(FFT_LIBRARIES "${FFT_LIBRARIES};${fft_lib};") - endforeach() - endif() - #message(FATAL_ERROR ${FFTW_INCLUDE_DIRS_DEP}) - set(FFT_INCLUDES "${FFTW_INCLUDE_DIRS_DEP}" ) - if (FFT_LIBRARIES) - set(SCALFMM_LIBRARIES "${SCALFMM_LIBRARIES};${FFT_LIBRARIES}") - endif() - if(FFTW_LIBRARY_DIRS) - # the RPATH to be used when installing - list(APPEND CMAKE_INSTALL_RPATH "${FFTW_LIBRARY_DIRS}") - endif() + if (FFTW_LIBRARY_DIRS_DEP) + set(FFT_LIBRARIES "-L${FFTW_LIBRARY_DIRS_DEP};" CACHE STRING "Set your MKL flags") + endif() + if (FFTW_LIBRARIES_DEP) + foreach (fft_lib ${FFTW_LIBRARIES_DEP}) + set(FFT_LIBRARIES "${FFT_LIBRARIES};${fft_lib};") + endforeach() + endif() set(FFT_INCLUDES "${FFTW_INCLUDE_DIRS_DEP}" ) if (FFT_LIBRARIES) diff --git a/Src/Files/FFmaGenericLoader.hpp b/Src/Files/FFmaGenericLoader.hpp index 6fb8ffc4a00c419958ec106e1015220103428893..c231f87f06cb53bbab40635ef625af8b45dd63dc 100644 --- a/Src/Files/FFmaGenericLoader.hpp +++ b/Src/Files/FFmaGenericLoader.hpp @@ -230,7 +230,7 @@ private: std::cerr << "File "<< filename<<" not opened! Error: " << strerror(errno) <<std::endl; std::exit( EXIT_FAILURE); } - FLOG(FLog::Controller << "Opened file "<< filename << "\n"); + std::cout << "Opened file "<< filename << std::endl; } public: @@ -271,8 +271,8 @@ public: } else if( filename.find(".fma")!=std::string::npos ) { binaryFile = false; } else { - std::cerr << "FFmaGenericLoader: " - << "Only .fma or .bfma input file are allowed. Got " + std::cout << "FFmaGenericLoader: " + << "Only .fma or .bfma input file are allowed. Got " << filename << "." << std::endl; std::exit ( EXIT_FAILURE) ; @@ -350,31 +350,6 @@ public: unsigned int getDataType(){ return typeData[0]; } - /** - * Fills a particle from the current position in the file. - * - * @param outParticlePositions the position of particle to fill (FPoint<FReal> class) - * @param outPhysicalValue the physical value of particle to fill (FReal) - */ - void fillParticle(FPoint<FReal>*const outParticlePositions){ - if(binaryFile){ - file->read((char*)(outParticlePositions), sizeof(FReal)*3); - if(otherDataToRead> 0){ - file->read((char*)(this->tmpVal), sizeof(FReal)*otherDataToRead); - } - } else { - FReal x,y,z; - (*this->file) >> x >> y >> z; - outParticlePositions->setPosition(x,y,z); - - if(otherDataToRead> 0){ - for (FSize i = 0 ; i <otherDataToRead; ++i){ - (*this->file) >> x ; - } - } - } - } - /** * Fills a particle from the current position in the file. * @@ -506,25 +481,25 @@ private: this->readAscciHeader(); } - FLOG(FLog::Controller << " nbParticles: " <<this->nbParticles << std::endl - << " Box width: " <<this->boxWidth << "\n" - << " Center: " << this->centerOfBox << "\n"); + std::cout << " nbParticles: " <<this->nbParticles << std::endl + << " Box width: " <<this->boxWidth << std::endl + << " Center: " << this->centerOfBox << std::endl; } void readAscciHeader() { - FLOG(FLog::Controller << " File open in ASCII mode\n"); + std::cout << " File open in ASCII mode "<< std::endl ; FReal x,y,z; (*this->file) >> typeData[0]>> typeData[1]; - FLOG(FLog::Controller << " Datatype "<< typeData[0] << " "<< typeData[1] << "\n"); + std::cout << " Datatype "<< typeData[0] << " "<< typeData[1] << std::endl; (*this->file) >> this->nbParticles >> this->boxWidth >> x >> y >> z; this->centerOfBox.setPosition(x,y,z); this->boxWidth *= 2; otherDataToRead = typeData[1] - (unsigned int)(4); }; void readBinaryHeader(){ - FLOG(FLog::Controller << " File open in binary mode\n"); + std::cout << " File open in binary mode "<< std::endl; file->seekg (std::ios::beg); file->read((char*)&typeData,2*sizeof(unsigned int)); - FLOG(FLog::Controller << " Datatype "<< typeData[0] << " "<< typeData[1] << "\n"); + std::cout << " Datatype "<< typeData[0] << " "<< typeData[1] << std::endl; if(typeData[0] != sizeof(FReal)){ std::cerr << "Size of elements in part file " << typeData[0] << " is different from size of FReal " << sizeof(FReal)<<std::endl; std::exit( EXIT_FAILURE); @@ -610,7 +585,7 @@ public: this->file->precision(10); } else { - std::cerr << "Input file not allowed only .fma or .bfma extensions" <<std::endl; + std::cout << "Input file not allowed only .fma or .bfma extensions" <<std::endl; std::exit ( EXIT_FAILURE) ; } // test if open @@ -671,7 +646,7 @@ public: void writeHeader(const FPoint<FReal> ¢erOfBox,const FReal &boxWidth, const FSize &nbParticles, const typePart data) { unsigned int typeFReal[2] = {sizeof(FReal) , sizeof(typePart) / sizeof(FReal) }; const unsigned int ndata = data.getWriteDataNumber(); - FLOG(FLog::Controller <<" WriteHeader: typeFReal: " << typeFReal[0] << " nb Elts: " << typeFReal[1] <<" NData to write "<< ndata<< "\n"); + std::cout <<" WriteHeader: typeFReal: " << typeFReal[0] << " nb Elts: " << typeFReal[1] <<" NData to write "<< ndata<< "\n"; if (ndata != typeFReal[1]){ typeFReal[1] = ndata; } @@ -882,16 +857,18 @@ private: file->seekg (std::ios::beg); file->write((const char*)typeFReal,2*sizeof(unsigned int)); if(typeFReal[0] != sizeof(FReal)){ - std::cerr << "Size of elements in part file " << typeFReal[0] << " is different from size of FReal " << sizeof(FReal)<<std::endl; + std::cout << "Size of elements in part file " << typeFReal[0] << " is different from size of FReal " << sizeof(FReal)<<std::endl; std::exit( EXIT_FAILURE); } else{ file->write( (const char*)&(nbParticles), sizeof(FSize) ); + // std::cout << "nbParticles "<< nbParticles<<std::endl; file->write( (const char*)&(boxWidth) ,sizeof(boxWidth) ); file->write( (const char*)(centerOfBox.getDataValue()),sizeof(FReal)*3); } } }; -#endif //FFmaGenericLoader_HPP + +#endif //FFmaGenericLoader_HPP diff --git a/Src/Files/FRandomLoader.hpp b/Src/Files/FRandomLoader.hpp index 597b9959be4749f5d7503a789702539b2c3c951b..28d442358eb16912e8fbfe33226363926e638bc6 100644 --- a/Src/Files/FRandomLoader.hpp +++ b/Src/Files/FRandomLoader.hpp @@ -106,53 +106,6 @@ public: (getRandom() * boxWidth) + centerOfBox.getY() - boxWidth/2, (getRandom() * boxWidth) + centerOfBox.getZ() - boxWidth/2); } - void fillParticleAtMortonIndex(FPoint<FReal>*const inParticlePositions, MortonIndex idx, unsigned int treeHeight){ - MortonIndex mask = 0x1LL; - //Largeur de la boite au niveau des feuilles - FReal leafWidth = boxWidth / FReal(1<<(treeHeight-1)); - //Décalage par rapport au centre de la moitié de la largeur de la boîte - FReal currentOffset = leafWidth / 2.0; - //Initialise x, y, z au centre de la boîte globale - FReal x, y, z; - x = centerOfBox.getX(); - y = centerOfBox.getY(); - z = centerOfBox.getZ(); - - //On va décaler le centre du père vers le centre du fils autant de fois qu'il y a de fils - //Comme ce sont des décalage succesif et plutôt indépendant, on peut commencer par les décalages au niveau des feuilles, ce qui est plus simple - for(unsigned int i = 0; i < treeHeight-1; ++i) - { - bool x_offset, y_offset, z_offset; - //Check le 1er bit qui correspond au z - z_offset = (idx & mask); - idx >>= 1; - //Check le 2nd bit qui correspond au y - y_offset = (idx & mask); - idx >>= 1; - //Check le 3ème bit qui correspond au x - x_offset = (idx & mask); - idx >>= 1; - //Décalage du x - if(x_offset) - x += currentOffset; - else - x -= currentOffset; - //Décalage du y - if(y_offset) - y += currentOffset; - else - y -= currentOffset; - //Décalage du z - if(z_offset) - z += currentOffset; - else - z -= currentOffset; - - //On augmente les décallages au fur et à mesure que l'on remonte les étages - currentOffset *= 2; - } - inParticlePositions->setPosition( x, y, z); - } /** Get a random number between 0 & 1 */ FReal getRandom() const{ diff --git a/Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp index 9cb953a846d9e1ad95d80763d0a4a6c93fca1a10..50f952e2a4df55318327a0a53a72d6f136164110 100644 --- a/Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp @@ -907,7 +907,7 @@ protected: MortonIndex interactionsIndexes[26]; int interactionsPosition[26]; - FTreeCoordinate coord(mindex); + FTreeCoordinate coord(mindex, tree->getHeight()-1); int counter = coord.getNeighborsIndexes(tree->getHeight(),interactionsIndexes,interactionsPosition); for(int idxInter = 0 ; idxInter < counter ; ++idxInter){ @@ -995,7 +995,7 @@ protected: MortonIndex interactionsIndexes[189]; int interactionsPosition[189]; - const FTreeCoordinate coord(mindex); + const FTreeCoordinate coord(mindex, idxLevel); int counter = coord.getInteractionNeighbors(idxLevel,interactionsIndexes,interactionsPosition); for(int idxInter = 0 ; idxInter < counter ; ++idxInter){ diff --git a/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp index 3b17b0ffa86d03a592d4033ee4efb3d355ff654d..017dc3e8f3d28f2b016a6318d8873cdc69b0b214 100644 --- a/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupTaskStarpuImplicitAlgorithm.hpp @@ -15,13 +15,17 @@ #include <vector> #include <memory> +#ifdef SCALFMM_USE_STARPU_EXTRACT #include <list> +#endif #include <omp.h> #include <starpu.h> #include <starpu_mpi.h> +#ifdef SCALFMM_USE_STARPU_EXTRACT #include <algorithm> +#endif #include "../StarPUUtils/FStarPUUtils.hpp" #include "../StarPUUtils/FStarPUFmmPriorities.hpp" #include "../StarPUUtils/FStarPUFmmPrioritiesV2.hpp" @@ -94,11 +98,15 @@ protected: }; std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel; +#ifdef SCALFMM_USE_STARPU_EXTRACT std::vector< std::vector< std::vector<std::vector<int>>>> externalInteractionsAllLevelInnerIndexes; std::vector< std::vector< std::vector<std::vector<int>>>> externalInteractionsAllLevelOuterIndexes; +#endif std::vector< std::vector<BlockInteractions<ParticleGroupClass>>> externalInteractionsLeafLevel; +#ifdef SCALFMM_USE_STARPU_EXTRACT std::vector< std::vector<std::vector<int>>> externalInteractionsLeafLevelOuter; std::vector< std::vector<std::vector<int>>> externalInteractionsLeafLevelInner; +#endif std::list<const std::vector<OutOfBlockInteraction>*> externalInteractionsLeafLevelOpposite; OctreeClass*const tree; //< The Tree @@ -167,6 +175,7 @@ protected: int mpi_rank, nproc; std::vector<std::vector<std::vector<MortonIndex>>> nodeRepartition; +#ifdef SCALFMM_USE_STARPU_EXTRACT struct ParticleExtractedHandles{ starpu_data_handle_t symb; size_t size; @@ -214,6 +223,7 @@ protected: starpu_codelet cell_extract_up; starpu_codelet cell_insert_up; starpu_codelet cell_insert_up_bis; +#endif public: FGroupTaskStarPUImplicitAlgorithm(OctreeClass*const inTree, KernelClass* inKernels, std::vector<MortonIndex>& distributedMortonIndex) @@ -836,6 +846,7 @@ protected: p2p_redux_read.name = "p2p_redux_read"; #endif +#ifdef SCALFMM_USE_STARPU_EXTRACT memset(&p2p_extract, 0, sizeof(p2p_extract)); p2p_extract.nbuffers = 2; p2p_extract.modes[0] = STARPU_R; @@ -887,8 +898,10 @@ protected: cell_insert_up_bis.name = "cell_insert_up_bis"; cell_insert_up_bis.cpu_funcs[0] = ThisClass::InsertCellUpBis; cell_insert_up_bis.where |= STARPU_CPU; +#endif } +#ifdef SCALFMM_USE_STARPU_EXTRACT static void InsertP2P(void *buffers[], void *cl_arg){ ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), STARPU_VECTOR_GET_NX(buffers[1]), @@ -982,6 +995,7 @@ protected: (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), STARPU_VECTOR_GET_NX(buffers[2])); } +#endif void initCodeletMpi(){ memset(&p2p_cl_inout_mpi, 0, sizeof(p2p_cl_inout_mpi)); @@ -1059,6 +1073,7 @@ protected: } particleHandles.clear(); } +#ifdef SCALFMM_USE_STARPU_EXTRACT for(auto& iter : extractedParticlesBuffer){ starpu_data_unregister(iter.symb); } @@ -1071,6 +1086,7 @@ protected: for(auto& iter : duplicatedCellBuffer){ starpu_data_unregister(iter.symb); } +#endif } /** Reset the handles array and create new ones to define @@ -1148,32 +1164,46 @@ protected: FLOG( FTic timer; FTic leafTimer; FTic cellTimer; ); // Reset interactions externalInteractionsAllLevel.clear(); +#ifdef SCALFMM_USE_STARPU_EXTRACT externalInteractionsAllLevelInnerIndexes.clear(); externalInteractionsAllLevelOuterIndexes.clear(); +#endif externalInteractionsLeafLevel.clear(); +#ifdef SCALFMM_USE_STARPU_EXTRACT externalInteractionsLeafLevelOuter.clear(); externalInteractionsLeafLevelInner.clear(); +#endif // One per level + leaf level externalInteractionsAllLevel.resize(tree->getHeight()); +#ifdef SCALFMM_USE_STARPU_EXTRACT externalInteractionsAllLevelInnerIndexes.resize(tree->getHeight()); externalInteractionsAllLevelOuterIndexes.resize(tree->getHeight()); +#endif // First leaf level { // We create one big vector per block externalInteractionsLeafLevel.resize(tree->getNbParticleGroup()); +#ifdef SCALFMM_USE_STARPU_EXTRACT externalInteractionsLeafLevelOuter.resize(tree->getNbParticleGroup()); externalInteractionsLeafLevelInner.resize(tree->getNbParticleGroup()); +#endif for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ // Create the vector ParticleGroupClass* containers = tree->getParticleGroup(idxGroup); std::vector<BlockInteractions<ParticleGroupClass>>* externalInteractions = &externalInteractionsLeafLevel[idxGroup]; +#ifdef SCALFMM_USE_STARPU_EXTRACT std::vector<std::vector<int>>* externalInteractionsOuter = &externalInteractionsLeafLevelOuter[idxGroup]; std::vector<std::vector<int>>* externalInteractionsInner = &externalInteractionsLeafLevelInner[idxGroup]; +#endif +#ifdef SCALFMM_USE_STARPU_EXTRACT #pragma omp task default(none) firstprivate(idxGroup, containers, externalInteractions, externalInteractionsOuter, externalInteractionsInner) +#else + #pragma omp task default(none) firstprivate(idxGroup, containers, externalInteractions) +#endif { // Can be a task(inout:iterCells) std::vector<OutOfBlockInteraction> outsideInteractions; const MortonIndex blockStartIdx = containers->getStartingIndex(); @@ -1185,7 +1215,7 @@ protected: MortonIndex interactionsIndexes[26]; int interactionsPosition[26]; - FTreeCoordinate coord(mindex, tree->getHeight()-1); + FTreeCoordinate coord(mindex); int counter = coord.getNeighborsIndexes(tree->getHeight(),interactionsIndexes,interactionsPosition); for(int idxInter = 0 ; idxInter < counter ; ++idxInter){ @@ -1245,6 +1275,7 @@ protected: outsideInteractions.begin() + copyExistingInteraction, interactions->interactions.begin()); +#ifdef SCALFMM_USE_STARPU_EXTRACT externalInteractionsOuter->emplace_back(); externalInteractionsInner->emplace_back(); @@ -1260,6 +1291,7 @@ protected: interactionsOuter->erase(std::unique(interactionsOuter->begin(), interactionsOuter->end()), interactionsOuter->end()); interactionsInner->erase(std::unique(interactionsInner->begin(), interactionsInner->end()), interactionsInner->end()); +#endif } currentOutInteraction = lastOutInteraction; @@ -1272,17 +1304,24 @@ protected: { for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ externalInteractionsAllLevel[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel)); +#ifdef SCALFMM_USE_STARPU_EXTRACT externalInteractionsAllLevelInnerIndexes[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel)); externalInteractionsAllLevelOuterIndexes[idxLevel].resize(tree->getNbCellGroupAtLevel(idxLevel)); - +#endif for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup); std::vector<BlockInteractions<CellContainerClass>>* externalInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup]; +#ifdef SCALFMM_USE_STARPU_EXTRACT std::vector<std::vector<int>>* externalInteractionsInner = &externalInteractionsAllLevelInnerIndexes[idxLevel][idxGroup]; std::vector<std::vector<int>>* externalInteractionsOuter = &externalInteractionsAllLevelOuterIndexes[idxLevel][idxGroup]; +#endif +#ifdef SCALFMM_USE_STARPU_EXTRACT #pragma omp task default(none) firstprivate(idxGroup, currentCells, idxLevel, externalInteractions, externalInteractionsInner, externalInteractionsOuter) +#else + #pragma omp task default(none) firstprivate(idxGroup, currentCells, idxLevel, externalInteractions) +#endif { std::vector<OutOfBlockInteraction> outsideInteractions; const MortonIndex blockStartIdx = currentCells->getStartingIndex(); @@ -1293,7 +1332,7 @@ protected: MortonIndex interactionsIndexes[189]; int interactionsPosition[189]; - const FTreeCoordinate coord(mindex, idxLevel); + const FTreeCoordinate coord(mindex); int counter = coord.getInteractionNeighbors(idxLevel,interactionsIndexes,interactionsPosition); for(int idxInter = 0 ; idxInter < counter ; ++idxInter){ @@ -1354,6 +1393,7 @@ protected: outsideInteractions.begin() + copyExistingInteraction, interactions->interactions.begin()); +#ifdef SCALFMM_USE_STARPU_EXTRACT externalInteractionsInner->emplace_back(); std::vector<int>* interactionsInnerIndexes = &externalInteractionsInner->back(); externalInteractionsOuter->emplace_back(); @@ -1370,6 +1410,7 @@ protected: FQuickSort<int, int>::QsSequential(interactionsInnerIndexes->data(),int(interactionsInnerIndexes->size())); interactionsInnerIndexes->erase(std::unique(interactionsInnerIndexes->begin(), interactionsInnerIndexes->end()), interactionsInnerIndexes->end()); +#endif } currentOutInteraction = lastOutInteraction; @@ -1577,8 +1618,7 @@ protected: for(int idxInteraction = 0; idxInteraction < int(externalInteractionsAllLevel[idxLevel][idxGroup].size()) ; ++idxInteraction){ const int interactionid = externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].otherBlockId; const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].interactions; -#define M2L_EXTRACT_MPI -#ifdef M2L_EXTRACT_MPI +#ifdef SCALFMM_USE_STARPU_EXTRACT // On the same node -- do as usual if(starpu_mpi_data_get_rank(cellHandles[idxLevel][idxGroup].symb) == starpu_mpi_data_get_rank(cellHandles[idxLevel][interactionid].symb)){ #endif @@ -1653,7 +1693,8 @@ protected: #endif #endif 0); -#ifdef M2L_EXTRACT_MPI + +#ifdef SCALFMM_USE_STARPU_EXTRACT } else{ { @@ -2125,8 +2166,8 @@ protected: } else { -#define P2P_EXTRACT_MPI -#ifdef P2P_EXTRACT_MPI + +#ifdef SCALFMM_USE_STARPU_EXTRACT { // Extract data from second group for the first one // That is copy B to B' @@ -2281,9 +2322,9 @@ protected: // Reuse block but just to perform the send duplicateA.data = (unsigned char*) FAlignedMemory::AllocateBytes<64>(duplicateA.size);// = const_cast<unsigned char*>(tree->getParticleGroup(idxGroup)->getRawBuffer()); } - else{ - duplicateA.data = nullptr; - } + else{ + duplicateA.data = nullptr; + } registeringNode = starpu_mpi_data_get_rank(particleHandles[interactionid].down); where = (registeringNode == mpi_rank) ? STARPU_MAIN_RAM : -1; diff --git a/Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp index da40ff89e625e90a5d8b0fdd3e06f0cfbb38f195..e40f807d7891758f165496775acdf209232d5a32 100644 --- a/Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp @@ -185,7 +185,6 @@ protected: #endif #ifdef STARPU_USE_TASK_NAME -#ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS std::vector<std::unique_ptr<char[]>> m2mTaskNames; std::vector<std::unique_ptr<char[]>> m2lTaskNames; std::vector<std::unique_ptr<char[]>> m2lOuterTaskNames; @@ -194,14 +193,10 @@ protected: std::unique_ptr<char[]> l2pTaskNames; std::unique_ptr<char[]> p2pTaskNames; std::unique_ptr<char[]> p2pOuterTaskNames; -#else - FStarPUTaskNameParams taskNames; -#endif #endif #ifdef SCALFMM_STARPU_USE_PRIO typedef FStarPUFmmPrioritiesV2 PrioClass;// FStarPUFmmPriorities #endif - std::list<char*> taskName; public: FGroupTaskStarPUMpiAlgorithm(const FMpi::FComm& inComm, OctreeClass*const inTree, KernelClass* inKernels) @@ -217,11 +212,6 @@ public: #endif #ifdef SCALFMM_ENABLE_OPENCL_KERNEL openclWrapper(tree->getHeight()), - #endif - #ifdef STARPU_USE_TASK_NAME - #ifdef SCALFMM_SIMGRID_TASKNAMEPARAMS - taskNames(inComm.processId(), inComm.processCount()), - #endif #endif wrapperptr(&wrappers){ FAssertLF(tree, "tree cannot be null"); @@ -300,7 +290,6 @@ public: void buildTaskNames(){ #ifdef STARPU_USE_TASK_NAME -#ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS const int namesLength = 128; m2mTaskNames.resize(tree->getHeight()); m2lTaskNames.resize(tree->getHeight()); @@ -325,7 +314,6 @@ public: snprintf(p2pTaskNames.get(), namesLength, "P2P"); p2pOuterTaskNames.reset(new char[namesLength]); snprintf(p2pOuterTaskNames.get(), namesLength, "P2P-out"); -#endif #endif } @@ -389,8 +377,6 @@ public: #ifdef STARPU_SUPPORT_ARBITER starpu_arbiter_destroy(arbiterGlobal); #endif - for(char* ptr : taskName) - free(ptr); starpu_mpi_shutdown(); starpu_shutdown(); } @@ -937,10 +923,6 @@ protected: nbBlocksBeforeMinPerLevel[idxLevel] = 0; for(int idxProc = 0 ; idxProc < comm.processId() ; ++idxProc){ nbBlocksBeforeMinPerLevel[idxLevel] += nbBlocksPerLevelAll[idxProc*tree->getHeight() + idxLevel]; - } - int nb = 0; - for(int idxProc = 0 ; idxProc < comm.processCount() ; ++idxProc){ - nb += nbBlocksPerLevelAll[idxProc*tree->getHeight() + idxLevel]; } } // Prepare the block infos @@ -1084,13 +1066,13 @@ protected: const size_t nbBytesInBlockSymb = processesBlockInfos[idxLevel][idxOtherGroup].bufferSizeSymb; unsigned char* memoryBlockSymb = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlockSymb); remoteCellGroups[idxLevel][idxOtherGroup].ptrSymb = memoryBlockSymb; - starpu_vector_data_register(&remoteCellGroups[idxLevel][idxOtherGroup].handleSymb, 0, - (uintptr_t)remoteCellGroups[idxLevel][idxOtherGroup].ptrSymb, nbBytesInBlockSymb, 1); + starpu_variable_data_register(&remoteCellGroups[idxLevel][idxOtherGroup].handleSymb, 0, + (uintptr_t)remoteCellGroups[idxLevel][idxOtherGroup].ptrSymb, nbBytesInBlockSymb); const size_t nbBytesInBlockUp = processesBlockInfos[idxLevel][idxOtherGroup].bufferSizeUp; unsigned char* memoryBlockUp = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlockUp); remoteCellGroups[idxLevel][idxOtherGroup].ptrUp = memoryBlockUp; - starpu_vector_data_register(&remoteCellGroups[idxLevel][idxOtherGroup].handleUp, 0, - (uintptr_t)remoteCellGroups[idxLevel][idxOtherGroup].ptrUp, nbBytesInBlockUp, 1); + starpu_variable_data_register(&remoteCellGroups[idxLevel][idxOtherGroup].handleUp, 0, + (uintptr_t)remoteCellGroups[idxLevel][idxOtherGroup].ptrUp, nbBytesInBlockUp); } } } @@ -1188,8 +1170,8 @@ protected: const size_t nbBytesInBlock = processesBlockInfos[tree->getHeight()-1][idxOtherGroup].leavesBufferSize; unsigned char* memoryBlock = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlock); remoteParticleGroupss[idxOtherGroup].ptrSymb = memoryBlock; - starpu_vector_data_register(&remoteParticleGroupss[idxOtherGroup].handleSymb, 0, - (uintptr_t)remoteParticleGroupss[idxOtherGroup].ptrSymb, nbBytesInBlock, 1); + starpu_variable_data_register(&remoteParticleGroupss[idxOtherGroup].handleSymb, 0, + (uintptr_t)remoteParticleGroupss[idxOtherGroup].ptrSymb, nbBytesInBlock); } } } @@ -1310,18 +1292,14 @@ protected: FLOG(FLog::Controller << "[SMpi] " << idxLevel << " Post a recv during M2L for Idx " << processesBlockInfos[idxLevel][idxHandle].firstIndex << " and dest is " << processesBlockInfos[idxLevel][idxHandle].owner << " tag " << getTag(idxLevel, processesBlockInfos[idxLevel][idxHandle].firstIndex, processesBlockInfos[idxLevel][idxHandle].globalIdx, 1, processesBlockInfos[idxLevel][idxHandle].owner) << "\n"); - mpiPostIRecv(remoteCellGroups[idxLevel][idxHandle].handleSymb, - processesBlockInfos[idxLevel][idxHandle].owner, - idxLevel, - processesBlockInfos[idxLevel][idxHandle].firstIndex, - processesBlockInfos[idxLevel][idxHandle].globalIdx, - 0); - mpiPostIRecv(remoteCellGroups[idxLevel][idxHandle].handleUp, - processesBlockInfos[idxLevel][idxHandle].owner, - idxLevel, - processesBlockInfos[idxLevel][idxHandle].firstIndex, - processesBlockInfos[idxLevel][idxHandle].globalIdx, - 1); + starpu_mpi_irecv_detached( remoteCellGroups[idxLevel][idxHandle].handleSymb, + processesBlockInfos[idxLevel][idxHandle].owner, + getTag(idxLevel,processesBlockInfos[idxLevel][idxHandle].firstIndex, 0), + comm.getComm(), 0, 0 ); + starpu_mpi_irecv_detached( remoteCellGroups[idxLevel][idxHandle].handleUp, + processesBlockInfos[idxLevel][idxHandle].owner, + getTag(idxLevel,processesBlockInfos[idxLevel][idxHandle].firstIndex, 1), + comm.getComm(), 0, 0 ); } } } @@ -1332,12 +1310,10 @@ protected: FLOG(FLog::Controller << "[SMpi] Post a recv during P2P for Idx " << processesBlockInfos[tree->getHeight()-1][idxHandle].firstIndex << " and dest is " << processesBlockInfos[tree->getHeight()-1][idxHandle].owner << " tag " << getTag(tree->getHeight(), processesBlockInfos[tree->getHeight()-1][idxHandle].firstIndex, processesBlockInfos[tree->getHeight()-1][idxHandle].globalIdx, 0, processesBlockInfos[tree->getHeight()-1][idxHandle].owner) << "\n"); - mpiPostIRecv(remoteParticleGroupss[idxHandle].handleSymb, - processesBlockInfos[tree->getHeight()-1][idxHandle].owner, - tree->getHeight(), //NOTE for tag, choose that particles level is tree height - processesBlockInfos[tree->getHeight()-1][idxHandle].firstIndex, - processesBlockInfos[tree->getHeight()-1][idxHandle].globalIdx, - 0); + starpu_mpi_irecv_detached( remoteParticleGroupss[idxHandle].handleSymb, + processesBlockInfos[tree->getHeight()-1][idxHandle].owner, + getTag(tree->getHeight(),processesBlockInfos[tree->getHeight()-1][idxHandle].firstIndex, 0), + comm.getComm(), 0, 0 ); } } } @@ -1355,12 +1331,9 @@ protected: FLOG(FLog::Controller << "[SMpi] Post a send during P2P for Idx " << tree->getParticleGroup(localId)->getStartingIndex() << " and dest is " << sd.dest << " tag " << getTag(tree->getHeight(), tree->getParticleGroup(localId)->getStartingIndex(), nbBlocksBeforeMinPerLevel[tree->getHeight()-1] + localId, 0, sd.dest) << "\n"); - mpiPostISend(particleHandles[localId].symb, - sd.dest, - tree->getHeight(), //NOTE for tag, choose that particles level is tree height - tree->getParticleGroup(localId)->getStartingIndex(), - nbBlocksBeforeMinPerLevel[tree->getHeight()-1] + localId, - 0); + starpu_mpi_isend_detached( particleHandles[localId].symb, sd.dest, + getTag(tree->getHeight(),tree->getParticleGroup(localId)->getStartingIndex(), 0), + comm.getComm(), 0/*callback*/, 0/*arg*/ ); } } FLOG(FLog::Controller.flush() ); @@ -1380,18 +1353,12 @@ protected: FLOG(FLog::Controller << "[SMpi] " << sd.level << " Post a send during M2L for Idx " << tree->getCellGroup(sd.level, localId)->getStartingIndex() << " and dest is " << sd.dest << " tag " << getTag(sd.level, tree->getCellGroup(sd.level, localId)->getStartingIndex(), nbBlocksBeforeMinPerLevel[sd.level] + localId, 1, sd.dest) << "\n"); - mpiPostISend(cellHandles[sd.level][localId].symb, - sd.dest, - sd.level, - tree->getCellGroup(sd.level, localId)->getStartingIndex(), - nbBlocksBeforeMinPerLevel[sd.level] + localId, - 0); - mpiPostISend(cellHandles[sd.level][localId].up, - sd.dest, - sd.level, - tree->getCellGroup(sd.level, localId)->getStartingIndex(), - nbBlocksBeforeMinPerLevel[sd.level] + localId, - 1); + starpu_mpi_isend_detached( cellHandles[sd.level][localId].symb, sd.dest, + getTag(sd.level,tree->getCellGroup(sd.level, localId)->getStartingIndex(), 0), + comm.getComm(), 0/*callback*/, 0/*arg*/ ); + starpu_mpi_isend_detached( cellHandles[sd.level][localId].up, sd.dest, + getTag(sd.level,tree->getCellGroup(sd.level, localId)->getStartingIndex(), 1), + comm.getComm(), 0/*callback*/, 0/*arg*/ ); } } FLOG(FLog::Controller.flush() ); @@ -1441,12 +1408,12 @@ protected: for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ const CellContainerClass* currentCells = tree->getCellGroup(idxLevel, idxGroup); - starpu_vector_data_register(&cellHandles[idxLevel][idxGroup].symb, 0, - (uintptr_t)currentCells->getRawBuffer(), currentCells->getBufferSizeInByte(), 1); - starpu_vector_data_register(&cellHandles[idxLevel][idxGroup].up, 0, - (uintptr_t)currentCells->getRawMultipoleBuffer(), currentCells->getMultipoleBufferSizeInByte(), 1); - starpu_vector_data_register(&cellHandles[idxLevel][idxGroup].down, 0, - (uintptr_t)currentCells->getRawLocalBuffer(), currentCells->getLocalBufferSizeInByte(), 1); + starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].symb, 0, + (uintptr_t)currentCells->getRawBuffer(), currentCells->getBufferSizeInByte()); + starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].up, 0, + (uintptr_t)currentCells->getRawMultipoleBuffer(), currentCells->getMultipoleBufferSizeInByte()); + starpu_variable_data_register(&cellHandles[idxLevel][idxGroup].down, 0, + (uintptr_t)currentCells->getRawLocalBuffer(), currentCells->getLocalBufferSizeInByte()); cellHandles[idxLevel][idxGroup].intervalSize = int(currentCells->getNumberOfCellsInBlock()); #ifdef STARPU_SUPPORT_ARBITER starpu_data_assign_arbiter(cellHandles[idxLevel][idxGroup].up, arbiterGlobal); @@ -1458,10 +1425,10 @@ protected: particleHandles.resize(tree->getNbParticleGroup()); for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){ ParticleGroupClass* containers = tree->getParticleGroup(idxGroup); - starpu_vector_data_register(&particleHandles[idxGroup].symb, 0, - (uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte(), 1); - starpu_vector_data_register(&particleHandles[idxGroup].down, 0, - (uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte(), 1); + starpu_variable_data_register(&particleHandles[idxGroup].symb, 0, + (uintptr_t)containers->getRawBuffer(), containers->getBufferSizeInByte()); + starpu_variable_data_register(&particleHandles[idxGroup].down, 0, + (uintptr_t)containers->getRawAttributesBuffer(), containers->getAttributesBufferSizeInByte()); #ifdef STARPU_USE_REDUX starpu_data_set_reduction_methods(particleHandles[idxGroup].down, &p2p_redux_perform, &p2p_redux_init); @@ -1677,21 +1644,6 @@ protected: FLOG( FLog::Controller << "\t\t\t Prepare at other levels in " << cellTimer.elapsed() << "s\n" ); } - ///////////////////////////////////////////////////////////////////////////////////// - /// Mpi Function overload - ///////////////////////////////////////////////////////////////////////////////////// - void mpiPostISend(starpu_data_handle_t handle, const int dest, const int level, const MortonIndex startingIndex, const int idxBlock, const int mode) - { - starpu_mpi_isend_detached(handle, dest, - getTag(level,startingIndex,idxBlock, mode, dest), - comm.getComm(), 0/*callback*/, 0/*arg*/ ); - } - void mpiPostIRecv(starpu_data_handle_t handle, const int dest, const int level, const MortonIndex startingIndex, const int idxBlock, const int mode) - { - starpu_mpi_irecv_detached(handle, dest, - getTag(level,startingIndex,idxBlock, mode, dest), - comm.getComm(), 0/*callback*/, 0/*arg*/ ); - } ///////////////////////////////////////////////////////////////////////////////////// /// Bottom Pass ///////////////////////////////////////////////////////////////////////////////////// @@ -1712,17 +1664,7 @@ protected: STARPU_RW, cellHandles[tree->getHeight()-1][idxGroup].up, STARPU_R, particleHandles[idxGroup].symb, #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS STARPU_NAME, p2mTaskNames.get(), - #else - STARPU_NAME, taskNames.print("P2M", "%d, %lld, %lld, %lld, %lld, %d\n", - 0, - 0, - 0, - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - comm.processId()), - #endif #endif 0); } @@ -1777,23 +1719,7 @@ protected: task->priority = PrioClass::Controller().getInsertionPosM2M(idxLevel); #endif #ifdef STARPU_USE_TASK_NAME -#ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS task->name = m2mTaskNames[idxLevel].get(); -#else - task->name = taskNames.print("M2M", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - 0, - 0, - 0, - 0, - 0, - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), - comm.processId()); - -#endif #endif FAssertLF(starpu_task_submit(task) == 0); } @@ -1828,22 +1754,7 @@ protected: task->priority = PrioClass::Controller().getInsertionPosM2M(idxLevel); #endif #ifdef STARPU_USE_TASK_NAME -#ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS task->name = m2mTaskNames[idxLevel].get(); -#else - task->name = taskNames.print("M2M", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - 0, - 0, - 0, - 0, - 0, - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), - comm.processId()); -#endif #endif FAssertLF(starpu_task_submit(task) == 0); } @@ -1872,14 +1783,14 @@ protected: const size_t nbBytesInBlockSymb = processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].bufferSizeSymb; unsigned char* memoryBlockSymb = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlockSymb); remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].ptrSymb = memoryBlockSymb; - starpu_vector_data_register(&remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].handleSymb, 0, - (uintptr_t)remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].ptrSymb, nbBytesInBlockSymb, 1); + starpu_variable_data_register(&remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].handleSymb, 0, + (uintptr_t)remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].ptrSymb, nbBytesInBlockSymb); const size_t nbBytesInBlockUp = processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].bufferSizeUp; unsigned char* memoryBlockUp = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlockUp); remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].ptrUp = memoryBlockUp; - starpu_vector_data_register(&remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].handleUp, 0, - (uintptr_t)remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].ptrUp, nbBytesInBlockUp, 1); + starpu_variable_data_register(&remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].handleUp, 0, + (uintptr_t)remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].ptrUp, nbBytesInBlockUp); } FLOG(FLog::Controller << "[SMpi] " << idxLevel << " Post a recv during M2M for Idx " << processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].firstIndex << @@ -1888,19 +1799,15 @@ protected: " and owner is " << processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].owner << " tag " << getTag(idxLevel, processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].firstIndex, processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].globalIdx, 1, processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].owner) << "\n"); FLOG(FLog::Controller.flush()); - mpiPostIRecv(remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].handleSymb, - processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].owner, - idxLevel+1, - processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].firstIndex, - processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].globalIdx, - 0); + starpu_mpi_irecv_detached ( remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].handleSymb, + processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].owner, + getTag(idxLevel,processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].firstIndex, 0), + comm.getComm(), 0/*callback*/, 0/*arg*/ ); + starpu_mpi_irecv_detached ( remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].handleUp, + processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].owner, + getTag(idxLevel,processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].firstIndex, 1), + comm.getComm(), 0/*callback*/, 0/*arg*/ ); - mpiPostIRecv(remoteCellGroups[idxLevel+1][firstOtherBlock + idxBlockToRecv].handleUp, - processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].owner, - idxLevel+1, - processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].firstIndex, - processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToRecv].globalIdx, - 1); idxBlockToRecv += 1; } @@ -1933,22 +1840,7 @@ protected: task->priority = PrioClass::Controller().getInsertionPosM2M(idxLevel); #endif #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS task->name = m2mTaskNames[idxLevel].get(); - #else - task->name = taskNames.print("M2M", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - 0, - 0, - 0, - 0, - 0, - tree->getCellGroup(idxLevel, tree->getNbCellGroupAtLevel(idxLevel)-1)->getStartingIndex(), - tree->getCellGroup(idxLevel, tree->getNbCellGroupAtLevel(idxLevel)-1)->getEndingIndex(), - processesBlockInfos[idxLevel+1][firstOtherBlock + nbSubCellGroups].firstIndex, - processesBlockInfos[idxLevel+1][firstOtherBlock + nbSubCellGroups].lastIndex, - comm.processId()); - #endif #endif nbSubCellGroups += 1; FAssertLF(starpu_task_submit(task) == 0); @@ -1980,18 +1872,12 @@ protected: " and dest is " << dest << " tag " << getTag(idxLevel, tree->getCellGroup(idxLevel+1, lowerIdxToSend)->getStartingIndex(), nbBlocksBeforeMinPerLevel[idxLevel+1] + lowerIdxToSend, 1, dest) << "\n"); FLOG(FLog::Controller.flush()); - mpiPostISend(cellHandles[idxLevel+1][lowerIdxToSend].symb, - dest, - idxLevel+1, - tree->getCellGroup(idxLevel+1, lowerIdxToSend)->getStartingIndex(), - nbBlocksBeforeMinPerLevel[idxLevel+1] + lowerIdxToSend, - 0); - mpiPostISend(cellHandles[idxLevel+1][lowerIdxToSend].up, - dest, - idxLevel+1, - tree->getCellGroup(idxLevel+1, lowerIdxToSend)->getStartingIndex(), - nbBlocksBeforeMinPerLevel[idxLevel+1] + lowerIdxToSend, - 1); + starpu_mpi_isend_detached( cellHandles[idxLevel+1][lowerIdxToSend].symb, dest, + getTag(idxLevel,tree->getCellGroup(idxLevel+1, lowerIdxToSend)->getStartingIndex(), 0), + comm.getComm(), 0/*callback*/, 0/*arg*/ ); + starpu_mpi_isend_detached( cellHandles[idxLevel+1][lowerIdxToSend].up, dest, + getTag(idxLevel,tree->getCellGroup(idxLevel+1, lowerIdxToSend)->getStartingIndex(), 1), + comm.getComm(), 0/*callback*/, 0/*arg*/ ); lowerIdxToSend += 1; } @@ -2027,22 +1913,7 @@ protected: STARPU_R, remoteCellGroups[idxLevel][interactionid].handleSymb, STARPU_R, remoteCellGroups[idxLevel][interactionid].handleUp, #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS STARPU_NAME, m2lOuterTaskNames[idxLevel].get(), - #else - STARPU_NAME, taskNames.print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - 0, - 0, - 0, - 0, - 0, - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - processesBlockInfos[idxLevel][interactionid].firstIndex, - processesBlockInfos[idxLevel][interactionid].lastIndex, - comm.processId()), - #endif #endif 0); } @@ -2073,20 +1944,7 @@ protected: STARPU_R, cellHandles[idxLevel][idxGroup].up, (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), cellHandles[idxLevel][idxGroup].down, #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS STARPU_NAME, m2lTaskNames[idxLevel].get(), - #else - //"M2L-l_nb_i" - STARPU_NAME, taskNames.print("M2L", "%d, %d, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - 0, - 0, - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - comm.processId()), - #endif #endif 0); } @@ -2115,22 +1973,7 @@ protected: STARPU_R, cellHandles[idxLevel][interactionid].symb, STARPU_R, cellHandles[idxLevel][interactionid].up, #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS STARPU_NAME, m2lOuterTaskNames[idxLevel].get(), - #else - STARPU_NAME, taskNames.print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - 0, - 0, - 0, - 0, - 0, - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(), - tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(), - comm.processId()), - #endif #endif 0); @@ -2149,22 +1992,7 @@ protected: STARPU_R, cellHandles[idxLevel][idxGroup].symb, STARPU_R, cellHandles[idxLevel][idxGroup].up, #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS STARPU_NAME, m2lOuterTaskNames[idxLevel].get(), - #else - STARPU_NAME, taskNames.print("M2L_out", "%d, %d, %lld, %d, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - 0, - 0, - 0, - 0, - 0, - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel, interactionid)->getStartingIndex(), - tree->getCellGroup(idxLevel, interactionid)->getEndingIndex(), - comm.processId()), - #endif #endif 0); } @@ -2215,18 +2043,14 @@ protected: << " size " << tree->getCellGroup(idxLevel, idxLastBlock)->getLocalBufferSizeInByte() << " tag " << getTag(idxLevel, tree->getCellGroup(idxLevel, idxLastBlock)->getStartingIndex(), nbBlocksBeforeMinPerLevel[idxLevel] + idxLastBlock, 2, processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToSend].owner) << "\n"); - mpiPostISend(cellHandles[idxLevel][idxLastBlock].symb, - processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToSend].owner, - idxLevel, - tree->getCellGroup(idxLevel, idxLastBlock)->getStartingIndex(), - nbBlocksBeforeMinPerLevel[idxLevel] + idxLastBlock, - 0); - mpiPostISend(cellHandles[idxLevel][idxLastBlock].down, - processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToSend].owner, - idxLevel, - tree->getCellGroup(idxLevel, idxLastBlock)->getStartingIndex(), - nbBlocksBeforeMinPerLevel[idxLevel] + idxLastBlock, - 2); + starpu_mpi_isend_detached( cellHandles[idxLevel][idxLastBlock].symb, + processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToSend].owner, + getTag(idxLevel,tree->getCellGroup(idxLevel, idxLastBlock)->getStartingIndex(), 0), + comm.getComm(), 0/*callback*/, 0/*arg*/ ); + starpu_mpi_isend_detached( cellHandles[idxLevel][idxLastBlock].down, + processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToSend].owner, + getTag(idxLevel,tree->getCellGroup(idxLevel, idxLastBlock)->getStartingIndex(), 2), + comm.getComm(), 0/*callback*/, 0/*arg*/ ); lastProcSend = processesBlockInfos[idxLevel+1][firstOtherBlock + idxBlockToSend].owner; } @@ -2253,15 +2077,15 @@ protected: const size_t nbBytesInBlock = processesBlockInfos[idxLevel][firstOtherBlock].bufferSizeSymb; unsigned char* memoryBlock = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlock); remoteCellGroups[idxLevel][firstOtherBlock].ptrSymb = memoryBlock; - starpu_vector_data_register(&remoteCellGroups[idxLevel][firstOtherBlock].handleSymb, 0, - (uintptr_t)remoteCellGroups[idxLevel][firstOtherBlock].ptrSymb, nbBytesInBlock, 1); + starpu_variable_data_register(&remoteCellGroups[idxLevel][firstOtherBlock].handleSymb, 0, + (uintptr_t)remoteCellGroups[idxLevel][firstOtherBlock].ptrSymb, nbBytesInBlock); } if(remoteCellGroups[idxLevel][firstOtherBlock].ptrDown == nullptr){ const size_t nbBytesInBlock = processesBlockInfos[idxLevel][firstOtherBlock].bufferSizeDown; unsigned char* memoryBlock = (unsigned char*)FAlignedMemory::AllocateBytes<32>(nbBytesInBlock); remoteCellGroups[idxLevel][firstOtherBlock].ptrDown = memoryBlock; - starpu_vector_data_register(&remoteCellGroups[idxLevel][firstOtherBlock].handleDown, 0, - (uintptr_t)remoteCellGroups[idxLevel][firstOtherBlock].ptrDown, nbBytesInBlock, 1); + starpu_variable_data_register(&remoteCellGroups[idxLevel][firstOtherBlock].handleDown, 0, + (uintptr_t)remoteCellGroups[idxLevel][firstOtherBlock].ptrDown, nbBytesInBlock); } FLOG(FLog::Controller << "[SMpi] " << idxLevel << " Post a recv during L2L for Idx " << processesBlockInfos[idxLevel][firstOtherBlock].firstIndex << @@ -2273,18 +2097,15 @@ protected: << " size " << processesBlockInfos[idxLevel][firstOtherBlock].bufferSizeDown << " tag " << getTag(idxLevel, processesBlockInfos[idxLevel][firstOtherBlock].firstIndex, processesBlockInfos[idxLevel][firstOtherBlock].globalIdx, 2, processesBlockInfos[idxLevel][firstOtherBlock].owner) << "\n"); - mpiPostIRecv(remoteCellGroups[idxLevel][firstOtherBlock].handleSymb, - processesBlockInfos[idxLevel][firstOtherBlock].owner, - idxLevel, - processesBlockInfos[idxLevel][firstOtherBlock].firstIndex, - processesBlockInfos[idxLevel][firstOtherBlock].globalIdx, - 0); - mpiPostIRecv(remoteCellGroups[idxLevel][firstOtherBlock].handleDown, - processesBlockInfos[idxLevel][firstOtherBlock].owner, - idxLevel, - processesBlockInfos[idxLevel][firstOtherBlock].firstIndex, - processesBlockInfos[idxLevel][firstOtherBlock].globalIdx, - 2); + starpu_mpi_irecv_detached ( remoteCellGroups[idxLevel][firstOtherBlock].handleSymb, + processesBlockInfos[idxLevel][firstOtherBlock].owner, + getTag(idxLevel,processesBlockInfos[idxLevel][firstOtherBlock].firstIndex, 0), + comm.getComm(), 0/*callback*/, 0/*arg*/ ); + starpu_mpi_irecv_detached ( remoteCellGroups[idxLevel][firstOtherBlock].handleDown, + processesBlockInfos[idxLevel][firstOtherBlock].owner, + getTag(idxLevel,processesBlockInfos[idxLevel][firstOtherBlock].firstIndex, 2), + comm.getComm(), 0/*callback*/, 0/*arg*/ ); + { const MortonIndex parentStartingIdx = processesBlockInfos[idxLevel][firstOtherBlock].firstIndex; const MortonIndex parentEndingIdx = processesBlockInfos[idxLevel][firstOtherBlock].lastIndex; @@ -2322,22 +2143,7 @@ protected: task->priority = PrioClass::Controller().getInsertionPosL2L(idxLevel); #endif #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS task->name = l2lTaskNames[idxLevel].get(); - #else - task->name = taskNames.print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - 0, - 0, - 0, - 0, - 0, - processesBlockInfos[idxLevel][firstOtherBlock].firstIndex, - processesBlockInfos[idxLevel][firstOtherBlock].lastIndex, - tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), - comm.processId()); - #endif #endif FAssertLF(starpu_task_submit(task) == 0); } @@ -2371,22 +2177,7 @@ protected: task->priority = PrioClass::Controller().getInsertionPosL2L(idxLevel); #endif #ifdef STARPU_USE_TASK_NAME -#ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS task->name = l2lTaskNames[idxLevel].get(); -#else - task->name = taskNames.print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - 0, - 0, - 0, - 0, - 0, - processesBlockInfos[idxLevel][firstOtherBlock].firstIndex, - processesBlockInfos[idxLevel][firstOtherBlock].lastIndex, - tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), - comm.processId()); - #endif #endif FAssertLF(starpu_task_submit(task) == 0); @@ -2444,22 +2235,7 @@ protected: task->priority = PrioClass::Controller().getInsertionPosL2L(idxLevel); #endif #ifdef STARPU_USE_TASK_NAME -#ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS task->name = l2lTaskNames[idxLevel].get(); -#else - task->name = taskNames.print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - 0, - 0, - 0, - 0, - 0, - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), - comm.processId()); -#endif #endif FAssertLF(starpu_task_submit(task) == 0); } @@ -2498,22 +2274,7 @@ protected: task->priority = PrioClass::Controller().getInsertionPosL2L(idxLevel); #endif #ifdef STARPU_USE_TASK_NAME -#ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS task->name = l2lTaskNames[idxLevel].get(); -#else - task->name = taskNames.print("L2L", "%d, %d, %lld, %d, %lld, %lld, %lld, %lld, %lld, %lld, %d\n", - idxLevel, - 0, - 0, - 0, - 0, - 0, - tree->getCellGroup(idxLevel, idxGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel, idxGroup)->getEndingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getStartingIndex(), - tree->getCellGroup(idxLevel+1, idxSubGroup)->getEndingIndex(), - comm.processId()); -#endif #endif FAssertLF(starpu_task_submit(task) == 0); } @@ -2543,17 +2304,7 @@ protected: (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, STARPU_R, remoteParticleGroupss[interactionid].handleSymb, #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS STARPU_NAME, p2pOuterTaskNames.get(), - #else - //"P2P_out-nb_i_p_nb_i_p_s" - STARPU_NAME, taskNames.print("P2P_out", "0, 0, 0, 0, 0, 0, 0, %lld, %lld, %lld, %lld, %d\n", - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - processesBlockInfos[tree->getHeight()-1][interactionid].firstIndex, - processesBlockInfos[tree->getHeight()-1][interactionid].lastIndex, - comm.processId()), - #endif #endif 0); } @@ -2595,23 +2346,7 @@ protected: (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[interactionid].down, #endif #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS STARPU_NAME, p2pOuterTaskNames.get(), - #else - STARPU_NAME, taskNames.print("P2P_out", "%d, %lld, %lld, %d, %lld, %lld, %d, %lld, %lld, %lld, %lld, %d\n", - 0, - 0, - 0, - 0, - 0, - 0, - 0, - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - tree->getParticleGroup(interactionid)->getStartingIndex(), - tree->getParticleGroup(interactionid)->getEndingIndex(), - comm.processId()), - #endif #endif 0); } @@ -2632,16 +2367,7 @@ protected: (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, #endif #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS STARPU_NAME, p2pTaskNames.get(), - #else - STARPU_NAME, taskNames.print("P2P", "0, 0, 0, %lld, %lld, %lld, %lld, %d\n", - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - comm.processId()), - #endif #endif 0); } @@ -2676,17 +2402,7 @@ protected: (STARPU_RW|STARPU_COMMUTE_IF_SUPPORTED), particleHandles[idxGroup].down, #endif #ifdef STARPU_USE_TASK_NAME - #ifndef SCALFMM_SIMGRID_TASKNAMEPARAMS STARPU_NAME, l2pTaskNames.get(), - #else - STARPU_NAME, taskNames.print("L2P", "%d, %lld, %lld, %lld, %lld, %d\n", - 0, - 0, - 0, - tree->getParticleGroup(idxGroup)->getStartingIndex(), - tree->getParticleGroup(idxGroup)->getEndingIndex(), - comm.processId()), - #endif #endif 0); } diff --git a/Src/GroupTree/Core/FGroupTreeBerenger.hpp b/Src/GroupTree/Core/FGroupTreeBerenger.hpp deleted file mode 100644 index c28f4c76255f47203d7dbb4b90c33cce9ea6fe7f..0000000000000000000000000000000000000000 --- a/Src/GroupTree/Core/FGroupTreeBerenger.hpp +++ /dev/null @@ -1,443 +0,0 @@ - -// Keep in private GIT -#ifndef FGROUPTREE_HPP -#define FGROUPTREE_HPP -#include <vector> -#include <functional> - -#include "../../Utils/FAssert.hpp" -#include "../../Utils/FPoint.hpp" -#include "../../Utils/FQuickSort.hpp" -#include "../../Containers/FTreeCoordinate.hpp" -#include "../../Containers/FCoordinateComputer.hpp" -#include "FGroupOfCells.hpp" -#include "FGroupOfParticles.hpp" -#include "FGroupAttachedLeaf.hpp" - - - -template <class FReal, class CompositeCellClass, class SymboleCellClass, class PoleCellClass, class LocalCellClass, - class GroupAttachedLeafClass, unsigned NbSymbAttributes, unsigned NbAttributesPerParticle, class AttributeClass = FReal> -class FGroupTreeBerenger { -public: - typedef GroupAttachedLeafClass BasicAttachedClass; - typedef FGroupOfParticles<FReal, NbSymbAttributes, NbAttributesPerParticle,AttributeClass> ParticleGroupClass; - typedef FGroupOfCells<CompositeCellClass, SymboleCellClass, PoleCellClass, LocalCellClass> CellGroupClass; - -protected: - //< height of the tree (1 => only the root) - const int treeHeight; - //< max number of cells in a block - const int nbElementsPerBlock; - //< all the blocks of the tree - std::vector<CellGroupClass*>* cellBlocksPerLevel; - //< all the blocks of leaves - std::vector<ParticleGroupClass*> particleBlocks; - - //< the space system center - const FPoint<FReal> boxCenter; - //< the space system corner (used to compute morton index) - const FPoint<FReal> boxCorner; - //< the space system width - const FReal boxWidth; - //< the width of a box at width level - const FReal boxWidthAtLeafLevel; - -public: - typedef typename std::vector<CellGroupClass*>::iterator CellGroupIterator; - typedef typename std::vector<CellGroupClass*>::const_iterator CellGroupConstIterator; - typedef typename std::vector<ParticleGroupClass*>::iterator ParticleGroupIterator; - typedef typename std::vector<ParticleGroupClass*>::const_iterator ParticleGroupConstIterator; - - /** - * This constructor create a group tree from a particle container index. - * The morton index are computed and the particles are sorted in a first stage. - * Then the leaf level is done. - * Finally the other leve are proceed one after the other. - * It should be easy to make it parallel using for and tasks. - * If no limite give inLeftLimite = -1 - */ - template<class ParticleContainer> - FGroupTreeBerenger(const int inTreeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, - const int inNbElementsPerBlock, ParticleContainer* inParticlesContainer, - const bool particlesAreSorted, std::vector<MortonIndex> const& distributedMortonIndex, MortonIndex inLeftLimite = -1): - treeHeight(inTreeHeight),nbElementsPerBlock(inNbElementsPerBlock),cellBlocksPerLevel(nullptr), - boxCenter(inBoxCenter), boxCorner(inBoxCenter,-(inBoxWidth/2)), boxWidth(inBoxWidth), - boxWidthAtLeafLevel(inBoxWidth/FReal(1<<(inTreeHeight-1))){ - - cellBlocksPerLevel = new std::vector<CellGroupClass*>[treeHeight]; - - MortonIndex* currentBlockIndexes = new MortonIndex[nbElementsPerBlock]; - // First we work at leaf level - { - // Build morton index for particles - struct ParticleSortingStruct{ - FSize originalIndex; - MortonIndex mindex; - }; - // Convert position to morton index - const FSize nbParticles = inParticlesContainer->getNbParticles(); - ParticleSortingStruct* particlesToSort = new ParticleSortingStruct[nbParticles]; - { - const FReal* xpos = inParticlesContainer->getPositions()[0]; - const FReal* ypos = inParticlesContainer->getPositions()[1]; - const FReal* zpos = inParticlesContainer->getPositions()[2]; - - for(FSize idxPart = 0 ; idxPart < nbParticles ; ++idxPart){ - const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPositionAndCorner<FReal>(this->boxCorner, this->boxWidth, - treeHeight, - FPoint<FReal>(xpos[idxPart], ypos[idxPart], zpos[idxPart]) ); - const MortonIndex particleIndex = host.getMortonIndex(treeHeight-1); - particlesToSort[idxPart].mindex = particleIndex; - particlesToSort[idxPart].originalIndex = idxPart; - } - } - - // Sort if needed - if(particlesAreSorted == false){ - FQuickSort<ParticleSortingStruct, FSize>::QsOmp(particlesToSort, nbParticles, [](const ParticleSortingStruct& v1, const ParticleSortingStruct& v2){ - return v1.mindex <= v2.mindex; - }); - } - - FAssertLF(nbParticles == 0 || inLeftLimite < particlesToSort[0].mindex); - - // Convert to block - const int idxLevel = (treeHeight - 1); - FSize* nbParticlesPerLeaf = new FSize[nbElementsPerBlock]; - FSize firstParticle = 0; - // We need to proceed each group in sub level - while(firstParticle != nbParticles){ - int sizeOfBlock = 0; - FSize lastParticle = firstParticle; - // Count until end of sub group is reached or we have enough cells (or until it reach the next mortonIndex boundary) TODO - while(sizeOfBlock < nbElementsPerBlock && lastParticle < nbParticles){ - if(sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != particlesToSort[lastParticle].mindex){ - currentBlockIndexes[sizeOfBlock] = particlesToSort[lastParticle].mindex; - nbParticlesPerLeaf[sizeOfBlock] = 1; - sizeOfBlock += 1; - } - else{ - nbParticlesPerLeaf[sizeOfBlock-1] += 1; - } - lastParticle += 1; - } - while(lastParticle < nbParticles && currentBlockIndexes[sizeOfBlock-1] == particlesToSort[lastParticle].mindex){ - nbParticlesPerLeaf[sizeOfBlock-1] += 1; - lastParticle += 1; - } - - // Create a group - CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock); - ParticleGroupClass*const newParticleBlock = new ParticleGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock, lastParticle-firstParticle); - - // Init cells - size_t nbParticlesOffsetBeforeLeaf = 0; - FSize offsetParticles = firstParticle; - for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ - newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); - - CompositeCellClass newNode = newBlock->getCompleteCell(cellIdInBlock); - newNode.setMortonIndex(currentBlockIndexes[cellIdInBlock]); - FTreeCoordinate coord; - coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock], idxLevel); - newNode.setCoordinate(coord); - - // Add leaf - nbParticlesOffsetBeforeLeaf = newParticleBlock->newLeaf(currentBlockIndexes[cellIdInBlock], cellIdInBlock, - nbParticlesPerLeaf[cellIdInBlock], nbParticlesOffsetBeforeLeaf); - - BasicAttachedClass attachedLeaf = newParticleBlock->template getLeaf<BasicAttachedClass>(cellIdInBlock); - // Copy each particle from the original position - for(FSize idxPart = 0 ; idxPart < nbParticlesPerLeaf[cellIdInBlock] ; ++idxPart){ - attachedLeaf.setParticle(idxPart, particlesToSort[idxPart + offsetParticles].originalIndex, inParticlesContainer); - } - offsetParticles += nbParticlesPerLeaf[cellIdInBlock]; - } - - // Keep the block - cellBlocksPerLevel[idxLevel].push_back(newBlock); - particleBlocks.push_back(newParticleBlock); - - sizeOfBlock = 0; - firstParticle = lastParticle; - } - delete[] nbParticlesPerLeaf; - delete[] particlesToSort; - } - - - // For each level from heigth - 2 to 1 - for(int idxLevel = treeHeight-2; idxLevel > 0 ; --idxLevel){ - inLeftLimite = (inLeftLimite == -1 ? inLeftLimite : (inLeftLimite>>3)); - - CellGroupConstIterator iterChildCells = cellBlocksPerLevel[idxLevel+1].begin(); - const CellGroupConstIterator iterChildEndCells = cellBlocksPerLevel[idxLevel+1].end(); - - // Skip blocks that do not respect limit - while(iterChildCells != iterChildEndCells - && ((*iterChildCells)->getEndingIndex()>>3) <= inLeftLimite){ - ++iterChildCells; - } - // If lower level is empty or all blocks skiped stop here - if(iterChildCells == iterChildEndCells){ - break; - } - - MortonIndex currentCellIndex = (*iterChildCells)->getStartingIndex(); - if((currentCellIndex>>3) <= inLeftLimite) currentCellIndex = ((inLeftLimite+1)<<3); - int sizeOfBlock = 0; - - // We need to proceed each group in sub level - while(iterChildCells != iterChildEndCells){ - // Count until end of sub group is reached or we have enough cells - while(sizeOfBlock < nbElementsPerBlock && iterChildCells != iterChildEndCells ){ - if((sizeOfBlock == 0 || currentBlockIndexes[sizeOfBlock-1] != (currentCellIndex>>3)) - && (*iterChildCells)->exists(currentCellIndex)){ - currentBlockIndexes[sizeOfBlock] = (currentCellIndex>>3); - sizeOfBlock += 1; - currentCellIndex = (((currentCellIndex>>3)+1)<<3); - } - else{ - currentCellIndex += 1; - } - // If we are at the end of the sub group, move to next - while(iterChildCells != iterChildEndCells && (*iterChildCells)->getEndingIndex() <= currentCellIndex){ - ++iterChildCells; - // Update morton index - if(iterChildCells != iterChildEndCells && currentCellIndex < (*iterChildCells)->getStartingIndex()){ - currentCellIndex = (*iterChildCells)->getStartingIndex(); - } - } - } - - // If group is full - if(sizeOfBlock == nbElementsPerBlock || (sizeOfBlock && iterChildCells == iterChildEndCells)){ - // Create a group - CellGroupClass*const newBlock = new CellGroupClass(currentBlockIndexes[0], - currentBlockIndexes[sizeOfBlock-1]+1, - sizeOfBlock); - // Init cells - for(int cellIdInBlock = 0; cellIdInBlock != sizeOfBlock ; ++cellIdInBlock){ - newBlock->newCell(currentBlockIndexes[cellIdInBlock], cellIdInBlock); - - CompositeCellClass newNode = newBlock->getCompleteCell(cellIdInBlock); - newNode.setMortonIndex(currentBlockIndexes[cellIdInBlock]); - FTreeCoordinate coord; - coord.setPositionFromMorton(currentBlockIndexes[cellIdInBlock], idxLevel); - newNode.setCoordinate(coord); - } - - // Keep the block - cellBlocksPerLevel[idxLevel].push_back(newBlock); - - sizeOfBlock = 0; - } - } - } - delete[] currentBlockIndexes; - } - - /** This function dealloc the tree by deleting each block */ - ~FGroupTreeBerenger(){ - for(int idxLevel = 0 ; idxLevel < treeHeight ; ++idxLevel){ - std::vector<CellGroupClass*>& levelBlocks = cellBlocksPerLevel[idxLevel]; - for (CellGroupClass* block: levelBlocks){ - delete block; - } - } - delete[] cellBlocksPerLevel; - - for (ParticleGroupClass* block: particleBlocks){ - delete block; - } - } - - - ///////////////////////////////////////////////////////// - // Lambda function to apply to all member - ///////////////////////////////////////////////////////// - - /** - * @brief forEachLeaf iterate on the leaf and apply the function - * @param function - */ - template<class ParticlesAttachedClass> - void forEachLeaf(std::function<void(ParticlesAttachedClass*)> function){ - for (ParticleGroupClass* block: particleBlocks){ - block->forEachLeaf(function); - } - } - - /** - * @brief forEachLeaf iterate on the cell and apply the function - * @param function - */ - void forEachCell(std::function<void(CompositeCellClass)> function){ - for(int idxLevel = 0 ; idxLevel < treeHeight ; ++idxLevel){ - std::vector<CellGroupClass*>& levelBlocks = cellBlocksPerLevel[idxLevel]; - for (CellGroupClass* block: levelBlocks){ - block->forEachCell(function); - } - } - } - - /** - * @brief forEachLeaf iterate on the cell and apply the function - * @param function - */ - void forEachCellWithLevel(std::function<void(CompositeCellClass,const int)> function){ - for(int idxLevel = 0 ; idxLevel < treeHeight ; ++idxLevel){ - std::vector<CellGroupClass*>& levelBlocks = cellBlocksPerLevel[idxLevel]; - for (CellGroupClass* block: levelBlocks){ - block->forEachCell(function, idxLevel); - } - } - } - - /** - * @brief forEachLeaf iterate on the cell and apply the function - * @param function - */ - template<class ParticlesAttachedClass> - void forEachCellLeaf(std::function<void(CompositeCellClass,ParticlesAttachedClass*)> function){ - CellGroupIterator iterCells = cellBlocksPerLevel[treeHeight-1].begin(); - const CellGroupIterator iterEndCells = cellBlocksPerLevel[treeHeight-1].end(); - - ParticleGroupIterator iterLeaves = particleBlocks.begin(); - const ParticleGroupIterator iterEndLeaves = particleBlocks.end(); - - while(iterCells != iterEndCells && iterLeaves != iterEndLeaves){ - (*iterCells)->forEachCell([&](CompositeCellClass aCell){ - const int leafIdx = (*iterLeaves)->getLeafIndex(aCell.getMortonIndex()); - FAssertLF(leafIdx != -1); - ParticlesAttachedClass aLeaf = (*iterLeaves)->template getLeaf <ParticlesAttachedClass>(leafIdx); - FAssertLF(aLeaf.isAttachedToSomething()); - function(aCell, &aLeaf); - }); - - ++iterCells; - ++iterLeaves; - } - - FAssertLF(iterCells == iterEndCells && iterLeaves == iterEndLeaves); - } - - - - /** @brief, for statistic purpose, display each block with number of - * cell, size of header, starting index, and ending index - */ - void printInfoBlocks(){ - std::cout << "Group Tree information:\n"; - std::cout << "\t Group Size = " << nbElementsPerBlock << "\n"; - std::cout << "\t Tree height = " << treeHeight << "\n"; - for(int idxLevel = 1 ; idxLevel < treeHeight ; ++idxLevel){ - std::vector<CellGroupClass*>& levelBlocks = cellBlocksPerLevel[idxLevel]; - std::cout << "Level " << idxLevel << ", there are " << levelBlocks.size() << " groups.\n"; - int idxGroup = 0; - for (const CellGroupClass* block: levelBlocks){ - std::cout << "\t Group " << (idxGroup++); - std::cout << "\t Size = " << block->getNumberOfCellsInBlock(); - std::cout << "\t Starting Index = " << block->getStartingIndex(); - std::cout << "\t Ending Index = " << block->getEndingIndex(); - std::cout << "\t Ratio of usage = " << float(block->getNumberOfCellsInBlock())/float(block->getEndingIndex()-block->getStartingIndex()) << "\n"; - } - } - - std::cout << "There are " << particleBlocks.size() << " leaf-groups.\n"; - int idxGroup = 0; - FSize totalNbParticles = 0; - for (const ParticleGroupClass* block: particleBlocks){ - std::cout << "\t Group " << (idxGroup++); - std::cout << "\t Size = " << block->getNumberOfLeavesInBlock(); - std::cout << "\t Starting Index = " << block->getStartingIndex(); - std::cout << "\t Ending Index = " << block->getEndingIndex(); - std::cout << "\t Nb Particles = " << block->getNbParticlesInGroup(); - std::cout << "\t Ratio of usage = " << float(block->getNumberOfLeavesInBlock())/float(block->getEndingIndex()-block->getStartingIndex()) << "\n"; - totalNbParticles += block->getNbParticlesInGroup(); - } - std::cout << "There are " << totalNbParticles << " particles.\n"; - } - - ///////////////////////////////////////////////////////// - // Algorithm function - ///////////////////////////////////////////////////////// - - int getHeight() const { - return treeHeight; - } - - CellGroupIterator cellsBegin(const int inLevel){ - FAssertLF(inLevel < treeHeight); - return cellBlocksPerLevel[inLevel].begin(); - } - - CellGroupConstIterator cellsBegin(const int inLevel) const { - FAssertLF(inLevel < treeHeight); - return cellBlocksPerLevel[inLevel].begin(); - } - - CellGroupIterator cellsEnd(const int inLevel){ - FAssertLF(inLevel < treeHeight); - return cellBlocksPerLevel[inLevel].end(); - } - - CellGroupConstIterator cellsEnd(const int inLevel) const { - FAssertLF(inLevel < treeHeight); - return cellBlocksPerLevel[inLevel].end(); - } - - int getNbCellGroupAtLevel(const int inLevel) const { - FAssertLF(inLevel < treeHeight); - return int(cellBlocksPerLevel[inLevel].size()); - } - - CellGroupClass* getCellGroup(const int inLevel, const int inIdx){ - FAssertLF(inLevel < treeHeight); - FAssertLF(inIdx < int(cellBlocksPerLevel[inLevel].size())); - return cellBlocksPerLevel[inLevel][inIdx]; - } - - const CellGroupClass* getCellGroup(const int inLevel, const int inIdx) const { - FAssertLF(inLevel < treeHeight); - FAssertLF(inIdx < int(cellBlocksPerLevel[inLevel].size())); - return cellBlocksPerLevel[inLevel][inIdx]; - } - - ParticleGroupIterator leavesBegin(){ - return particleBlocks.begin(); - } - - ParticleGroupConstIterator leavesBegin() const { - return particleBlocks.begin(); - } - - ParticleGroupIterator leavesEnd(){ - return particleBlocks.end(); - } - - ParticleGroupConstIterator leavesEnd() const { - return particleBlocks.end(); - } - - int getNbParticleGroup() const { - return int(particleBlocks.size()); - } - - ParticleGroupClass* getParticleGroup(const int inIdx){ - FAssertLF(inIdx < int(particleBlocks.size())); - return particleBlocks[inIdx]; - } - - const ParticleGroupClass* getParticleGroup(const int inIdx) const { - FAssertLF(inIdx < int(particleBlocks.size())); - return particleBlocks[inIdx]; - } -}; - -#endif // FGROUPTREE_HPP diff --git a/Src/GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp b/Src/GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp index 0a18330d655185ab33bee66a2e6282821192fcc2..456cdbe482bb26fa87472a68a8ae7318e44cbf79 100644 --- a/Src/GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp +++ b/Src/GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp @@ -35,8 +35,6 @@ #include "FStarPUUtils.hpp" #include "../../Utils/FTaskTimer.hpp" -#include <iostream> -using namespace std; template <class CellContainerClass, class CellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass> @@ -94,12 +92,12 @@ public: } static void bottomPassCallback(void *buffers[], void *cl_arg){ - CellContainerClass leafCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), + CellContainerClass leafCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]), nullptr); - ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), - STARPU_VECTOR_GET_NX(buffers[2]), + ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]), nullptr); FStarPUPtrInterface* worker = nullptr; @@ -130,9 +128,9 @@ public: ///////////////////////////////////////////////////////////////////////////////////// static void upwardPassCallback(void *buffers[], void *cl_arg){ - CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), + CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]), nullptr); FStarPUPtrInterface* worker = nullptr; @@ -145,9 +143,9 @@ public: #endif CellContainerClass subCellGroup( - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), - STARPU_VECTOR_GET_NX(buffers[2]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[3]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]), nullptr); worker->get<ThisClass>(FSTARPU_CPU_IDX)->upwardPassPerform(¤tCells, &subCellGroup, idxLevel); @@ -202,13 +200,13 @@ public: ///////////////////////////////////////////////////////////////////////////////////// #if defined(STARPU_USE_MPI) && defined(SCALFMM_USE_MPI) static void transferInoutPassCallbackMpi(void *buffers[], void *cl_arg){ - CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0]), + CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), nullptr, - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1])); - CellContainerClass externalCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), - STARPU_VECTOR_GET_NX(buffers[2]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[3]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1])); + CellContainerClass externalCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]), nullptr); FStarPUPtrInterface* worker = nullptr; @@ -245,10 +243,10 @@ public: ///////////////////////////////////////////////////////////////////////////////////// static void transferInPassCallback(void *buffers[], void *cl_arg){ - CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2])); + CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2])); FStarPUPtrInterface* worker = nullptr; int idxLevel = 0; @@ -301,13 +299,13 @@ public: } static void transferInoutPassCallback(void *buffers[], void *cl_arg){ - CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0]), + CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), nullptr, - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1])); - CellContainerClass externalCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), - STARPU_VECTOR_GET_NX(buffers[2]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[3]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1])); + CellContainerClass externalCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3]), nullptr); FStarPUPtrInterface* worker = nullptr; @@ -363,10 +361,10 @@ public: /// Downard Pass ///////////////////////////////////////////////////////////////////////////////////// static void downardPassCallback(void *buffers[], void *cl_arg){ - CellContainerClass currentCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0]), + CellContainerClass currentCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), nullptr, - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1])); + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1])); FStarPUPtrInterface* worker = nullptr; int idxLevel = 0; @@ -378,10 +376,10 @@ public: #endif CellContainerClass subCellGroup( - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), - STARPU_VECTOR_GET_NX(buffers[2]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]), nullptr, - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[3])); + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3])); worker->get<ThisClass>(FSTARPU_CPU_IDX)->downardPassPerform(¤tCells, &subCellGroup, idxLevel); } @@ -435,11 +433,11 @@ public: #if defined(STARPU_USE_MPI) && defined(SCALFMM_USE_MPI) static void directInoutPassCallbackMpi(void *buffers[], void *cl_arg){ - ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1])); - ParticleGroupClass externalContainers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), - STARPU_VECTOR_GET_NX(buffers[2]), + ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1])); + ParticleGroupClass externalContainers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]), nullptr); FStarPUPtrInterface* worker = nullptr; @@ -453,11 +451,6 @@ public: void directInoutPassPerformMpi(ParticleGroupClass* containers, ParticleGroupClass* containersOther, const std::vector<OutOfBlockInteraction>* outsideInteractions){ KernelClass*const kernel = kernels[GetWorkerId()]; - //const MortonIndex blockStartIdx = containers->getStartingIndex(); - //const MortonIndex blockEndIdx = containers->getEndingIndex(); - //const MortonIndex blockStartIdxOther = containersOther->getStartingIndex(); - //const MortonIndex blockEndIdxOther = containersOther->getEndingIndex(); - //cerr << "DirectInoutMpi [" << blockStartIdx << "," << blockEndIdx << "] - [" << blockStartIdxOther << "," << blockEndIdxOther << "]" << endl; for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){ const int leafPos = containersOther->getLeafIndex((*outsideInteractions)[outInterIdx].outIndex); if(leafPos != -1){ @@ -478,9 +471,9 @@ public: ///////////////////////////////////////////////////////////////////////////////////// static void directInPassCallback(void *buffers[], void *cl_arg){ - ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1])); + ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1])); FStarPUPtrInterface* worker = nullptr; int intervalSize; @@ -494,9 +487,8 @@ public: void directInPassPerform(ParticleGroupClass* containers){ FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(GetWorkerId(), &taskTimeRecorder, containers->getStartingIndex()*20*8 + 5, "P2P")); - const MortonIndex blockStartIdx = containers->getStartingIndex(); - const MortonIndex blockEndIdx = containers->getEndingIndex(); - //cerr << "DirectIn [" << blockStartIdx << "," << blockEndIdx << "]" << endl; + const MortonIndex blockStartIdx = containers->getStartingIndex(); + const MortonIndex blockEndIdx = containers->getEndingIndex(); KernelClass*const kernel = kernels[GetWorkerId()]; for(int leafIdx = 0 ; leafIdx < containers->getNumberOfLeavesInBlock() ; ++leafIdx){ @@ -528,18 +520,12 @@ public: } static void directInoutPassCallback(void *buffers[], void *cl_arg){ - unsigned char* inBuffer = (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]); - size_t size = STARPU_VECTOR_GET_NX(buffers[0]); - unsigned char* inAttribut = (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1]); - ParticleGroupClass containers(inBuffer, - size, - inAttribut); - inBuffer = (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]); - size = STARPU_VECTOR_GET_NX(buffers[2]); - inAttribut = (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[3]); - ParticleGroupClass externalContainers(inBuffer, - size, - inAttribut); + ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1])); + ParticleGroupClass externalContainers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3])); FStarPUPtrInterface* worker = nullptr; const std::vector<OutOfBlockInteraction>* outsideInteractions = nullptr; @@ -557,11 +543,6 @@ public: const std::vector<OutOfBlockInteraction>* outsideInteractions){ FTIME_TASKS(FTaskTimer::ScopeEvent taskTime(GetWorkerId(), &taskTimeRecorder, ((containersOther->getStartingIndex()+1) * (containers->getStartingIndex()+1))*20*8 + 6, "P2P-ext")); KernelClass*const kernel = kernels[GetWorkerId()]; - //const MortonIndex blockStartIdx = containers->getStartingIndex(); - //const MortonIndex blockEndIdx = containers->getEndingIndex(); - //const MortonIndex blockStartIdxOther = containersOther->getStartingIndex(); - //const MortonIndex blockEndIdxOther = containersOther->getEndingIndex(); - //cerr << "DirectInout [" << blockStartIdx << "," << blockEndIdx << "] - [" << blockStartIdxOther << "," << blockEndIdxOther << "]" << endl; for(int outInterIdx = 0 ; outInterIdx < int(outsideInteractions->size()) ; ++outInterIdx){ ParticleContainerClass interParticles = containersOther->template getLeaf<ParticleContainerClass>((*outsideInteractions)[outInterIdx].outsideIdxInBlock); ParticleContainerClass particles = containers->template getLeaf<ParticleContainerClass>((*outsideInteractions)[outInterIdx].insideIdxInBlock); @@ -569,13 +550,13 @@ public: FAssertLF(containersOther->getLeafMortonIndex((*outsideInteractions)[outInterIdx].outsideIdxInBlock) == (*outsideInteractions)[outInterIdx].outIndex); FAssertLF(containers->getLeafMortonIndex((*outsideInteractions)[outInterIdx].insideIdxInBlock) == (*outsideInteractions)[outInterIdx].insideIndex); - ParticleContainerClass* ptrLeaf = &interParticles; - kernel->P2POuter( FTreeCoordinate((*outsideInteractions)[outInterIdx].insideIndex, treeHeight-1), - &particles , &ptrLeaf, &(*outsideInteractions)[outInterIdx].relativeOutPosition, 1); - const int otherPosition = getOppositeNeighIndex((*outsideInteractions)[outInterIdx].relativeOutPosition); - ptrLeaf = &particles; - kernel->P2POuter( FTreeCoordinate((*outsideInteractions)[outInterIdx].outIndex, treeHeight-1), - &interParticles , &ptrLeaf, &otherPosition, 1); + ParticleContainerClass* ptrLeaf = &interParticles; + kernel->P2POuter( FTreeCoordinate((*outsideInteractions)[outInterIdx].insideIndex, treeHeight-1), + &particles , &ptrLeaf, &(*outsideInteractions)[outInterIdx].relativeOutPosition, 1); + const int otherPosition = getOppositeNeighIndex((*outsideInteractions)[outInterIdx].relativeOutPosition); + ptrLeaf = &particles; + kernel->P2POuter( FTreeCoordinate((*outsideInteractions)[outInterIdx].outIndex, treeHeight-1), + &interParticles , &ptrLeaf, &otherPosition, 1); } } @@ -584,13 +565,13 @@ public: ///////////////////////////////////////////////////////////////////////////////////// static void mergePassCallback(void *buffers[], void *cl_arg){ - CellContainerClass leafCells((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[0]), - STARPU_VECTOR_GET_NX(buffers[0]), + CellContainerClass leafCells((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[0]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[0]), nullptr, - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[1])); - ParticleGroupClass containers((unsigned char*)STARPU_VECTOR_GET_PTR(buffers[2]), - STARPU_VECTOR_GET_NX(buffers[2]), - (unsigned char*)STARPU_VECTOR_GET_PTR(buffers[3])); + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[1])); + ParticleGroupClass containers((unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[2]), + STARPU_VARIABLE_GET_ELEMSIZE(buffers[2]), + (unsigned char*)STARPU_VARIABLE_GET_PTR(buffers[3])); FStarPUPtrInterface* worker = nullptr; int intervalSize; diff --git a/Src/ScalFmmConfig.h.cmake b/Src/ScalFmmConfig.h.cmake index 559ddcf2356f99c33e7b7849d4ed974a45bd1e90..8663a90c1d290c2fa4feb80687ed5950807be562 100644 --- a/Src/ScalFmmConfig.h.cmake +++ b/Src/ScalFmmConfig.h.cmake @@ -161,6 +161,7 @@ const std::string SCALFMMCompileLibs("@SCALFMM_COMPILE_LIBS@"); #cmakedefine SCALFMM_STARPU_USE_REDUX #cmakedefine SCALFMM_STARPU_USE_PRIO #cmakedefine SCALFMM_STARPU_FORCE_NO_SCHEDULER +#cmakedefine SCALFMM_USE_STARPU_EXTRACT /////////////////////////////////////////////////////// // To control simgrid config diff --git a/Src/Utils/FMpi.cpp b/Src/Utils/FMpi.cpp index a12ffede68c0aa9bdaffa9fe4a4c04df2d90a314..6ca6e5021eef41e0cd7ed1a902ddb264a7807da7 100644 --- a/Src/Utils/FMpi.cpp +++ b/Src/Utils/FMpi.cpp @@ -2,3 +2,4 @@ #include "Utils/FMpi.hpp" +int FMpi::instanceCount = 0; diff --git a/Src/Utils/FMpi.hpp b/Src/Utils/FMpi.hpp index 5c442b4f9c19c092412068d803ca80782fbbea45..690e7332bf7f494cbc247d2e7f41e9a2c1944759 100644 --- a/Src/Utils/FMpi.hpp +++ b/Src/Utils/FMpi.hpp @@ -533,7 +533,6 @@ private: /// Counter to avoid several instanciations static int instanceCount; }; -int FMpi::instanceCount = 0; #endif //FMPI_HPP diff --git a/Tests/GroupTree/testBlockedChebyshev.cpp b/Tests/GroupTree/testBlockedChebyshev.cpp index d3452cbc9a5f9b663604e09266f9f317a5f6eef7..35863a7197d46408c24fec76be81aa730fc5045d 100644 --- a/Tests/GroupTree/testBlockedChebyshev.cpp +++ b/Tests/GroupTree/testBlockedChebyshev.cpp @@ -24,7 +24,6 @@ #include "../../Src/Files/FRandomLoader.hpp" #include "../../Src/Files/FFmaGenericLoader.hpp" -#include "../../Src/Files/FGenerateDistribution.hpp" #include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp" #include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp" @@ -42,14 +41,11 @@ #include <memory> -#define RANDOM_PARTICLES +//#define RANDOM_PARTICLES int main(int argc, char* argv[]){ const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"}; const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"}; - const FParameterNames LocalOptionEllipsoid = {{"-ellipsoid"} , " non uniform distribution on an ellipsoid of aspect ratio given by a=0.5 b=0.25 c=0.125"}; - const FParameterNames LocalOptionPlummer = {{"-plummer"} , " (Highly non uniform) plummer distribution (astrophysics)"}; - const FParameterNames LocalOptionCube = {{"-cube"} , " uniform distribution on cube (default)"}; FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.", FParameterDefinitions::OctreeHeight, #ifdef RANDOM_PARTICLES @@ -57,7 +53,7 @@ int main(int argc, char* argv[]){ #else FParameterDefinitions::InputFile, #endif - LocalOptionBlocSize, LocalOptionNoValidate, LocalOptionEllipsoid, LocalOptionPlummer, LocalOptionCube); + LocalOptionBlocSize, LocalOptionNoValidate); // Initialize the types typedef double FReal; @@ -69,6 +65,7 @@ int main(int argc, char* argv[]){ typedef FChebCellPODLocal<FReal,ORDER> GroupCellDownClass; typedef FChebCellPOD<FReal,ORDER> GroupCellClass; + typedef FP2PGroupParticleContainer<FReal> GroupContainerClass; typedef FGroupTree< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal> GroupOctreeClass; #ifdef SCALFMM_USE_STARPU @@ -98,42 +95,24 @@ int main(int argc, char* argv[]){ FAssertLF(loader.isOpen()); FTic timer; -#ifdef RANDOM_PARTICLES - setSeed(1); - FReal * tmpParticles = new FReal[4*loader.getNumberOfParticles()]; - if(FParameters::existParameter(argc, argv, "-ellipsoid")) { - nonunifRandomPointsOnElipsoid(loader.getNumberOfParticles(), loader.getBoxWidth()/2, loader.getBoxWidth()/4, tmpParticles); - } - else if(FParameters::existParameter(argc, argv, "-plummer")) { - //The M argument is not used in the algorithm of the plummer distribution - unifRandomPlummer(loader.getNumberOfParticles(), loader.getBoxWidth()/2, tmpParticles) ; - } - else { //Uniform cube - unifRandomPointsInCube(loader.getNumberOfParticles(), loader.getBoxWidth()/2, loader.getBoxWidth()/2, loader.getBoxWidth()/2, tmpParticles); - } -#endif - FP2PParticleContainer<FReal> allParticles; for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ FPoint<FReal> particlePosition; FReal physicalValue; #ifdef RANDOM_PARTICLES physicalValue = 0.10; - particlePosition.setPosition(tmpParticles[idxPart*4], tmpParticles[idxPart*4+1], tmpParticles[idxPart*4+2]); + loader.fillParticle(&particlePosition); #else loader.fillParticle(&particlePosition, &physicalValue); #endif allParticles.push(particlePosition, physicalValue); } -#ifdef RANDOM_PARTICLES - delete[] tmpParticles; -#endif std::cout << "Particles loaded in " << timer.tacAndElapsed() << "s\n"; // Put the data into the tree timer.tic(); GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles); - //groupedTree.printInfoBlocks(); + groupedTree.printInfoBlocks(); std::cout << "Tree created in " << timer.tacAndElapsed() << "s\n"; // Run the algorithm @@ -142,11 +121,8 @@ int main(int argc, char* argv[]){ GroupAlgorithm groupalgo(&groupedTree,&groupkernel); timer.tic(); - starpu_fxt_start_profiling(); groupalgo.execute(); - starpu_fxt_stop_profiling(); - timer.tac(); - std::cout << "Average executed in in " << timer.elapsed() << "s\n"; + std::cout << "Kernel executed in in " << timer.tacAndElapsed() << "s\n"; // Validate the result if(FParameters::existParameter(argc, argv, LocalOptionNoValidate.options) == false){ diff --git a/Tests/GroupTree/testBlockedChebyshevMartin.cpp b/Tests/GroupTree/testBlockedChebyshevMartin.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d3452cbc9a5f9b663604e09266f9f317a5f6eef7 --- /dev/null +++ b/Tests/GroupTree/testBlockedChebyshevMartin.cpp @@ -0,0 +1,222 @@ +// ==== CMAKE ===== +// @FUSE_BLAS +// ================ +// Keep in private GIT + + +#include "../../Src/Utils/FGlobal.hpp" + +#include "../../Src/GroupTree/Core/FGroupTree.hpp" + +#include "../../Src/Components/FSimpleLeaf.hpp" +#include "../../Src/Containers/FVector.hpp" + +#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp" + +#include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp" +#include "Kernels/Interpolation/FInterpMatrixKernel.hpp" + +#include "../../Src/GroupTree/Chebyshev/FChebCellPOD.hpp" + +#include "../../Src/Utils/FMath.hpp" +#include "../../Src/Utils/FMemUtils.hpp" +#include "../../Src/Utils/FParameters.hpp" + +#include "../../Src/Files/FRandomLoader.hpp" +#include "../../Src/Files/FFmaGenericLoader.hpp" +#include "../../Src/Files/FGenerateDistribution.hpp" + +#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp" +#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp" +#ifdef SCALFMM_USE_OMP4 +#include "../../Src/GroupTree/Core/FGroupTaskDepAlgorithm.hpp" +#endif +#ifdef SCALFMM_USE_STARPU +#include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp" +#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp" +#endif +#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" + +#include "../../Src/Utils/FParameterNames.hpp" + +#include <memory> + + +#define RANDOM_PARTICLES + +int main(int argc, char* argv[]){ + const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"}; + const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"}; + const FParameterNames LocalOptionEllipsoid = {{"-ellipsoid"} , " non uniform distribution on an ellipsoid of aspect ratio given by a=0.5 b=0.25 c=0.125"}; + const FParameterNames LocalOptionPlummer = {{"-plummer"} , " (Highly non uniform) plummer distribution (astrophysics)"}; + const FParameterNames LocalOptionCube = {{"-cube"} , " uniform distribution on cube (default)"}; + FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.", + FParameterDefinitions::OctreeHeight, +#ifdef RANDOM_PARTICLES + FParameterDefinitions::NbParticles, +#else + FParameterDefinitions::InputFile, +#endif + LocalOptionBlocSize, LocalOptionNoValidate, LocalOptionEllipsoid, LocalOptionPlummer, LocalOptionCube); + + // Initialize the types + typedef double FReal; + static const int ORDER = 6; + typedef FInterpMatrixKernelR<FReal> MatrixKernelClass; + + typedef FChebCellPODCore GroupCellSymbClass; + typedef FChebCellPODPole<FReal,ORDER> GroupCellUpClass; + typedef FChebCellPODLocal<FReal,ORDER> GroupCellDownClass; + typedef FChebCellPOD<FReal,ORDER> GroupCellClass; + + typedef FP2PGroupParticleContainer<FReal> GroupContainerClass; + typedef FGroupTree< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal> GroupOctreeClass; +#ifdef SCALFMM_USE_STARPU + typedef FStarPUAllCpuCapacities<FChebSymKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass; + typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper; + typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper > GroupAlgorithm; +#elif defined(SCALFMM_USE_OMP4) + typedef FChebSymKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass; + typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, + GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm; +#else + typedef FChebSymKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass; + //typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm; + typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm; +#endif + // Get params + const int NbLevels = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5); + const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250); + + // Load the particles +#ifdef RANDOM_PARTICLES + FRandomLoader<FReal> loader(FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, 2000), 1.0, FPoint<FReal>(0,0,0), 0); +#else + const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + FFmaGenericLoader<FReal> loader(filename); +#endif + FAssertLF(loader.isOpen()); + FTic timer; + +#ifdef RANDOM_PARTICLES + setSeed(1); + FReal * tmpParticles = new FReal[4*loader.getNumberOfParticles()]; + if(FParameters::existParameter(argc, argv, "-ellipsoid")) { + nonunifRandomPointsOnElipsoid(loader.getNumberOfParticles(), loader.getBoxWidth()/2, loader.getBoxWidth()/4, tmpParticles); + } + else if(FParameters::existParameter(argc, argv, "-plummer")) { + //The M argument is not used in the algorithm of the plummer distribution + unifRandomPlummer(loader.getNumberOfParticles(), loader.getBoxWidth()/2, tmpParticles) ; + } + else { //Uniform cube + unifRandomPointsInCube(loader.getNumberOfParticles(), loader.getBoxWidth()/2, loader.getBoxWidth()/2, loader.getBoxWidth()/2, tmpParticles); + } +#endif + + FP2PParticleContainer<FReal> allParticles; + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + FPoint<FReal> particlePosition; + FReal physicalValue; +#ifdef RANDOM_PARTICLES + physicalValue = 0.10; + particlePosition.setPosition(tmpParticles[idxPart*4], tmpParticles[idxPart*4+1], tmpParticles[idxPart*4+2]); +#else + loader.fillParticle(&particlePosition, &physicalValue); +#endif + allParticles.push(particlePosition, physicalValue); + } +#ifdef RANDOM_PARTICLES + delete[] tmpParticles; +#endif + std::cout << "Particles loaded in " << timer.tacAndElapsed() << "s\n"; + + // Put the data into the tree + timer.tic(); + GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles); + //groupedTree.printInfoBlocks(); + std::cout << "Tree created in " << timer.tacAndElapsed() << "s\n"; + + // Run the algorithm + const MatrixKernelClass MatrixKernel; + GroupKernelClass groupkernel(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel); + GroupAlgorithm groupalgo(&groupedTree,&groupkernel); + + timer.tic(); + starpu_fxt_start_profiling(); + groupalgo.execute(); + starpu_fxt_stop_profiling(); + timer.tac(); + std::cout << "Average executed in in " << timer.elapsed() << "s\n"; + + // Validate the result + if(FParameters::existParameter(argc, argv, LocalOptionNoValidate.options) == false){ + FSize offsetParticles = 0; + FReal*const allPhysicalValues = allParticles.getPhysicalValues(); + FReal*const allPosX = const_cast<FReal*>( allParticles.getPositions()[0]); + FReal*const allPosY = const_cast<FReal*>( allParticles.getPositions()[1]); + FReal*const allPosZ = const_cast<FReal*>( allParticles.getPositions()[2]); + + groupedTree.forEachCellLeaf<FP2PGroupParticleContainer<FReal> >([&](GroupCellClass cellTarget, FP2PGroupParticleContainer<FReal> * leafTarget){ + const FReal*const physicalValues = leafTarget->getPhysicalValues(); + const FReal*const posX = leafTarget->getPositions()[0]; + const FReal*const posY = leafTarget->getPositions()[1]; + const FReal*const posZ = leafTarget->getPositions()[2]; + const FSize nbPartsInLeafTarget = leafTarget->getNbParticles(); + + for(FSize idxPart = 0 ; idxPart < nbPartsInLeafTarget ; ++idxPart){ + allPhysicalValues[offsetParticles + idxPart] = physicalValues[idxPart]; + allPosX[offsetParticles + idxPart] = posX[idxPart]; + allPosY[offsetParticles + idxPart] = posY[idxPart]; + allPosZ[offsetParticles + idxPart] = posZ[idxPart]; + } + + offsetParticles += nbPartsInLeafTarget; + }); + + FAssertLF(offsetParticles == loader.getNumberOfParticles()); + + FReal*const allDirectPotentials = allParticles.getPotentials(); + FReal*const allDirectforcesX = allParticles.getForcesX(); + FReal*const allDirectforcesY = allParticles.getForcesY(); + FReal*const allDirectforcesZ = allParticles.getForcesZ(); + + for(int idxTgt = 0 ; idxTgt < offsetParticles ; ++idxTgt){ + for(int idxMutual = idxTgt + 1 ; idxMutual < offsetParticles ; ++idxMutual){ + FP2PR::MutualParticles( + allPosX[idxTgt],allPosY[idxTgt],allPosZ[idxTgt], allPhysicalValues[idxTgt], + &allDirectforcesX[idxTgt], &allDirectforcesY[idxTgt], &allDirectforcesZ[idxTgt], &allDirectPotentials[idxTgt], + allPosX[idxMutual],allPosY[idxMutual],allPosZ[idxMutual], allPhysicalValues[idxMutual], + &allDirectforcesX[idxMutual], &allDirectforcesY[idxMutual], &allDirectforcesZ[idxMutual], &allDirectPotentials[idxMutual] + ); + } + } + + FMath::FAccurater<FReal> potentialDiff; + FMath::FAccurater<FReal> fx, fy, fz; + offsetParticles = 0; + groupedTree.forEachCellLeaf<FP2PGroupParticleContainer<FReal> >([&](GroupCellClass cellTarget, FP2PGroupParticleContainer<FReal> * leafTarget){ + const FReal*const potentials = leafTarget->getPotentials(); + const FReal*const forcesX = leafTarget->getForcesX(); + const FReal*const forcesY = leafTarget->getForcesY(); + const FReal*const forcesZ = leafTarget->getForcesZ(); + const FSize nbPartsInLeafTarget = leafTarget->getNbParticles(); + + for(int idxTgt = 0 ; idxTgt < nbPartsInLeafTarget ; ++idxTgt){ + potentialDiff.add(allDirectPotentials[idxTgt + offsetParticles], potentials[idxTgt]); + fx.add(allDirectforcesX[idxTgt + offsetParticles], forcesX[idxTgt]); + fy.add(allDirectforcesY[idxTgt + offsetParticles], forcesY[idxTgt]); + fz.add(allDirectforcesZ[idxTgt + offsetParticles], forcesZ[idxTgt]); + } + + offsetParticles += nbPartsInLeafTarget; + }); + + std::cout << "Error : Potential " << potentialDiff << "\n"; + std::cout << "Error : fx " << fx << "\n"; + std::cout << "Error : fy " << fy << "\n"; + std::cout << "Error : fz " << fz << "\n"; + } + + return 0; +} + diff --git a/Tests/GroupTree/testBlockedMpiAlgorithm.cpp b/Tests/GroupTree/testBlockedMpiAlgorithm.cpp index f9d64669acb283f434faa8bd1c25186e6eca919c..a26d7935b8384d06e3abf6c84a21002462c7e6c1 100644 --- a/Tests/GroupTree/testBlockedMpiAlgorithm.cpp +++ b/Tests/GroupTree/testBlockedMpiAlgorithm.cpp @@ -17,7 +17,6 @@ #include "../../Src/Utils/FParameters.hpp" #include "../../Src/Files/FRandomLoader.hpp" -#include "../../Src/Files/FFmaGenericLoader.hpp" #include "../../Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp" @@ -42,12 +41,8 @@ #include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp" #include "../../Src/GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp" -#include <vector> -#include <iostream> -#include <fstream> -void timeAverage(int mpi_rank, int nproc, double elapsedTime); -FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total); + int main(int argc, char* argv[]){ const FParameterNames LocalOptionBlocSize { @@ -76,9 +71,10 @@ int main(int argc, char* argv[]){ FMpi mpiComm(argc, argv); // Get params const int NbLevels = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5); - const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 8); - const FSize totalNbParticles = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20)); - const FSize NbParticles = getNbParticlesPerNode(mpiComm.global().processCount(), mpiComm.global().processId(), totalNbParticles); + const FSize NbParticles = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20)); + const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250); + const FSize totalNbParticles = (NbParticles*mpiComm.global().processCount()); + // Load the particles FRandomLoader<FReal> loader(NbParticles, 1.0, FPoint<FReal>(0,0,0), mpiComm.global().processId()); FAssertLF(loader.isOpen()); @@ -89,21 +85,12 @@ int main(int argc, char* argv[]){ const FPoint<FReal>& getPosition(){ return position; } - const unsigned int getWriteDataSize(void) const { - return sizeof(FReal); - } - const unsigned int getWriteDataNumber(void) const { - return 3; - } - const FReal* getPtrFirstData(void) const { - return position.data(); - } }; + std::unique_ptr<TestParticle[]> particles(new TestParticle[loader.getNumberOfParticles()]); memset(particles.get(), 0, sizeof(TestParticle) * loader.getNumberOfParticles()); for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ - //loader.fillParticleAtMortonIndex(&(particles[idxPart].position), mpiComm.global().processId()*NbParticles + idxPart,NbLevels); - loader.fillParticle(&(particles[idxPart].position)); + loader.fillParticle(&particles[idxPart].position); } // Sort in parallel FVector<TestParticle> myParticles; @@ -140,20 +127,22 @@ int main(int argc, char* argv[]){ mpiComm.global().processId()+1, 0, mpiComm.global().getComm()), __LINE__); } + FLOG(std::cout << "My last index is " << leftLimite << "\n"); + FLOG(std::cout << "My left limite is " << myLeftLimite << "\n"); + // Put the data into the tree GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &allParticles, true, leftLimite); + groupedTree.printInfoBlocks(); // Run the algorithm GroupKernelClass groupkernel; GroupAlgorithm groupalgo(mpiComm.global(), &groupedTree,&groupkernel); - mpiComm.global().barrier(); - FTic timerExecute; groupalgo.execute(); + + std::cout << "Wait Others... " << std::endl; mpiComm.global().barrier(); - double elapsedTime = timerExecute.tacAndElapsed(); - timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), elapsedTime); groupedTree.forEachCellLeaf<GroupContainerClass>([&](GroupCellClass cell, GroupContainerClass* leaf){ const FSize nbPartsInLeaf = leaf->getNbParticles(); @@ -166,7 +155,6 @@ int main(int argc, char* argv[]){ }); - mpiComm.global().barrier(); typedef FTestCell CellClass; typedef FTestParticleContainer<FReal> ContainerClass; @@ -179,14 +167,13 @@ int main(int argc, char* argv[]){ // Usual octree OctreeClass tree(NbLevels, 2, loader.getBoxWidth(), loader.getCenterOfBox()); for(int idxProc = 0 ; idxProc < mpiComm.global().processCount() ; ++idxProc){ - FRandomLoader<FReal> loaderAll(getNbParticlesPerNode(mpiComm.global().processCount(), idxProc, totalNbParticles), 1.0, FPoint<FReal>(0,0,0), idxProc); + FRandomLoader<FReal> loaderAll(NbParticles, 1.0, FPoint<FReal>(0,0,0), idxProc); for(FSize idxPart = 0 ; idxPart < loaderAll.getNumberOfParticles() ; ++idxPart){ FPoint<FReal> pos; - loaderAll.fillParticle(&pos); - //loaderAll.fillParticleAtMortonIndex(&pos, idxProc*NbParticles + idxPart,NbLevels); + loaderAll.fillParticle(&pos); tree.insert(pos); } - } + } // Usual algorithm KernelClass kernels; // FTestKernels FBasicKernels FmmClass algo(&tree,&kernels); //FFmmAlgorithm FFmmAlgorithmThread @@ -211,28 +198,4 @@ int main(int argc, char* argv[]){ return 0; } -void timeAverage(int mpi_rank, int nproc, double elapsedTime) -{ - if(mpi_rank == 0) - { - double sumElapsedTime = elapsedTime; - for(int i = 1; i < nproc; ++i) - { - double tmp; - MPI_Recv(&tmp, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, 0); - if(tmp > sumElapsedTime) - sumElapsedTime = tmp; - } - std::cout << "Average time per node (implicit Cheby) : " << sumElapsedTime << "s" << std::endl; - } - else - { - MPI_Send(&elapsedTime, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); - } - MPI_Barrier(MPI_COMM_WORLD); -} -FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){ - if(mpi_rank < (total%mpi_count)) - return ((total - (total%mpi_count))/mpi_count)+1; - return ((total - (total%mpi_count))/mpi_count); -} + diff --git a/Tests/GroupTree/testBlockedMpiAlgorithmMartin.cpp b/Tests/GroupTree/testBlockedMpiAlgorithmMartin.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f9d64669acb283f434faa8bd1c25186e6eca919c --- /dev/null +++ b/Tests/GroupTree/testBlockedMpiAlgorithmMartin.cpp @@ -0,0 +1,238 @@ + +// Keep in private GIT +// @FUSE_MPI +// @FUSE_STARPU + +#include "../../Src/Utils/FGlobal.hpp" +#include "../../Src/Utils/FMpi.hpp" + +#include "../../Src/GroupTree/Core/FGroupTree.hpp" + +#include "../../Src/Components/FSimpleLeaf.hpp" +#include "../../Src/Containers/FVector.hpp" + + +#include "../../Src/Utils/FMath.hpp" +#include "../../Src/Utils/FMemUtils.hpp" +#include "../../Src/Utils/FParameters.hpp" + +#include "../../Src/Files/FRandomLoader.hpp" +#include "../../Src/Files/FFmaGenericLoader.hpp" + +#include "../../Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp" + +#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" +#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp" + +#include "../../Src/Utils/FParameterNames.hpp" + +#include "../../Src/Components/FTestParticleContainer.hpp" +#include "../../Src/Components/FTestKernels.hpp" +#include "../../Src/Components/FTestCell.hpp" +#include "../../Src/GroupTree/TestKernel/FGroupTestParticleContainer.hpp" + +#include "../../Src/GroupTree/TestKernel/FTestCellPOD.hpp" + +#include "../../Src/Utils/FLeafBalance.hpp" +#include "../../Src/Files/FMpiTreeBuilder.hpp" + +#include "../../Src/Core/FFmmAlgorithm.hpp" +#include "../../Src/Containers/FCoordinateComputer.hpp" + +#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp" +#include "../../Src/GroupTree/StarPUUtils/FStarPUCpuWrapper.hpp" + +#include <vector> +#include <iostream> +#include <fstream> + +void timeAverage(int mpi_rank, int nproc, double elapsedTime); +FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total); + +int main(int argc, char* argv[]){ + const FParameterNames LocalOptionBlocSize { + {"-bs"}, + "The size of the block of the blocked tree" + }; + FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.", + FParameterDefinitions::OctreeHeight, + FParameterDefinitions::NbParticles, + LocalOptionBlocSize); + typedef double FReal; + // Initialize the types + typedef FTestCellPODCore GroupCellSymbClass; + typedef FTestCellPODData GroupCellUpClass; + typedef FTestCellPODData GroupCellDownClass; + typedef FTestCellPOD GroupCellClass; + + typedef FGroupTestParticleContainer<FReal> GroupContainerClass; + typedef FGroupTree< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, + GroupContainerClass, 0, 1, long long int> GroupOctreeClass; + typedef FStarPUAllCpuCapacities<FTestKernels< GroupCellClass, GroupContainerClass >> GroupKernelClass; + typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper; + typedef FGroupTaskStarPUMpiAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper> GroupAlgorithm; + + + FMpi mpiComm(argc, argv); + // Get params + const int NbLevels = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5); + const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 8); + const FSize totalNbParticles = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20)); + const FSize NbParticles = getNbParticlesPerNode(mpiComm.global().processCount(), mpiComm.global().processId(), totalNbParticles); + // Load the particles + FRandomLoader<FReal> loader(NbParticles, 1.0, FPoint<FReal>(0,0,0), mpiComm.global().processId()); + FAssertLF(loader.isOpen()); + + // Fill the particles + struct TestParticle{ + FPoint<FReal> position; + const FPoint<FReal>& getPosition(){ + return position; + } + const unsigned int getWriteDataSize(void) const { + return sizeof(FReal); + } + const unsigned int getWriteDataNumber(void) const { + return 3; + } + const FReal* getPtrFirstData(void) const { + return position.data(); + } + }; + std::unique_ptr<TestParticle[]> particles(new TestParticle[loader.getNumberOfParticles()]); + memset(particles.get(), 0, sizeof(TestParticle) * loader.getNumberOfParticles()); + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + //loader.fillParticleAtMortonIndex(&(particles[idxPart].position), mpiComm.global().processId()*NbParticles + idxPart,NbLevels); + loader.fillParticle(&(particles[idxPart].position)); + } + // Sort in parallel + FVector<TestParticle> myParticles; + FLeafBalance balancer; + FMpiTreeBuilder<FReal, TestParticle >::DistributeArrayToContainer(mpiComm.global(), + particles.get(), + loader.getNumberOfParticles(), + loader.getCenterOfBox(), + loader.getBoxWidth(), + NbLevels, + &myParticles, + &balancer); + + FTestParticleContainer<FReal> allParticles; + for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){ + allParticles.push(myParticles[idxPart].position); + } + + // Each proc need to know the righest morton index + const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>( + loader.getCenterOfBox(), + loader.getBoxWidth(), + NbLevels, + myParticles[myParticles.getSize()-1].position ); + const MortonIndex myLeftLimite = host.getMortonIndex(); + MortonIndex leftLimite = -1; + if(mpiComm.global().processId() != 0){ + FMpi::Assert(MPI_Recv(&leftLimite, sizeof(leftLimite), MPI_BYTE, + mpiComm.global().processId()-1, 0, + mpiComm.global().getComm(), MPI_STATUS_IGNORE), __LINE__); + } + if(mpiComm.global().processId() != mpiComm.global().processCount()-1){ + FMpi::Assert(MPI_Send(const_cast<MortonIndex*>(&myLeftLimite), sizeof(myLeftLimite), MPI_BYTE, + mpiComm.global().processId()+1, 0, + mpiComm.global().getComm()), __LINE__); + } + + // Put the data into the tree + GroupOctreeClass groupedTree(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, + &allParticles, true, leftLimite); + + // Run the algorithm + GroupKernelClass groupkernel; + GroupAlgorithm groupalgo(mpiComm.global(), &groupedTree,&groupkernel); + mpiComm.global().barrier(); + FTic timerExecute; + groupalgo.execute(); + mpiComm.global().barrier(); + double elapsedTime = timerExecute.tacAndElapsed(); + timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), elapsedTime); + + groupedTree.forEachCellLeaf<GroupContainerClass>([&](GroupCellClass cell, GroupContainerClass* leaf){ + const FSize nbPartsInLeaf = leaf->getNbParticles(); + const long long int* dataDown = leaf->getDataDown(); + for(FSize idxPart = 0 ; idxPart < nbPartsInLeaf ; ++idxPart){ + if(dataDown[idxPart] != totalNbParticles-1){ + std::cout << "[Full] Error a particle has " << dataDown[idxPart] << " (it should be " << (totalNbParticles-1) << ") at index " << cell.getMortonIndex() << "\n"; + } + } + }); + + + mpiComm.global().barrier(); + + typedef FTestCell CellClass; + typedef FTestParticleContainer<FReal> ContainerClass; + typedef FSimpleLeaf<FReal, ContainerClass > LeafClass; + typedef FOctree<FReal, CellClass, ContainerClass , LeafClass > OctreeClass; + typedef FTestKernels< CellClass, ContainerClass > KernelClass; + typedef FFmmAlgorithm<OctreeClass, CellClass, ContainerClass, KernelClass, LeafClass > FmmClass; + + { + // Usual octree + OctreeClass tree(NbLevels, 2, loader.getBoxWidth(), loader.getCenterOfBox()); + for(int idxProc = 0 ; idxProc < mpiComm.global().processCount() ; ++idxProc){ + FRandomLoader<FReal> loaderAll(getNbParticlesPerNode(mpiComm.global().processCount(), idxProc, totalNbParticles), 1.0, FPoint<FReal>(0,0,0), idxProc); + for(FSize idxPart = 0 ; idxPart < loaderAll.getNumberOfParticles() ; ++idxPart){ + FPoint<FReal> pos; + loaderAll.fillParticle(&pos); + //loaderAll.fillParticleAtMortonIndex(&pos, idxProc*NbParticles + idxPart,NbLevels); + tree.insert(pos); + } + } + // Usual algorithm + KernelClass kernels; // FTestKernels FBasicKernels + FmmClass algo(&tree,&kernels); //FFmmAlgorithm FFmmAlgorithmThread + algo.execute(); + + // Compare the results + groupedTree.forEachCellWithLevel([&](GroupCellClass gcell, const int level){ + const CellClass* cell = tree.getCell(gcell.getMortonIndex(), level); + if(cell == nullptr){ + std::cout << "[Empty] Error cell should not exist " << gcell.getMortonIndex() << "\n"; + } + else { + if(gcell.getDataUp() != cell->getDataUp()){ + std::cout << "[Up] Up is different at index " << gcell.getMortonIndex() << " level " << level << " is " << gcell.getDataUp() << " should be " << cell->getDataUp() << "\n"; + } + if(gcell.getDataDown() != cell->getDataDown()){ + std::cout << "[Down] Down is different at index " << gcell.getMortonIndex() << " level " << level << " is " << gcell.getDataDown() << " should be " << cell->getDataDown() << "\n"; + } + } + }); + } + + return 0; +} +void timeAverage(int mpi_rank, int nproc, double elapsedTime) +{ + if(mpi_rank == 0) + { + double sumElapsedTime = elapsedTime; + for(int i = 1; i < nproc; ++i) + { + double tmp; + MPI_Recv(&tmp, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, 0); + if(tmp > sumElapsedTime) + sumElapsedTime = tmp; + } + std::cout << "Average time per node (implicit Cheby) : " << sumElapsedTime << "s" << std::endl; + } + else + { + MPI_Send(&elapsedTime, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); + } + MPI_Barrier(MPI_COMM_WORLD); +} +FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){ + if(mpi_rank < (total%mpi_count)) + return ((total - (total%mpi_count))/mpi_count)+1; + return ((total - (total%mpi_count))/mpi_count); +} diff --git a/Tests/GroupTree/testBlockedMpiChebyshev.cpp b/Tests/GroupTree/testBlockedMpiChebyshev.cpp index 34faef4fd9219792f7bc7a7c6b7c5fc266c511c8..721908d355e8f143c32bc77384daf3446884dd5a 100644 --- a/Tests/GroupTree/testBlockedMpiChebyshev.cpp +++ b/Tests/GroupTree/testBlockedMpiChebyshev.cpp @@ -43,28 +43,20 @@ #include "../../Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp" #include "../../Src/Files/FMpiFmaGenericLoader.hpp" -#include "../../Src/Files/FGenerateDistribution.hpp" #include "../../Src/Containers/FCoordinateComputer.hpp" #include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp" #include <memory> -void timeAverage(int mpi_rank, int nproc, double elapsedTime); -FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total); int main(int argc, char* argv[]){ const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"}; const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"}; - const FParameterNames LocalOptionEllipsoid = {{"-ellipsoid"} , " non uniform distribution on an ellipsoid of aspect ratio given by a=0.5 b=0.25 c=0.125"}; - const FParameterNames LocalOptionEllipsoidv2 = {{"-ellipsoidv2"} , " non uniform distribution on an ellipsoid of aspect ratio given by a=0.5 b=0.25 c=0.125"}; - const FParameterNames LocalOptionPlummer = {{"-plummer"} , " (Highly non uniform) plummer distribution (astrophysics)"}; - const FParameterNames LocalOptionCube = {{"-cube", "-uniform"} , " uniform distribution on cube (default)"}; FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.", FParameterDefinitions::OctreeHeight,FParameterDefinitions::InputFile, - FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::NbParticles, - LocalOptionBlocSize, LocalOptionNoValidate, LocalOptionEllipsoid, LocalOptionPlummer, LocalOptionCube, - LocalOptionEllipsoidv2); + FParameterDefinitions::OctreeSubHeight, + LocalOptionBlocSize, LocalOptionNoValidate); typedef double FReal; // Initialize the types @@ -90,10 +82,9 @@ int main(int argc, char* argv[]){ FMpi mpiComm(argc,argv); + const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); const unsigned int TreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2); - const FSize totalNbParticles = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20)); - const FSize NbParticles = getNbParticlesPerNode(mpiComm.global().processCount(), mpiComm.global().processId(), totalNbParticles); // init particles position and physical value struct TestParticle{ @@ -102,55 +93,10 @@ int main(int argc, char* argv[]){ const FPoint<FReal>& getPosition(){ return position; } - const unsigned int getWriteDataSize(void) const { - return sizeof(FReal); - } - const unsigned int getWriteDataNumber(void) const { - return 3; - } - const FReal* getPtrFirstData(void) const { - return position.data(); - } }; -//#define LOAD_FILE -#ifndef LOAD_FILE - srand48(0); - FReal boxWidth = 1.0; - FRandomLoader<FReal> loader(NbParticles, boxWidth, FPoint<FReal>(0,0,0), mpiComm.global().processId()); - FAssertLF(loader.isOpen()); - - setSeed(mpiComm.global().processId()+1); - TestParticle* allParticles = new TestParticle[loader.getNumberOfParticles()]; - FReal * tmpParticles = new FReal[4*loader.getNumberOfParticles()]; - memset(allParticles,0,(unsigned int) (sizeof(TestParticle)* loader.getNumberOfParticles())); - memset(tmpParticles,0,(unsigned int) (sizeof(FReal)* loader.getNumberOfParticles() * 4)); - if(FParameters::existParameter(argc, argv, "-ellipsoid")) { - std::cout << "ellipsoid\n"; - nonunifRandomPointsOnElipsoid(loader.getNumberOfParticles(), 0.5, 0.1, tmpParticles); - } - else if(FParameters::existParameter(argc, argv, LocalOptionEllipsoidv2.options)) { - std::cout << "ellipsoidv2\n"; - unifRandomPointsOnProlate(loader.getNumberOfParticles(), boxWidth/2, boxWidth/8, tmpParticles); - } - else if(FParameters::existParameter(argc, argv, "-plummer")) { - //The M argument is not used in the algorithm of the plummer distribution - std::cout << "plummer\n"; - unifRandomPlummer(loader.getNumberOfParticles(), boxWidth/2, tmpParticles) ; - } - else { //Uniform cube - std::cout << "cube\n"; - unifRandomPointsInCube(loader.getNumberOfParticles(), boxWidth/2, boxWidth/2, boxWidth/2, tmpParticles); - } - - for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ - allParticles[idxPart].position.setPosition(tmpParticles[idxPart*4], tmpParticles[idxPart*4+1], tmpParticles[idxPart*4+2]); - allParticles[idxPart].physicalValue = 0.1; - } - delete[] tmpParticles; -#else // open particle file - const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + std::cout << "Opening : " << filename << "\n" << std::endl; FMpiFmaGenericLoader<FReal> loader(filename,mpiComm.global()); FAssertLF(loader.isOpen()); @@ -159,17 +105,18 @@ int main(int argc, char* argv[]){ for(FSize idxPart = 0 ; idxPart < loader.getMyNumberOfParticles() ; ++idxPart){ loader.fillParticle(&allParticles[idxPart].position,&allParticles[idxPart].physicalValue); } -#endif FVector<TestParticle> myParticles; FLeafBalance balancer; FMpiTreeBuilder< FReal,TestParticle >::DistributeArrayToContainer(mpiComm.global(),allParticles, - loader.getNumberOfParticles(), + loader.getMyNumberOfParticles(), loader.getCenterOfBox(), loader.getBoxWidth(),TreeHeight, &myParticles, &balancer); - //std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl; + std::cout << "Creating & Inserting " << loader.getMyNumberOfParticles() << " particles ..." << std::endl; + std::cout << "For a total of " << loader.getNumberOfParticles() << " particles ..." << std::endl; + std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl; // Each proc need to know the righest morton index const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>( @@ -192,6 +139,7 @@ int main(int argc, char* argv[]){ FLOG(std::cout << "My last index is " << leftLimite << "\n"); FLOG(std::cout << "My left limite is " << myLeftLimite << "\n"); + // Put the data into the tree FP2PParticleContainer<FReal> myParticlesInContainer; for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){ @@ -200,28 +148,25 @@ int main(int argc, char* argv[]){ } GroupOctreeClass groupedTree(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, &myParticlesInContainer, true, leftLimite); - + groupedTree.printInfoBlocks(); + timer.tac(); - std::cerr << "Done " << "(@Creating and Inserting Particles = " << timer.elapsed() << "s)." << std::endl; + std::cout << "Done " << "(@Creating and Inserting Particles = " + << timer.elapsed() << "s)." << std::endl; - int operationsToProceed = FFmmP2M | FFmmM2M | FFmmM2L | FFmmL2L | FFmmL2P | FFmmP2P; { // ----------------------------------------------------- - //std::cout << "\nChebyshev FMM (ORDER="<< ORDER << ") ... " << std::endl; + std::cout << "\nChebyshev FMM (ORDER="<< ORDER << ") ... " << std::endl; + timer.tic(); - const MatrixKernelClass MatrixKernel; + MatrixKernelClass MatrixKernel; // Create Matrix Kernel GroupKernelClass groupkernel(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel); // Run the algorithm GroupAlgorithm groupalgo(mpiComm.global(), &groupedTree,&groupkernel); - mpiComm.global().barrier(); - timer.tic(); - starpu_fxt_start_profiling(); - groupalgo.execute(operationsToProceed); - mpiComm.global().barrier(); - starpu_fxt_stop_profiling(); + groupalgo.execute(); + timer.tac(); - timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), timer.elapsed()); - //std::cout << "Done " << "(@Algorithm = " << timer.elapsed() << "s)." << std::endl; + std::cout << "Done " << "(@Algorithm = " << timer.elapsed() << "s)." << std::endl; } // ----------------------------------------------------- @@ -246,13 +191,8 @@ int main(int argc, char* argv[]){ MatrixKernelClass MatrixKernel; KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel); FmmClass algorithm(mpiComm.global(),&treeCheck, &kernels); - mpiComm.global().barrier(); - timer.tic(); - algorithm.execute(operationsToProceed); - mpiComm.global().barrier(); - timer.tac(); - timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), timer.elapsed()); - //std::cout << "Algo is over" << std::endl; + algorithm.execute(); + std::cout << "Algo is over" << std::endl; groupedTree.forEachCellWithLevel([&](GroupCellClass gcell, const int level){ const CellClass* cell = treeCheck.getCell(gcell.getMortonIndex(), level); @@ -334,39 +274,10 @@ int main(int argc, char* argv[]){ } }); - //std::cout << "Comparing is over" << std::endl; + std::cout << "Comparing is over" << std::endl; } return 0; } -void timeAverage(int mpi_rank, int nproc, double elapsedTime) -{ - if(mpi_rank == 0) - { - double sumElapsedTimeMin = elapsedTime; - double sumElapsedTimeMax = elapsedTime; - for(int i = 1; i < nproc; ++i) - { - double tmp; - MPI_Recv(&tmp, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); - if(tmp < sumElapsedTimeMin) - sumElapsedTimeMin = tmp; - if(tmp > sumElapsedTimeMax) - sumElapsedTimeMax = tmp; - } - std::cout << "Min time per node (MPI) : " << sumElapsedTimeMin << "s" << std::endl; - std::cout << "Max time per node (MPI) : " << sumElapsedTimeMax << "s" << std::endl; - } - else - { - MPI_Send(&elapsedTime, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); - } - MPI_Barrier(MPI_COMM_WORLD); -} -FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){ - if(mpi_rank < (total%mpi_count)) - return ((total - (total%mpi_count))/mpi_count)+1; - return ((total - (total%mpi_count))/mpi_count); -} diff --git a/Tests/GroupTree/testBlockedMpiChebyshevMartin.cpp b/Tests/GroupTree/testBlockedMpiChebyshevMartin.cpp new file mode 100644 index 0000000000000000000000000000000000000000..34faef4fd9219792f7bc7a7c6b7c5fc266c511c8 --- /dev/null +++ b/Tests/GroupTree/testBlockedMpiChebyshevMartin.cpp @@ -0,0 +1,372 @@ +// ==== CMAKE ===== +// @FUSE_BLAS +// ================ +// Keep in private GIT +// @FUSE_MPI +// @FUSE_STARPU + + +#include "../../Src/Utils/FGlobal.hpp" + +#include "../../Src/GroupTree/Core/FGroupTree.hpp" + +#include "../../Src/Components/FSimpleLeaf.hpp" +#include "../../Src/Containers/FVector.hpp" + +#include "../../Src/Kernels/P2P/FP2PParticleContainer.hpp" + +#include "../../Src/Kernels/Chebyshev/FChebSymKernel.hpp" +#include "../../Src/Kernels/Chebyshev/FChebCell.hpp" +#include "../../Src/GroupTree/Chebyshev/FChebCellPOD.hpp" +#include "Kernels/Interpolation/FInterpMatrixKernel.hpp" + +#include "../../Src/Utils/FMath.hpp" +#include "../../Src/Utils/FMemUtils.hpp" +#include "../../Src/Utils/FParameters.hpp" + +#include "../../Src/Files/FRandomLoader.hpp" +#include "../../Src/Files/FFmaGenericLoader.hpp" + +#include "../../Src/GroupTree/Core/FGroupSeqAlgorithm.hpp" +#include "../../Src/GroupTree/Core/FGroupTaskAlgorithm.hpp" +#include "../../Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp" +#include "../../Src/GroupTree/Core/FP2PGroupParticleContainer.hpp" + +#include "../../Src/Utils/FParameterNames.hpp" + +#include "../../Src/Components/FTestParticleContainer.hpp" +#include "../../Src/Components/FTestCell.hpp" +#include "../../Src/Components/FTestKernels.hpp" + +#include "../../Src/Core/FFmmAlgorithmThreadProc.hpp" +#include "../../Src/Files/FMpiTreeBuilder.hpp" +#include "../../Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp" + +#include "../../Src/Files/FMpiFmaGenericLoader.hpp" +#include "../../Src/Files/FGenerateDistribution.hpp" +#include "../../Src/Containers/FCoordinateComputer.hpp" + +#include "../../Src/GroupTree/StarPUUtils/FStarPUKernelCapacities.hpp" + +#include <memory> + +void timeAverage(int mpi_rank, int nproc, double elapsedTime); +FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total); + +int main(int argc, char* argv[]){ + const FParameterNames LocalOptionBlocSize { {"-bs"}, "The size of the block of the blocked tree"}; + const FParameterNames LocalOptionNoValidate { {"-no-validation"}, "To avoid comparing with direct computation"}; + const FParameterNames LocalOptionEllipsoid = {{"-ellipsoid"} , " non uniform distribution on an ellipsoid of aspect ratio given by a=0.5 b=0.25 c=0.125"}; + const FParameterNames LocalOptionEllipsoidv2 = {{"-ellipsoidv2"} , " non uniform distribution on an ellipsoid of aspect ratio given by a=0.5 b=0.25 c=0.125"}; + const FParameterNames LocalOptionPlummer = {{"-plummer"} , " (Highly non uniform) plummer distribution (astrophysics)"}; + const FParameterNames LocalOptionCube = {{"-cube", "-uniform"} , " uniform distribution on cube (default)"}; + FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.", + FParameterDefinitions::OctreeHeight,FParameterDefinitions::InputFile, + FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::NbParticles, + LocalOptionBlocSize, LocalOptionNoValidate, LocalOptionEllipsoid, LocalOptionPlummer, LocalOptionCube, + LocalOptionEllipsoidv2); + + typedef double FReal; + // Initialize the types + static const int ORDER = 6; + typedef FInterpMatrixKernelR<FReal> MatrixKernelClass; + + typedef FChebCellPODCore GroupCellSymbClass; + typedef FChebCellPODPole<FReal,ORDER> GroupCellUpClass; + typedef FChebCellPODLocal<FReal,ORDER> GroupCellDownClass; + typedef FChebCellPOD<FReal,ORDER> GroupCellClass; + + + typedef FP2PGroupParticleContainer<FReal> GroupContainerClass; + typedef FGroupTree< FReal, GroupCellClass, GroupCellSymbClass, GroupCellUpClass, GroupCellDownClass, GroupContainerClass, 1, 4, FReal> GroupOctreeClass; + + typedef FStarPUAllCpuCapacities<FChebSymKernel<FReal,GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER>> GroupKernelClass; + typedef FStarPUCpuWrapper<typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass> GroupCpuWrapper; + typedef FGroupTaskStarPUMpiAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupCpuWrapper> GroupAlgorithm; + + // Get params + FTic timer; + const int groupSize = FParameters::getValue(argc,argv,LocalOptionBlocSize.options, 250); + + FMpi mpiComm(argc,argv); + + const unsigned int TreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 5); + const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2); + const FSize totalNbParticles = FParameters::getValue(argc,argv,FParameterDefinitions::NbParticles.options, FSize(20)); + const FSize NbParticles = getNbParticlesPerNode(mpiComm.global().processCount(), mpiComm.global().processId(), totalNbParticles); + + // init particles position and physical value + struct TestParticle{ + FPoint<FReal> position; + FReal physicalValue; + const FPoint<FReal>& getPosition(){ + return position; + } + const unsigned int getWriteDataSize(void) const { + return sizeof(FReal); + } + const unsigned int getWriteDataNumber(void) const { + return 3; + } + const FReal* getPtrFirstData(void) const { + return position.data(); + } + }; + +//#define LOAD_FILE +#ifndef LOAD_FILE + srand48(0); + FReal boxWidth = 1.0; + FRandomLoader<FReal> loader(NbParticles, boxWidth, FPoint<FReal>(0,0,0), mpiComm.global().processId()); + FAssertLF(loader.isOpen()); + + setSeed(mpiComm.global().processId()+1); + TestParticle* allParticles = new TestParticle[loader.getNumberOfParticles()]; + FReal * tmpParticles = new FReal[4*loader.getNumberOfParticles()]; + memset(allParticles,0,(unsigned int) (sizeof(TestParticle)* loader.getNumberOfParticles())); + memset(tmpParticles,0,(unsigned int) (sizeof(FReal)* loader.getNumberOfParticles() * 4)); + if(FParameters::existParameter(argc, argv, "-ellipsoid")) { + std::cout << "ellipsoid\n"; + nonunifRandomPointsOnElipsoid(loader.getNumberOfParticles(), 0.5, 0.1, tmpParticles); + } + else if(FParameters::existParameter(argc, argv, LocalOptionEllipsoidv2.options)) { + std::cout << "ellipsoidv2\n"; + unifRandomPointsOnProlate(loader.getNumberOfParticles(), boxWidth/2, boxWidth/8, tmpParticles); + } + else if(FParameters::existParameter(argc, argv, "-plummer")) { + //The M argument is not used in the algorithm of the plummer distribution + std::cout << "plummer\n"; + unifRandomPlummer(loader.getNumberOfParticles(), boxWidth/2, tmpParticles) ; + } + else { //Uniform cube + std::cout << "cube\n"; + unifRandomPointsInCube(loader.getNumberOfParticles(), boxWidth/2, boxWidth/2, boxWidth/2, tmpParticles); + } + + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + allParticles[idxPart].position.setPosition(tmpParticles[idxPart*4], tmpParticles[idxPart*4+1], tmpParticles[idxPart*4+2]); + allParticles[idxPart].physicalValue = 0.1; + } + delete[] tmpParticles; +#else + // open particle file + const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + FMpiFmaGenericLoader<FReal> loader(filename,mpiComm.global()); + FAssertLF(loader.isOpen()); + + TestParticle* allParticles = new TestParticle[loader.getMyNumberOfParticles()]; + memset(allParticles,0,(unsigned int) (sizeof(TestParticle)* loader.getMyNumberOfParticles())); + for(FSize idxPart = 0 ; idxPart < loader.getMyNumberOfParticles() ; ++idxPart){ + loader.fillParticle(&allParticles[idxPart].position,&allParticles[idxPart].physicalValue); + } +#endif + + FVector<TestParticle> myParticles; + FLeafBalance balancer; + FMpiTreeBuilder< FReal,TestParticle >::DistributeArrayToContainer(mpiComm.global(),allParticles, + loader.getNumberOfParticles(), + loader.getCenterOfBox(), + loader.getBoxWidth(),TreeHeight, + &myParticles, &balancer); + + //std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl; + + // Each proc need to know the righest morton index + const FTreeCoordinate host = FCoordinateComputer::GetCoordinateFromPosition<FReal>( + loader.getCenterOfBox(), + loader.getBoxWidth(), + TreeHeight, + myParticles[myParticles.getSize()-1].position ); + const MortonIndex myLeftLimite = host.getMortonIndex(); + MortonIndex leftLimite = -1; + if(mpiComm.global().processId() != 0){ + FMpi::Assert(MPI_Recv(&leftLimite, sizeof(leftLimite), MPI_BYTE, + mpiComm.global().processId()-1, 0, + mpiComm.global().getComm(), MPI_STATUS_IGNORE), __LINE__); + } + if(mpiComm.global().processId() != mpiComm.global().processCount()-1){ + FMpi::Assert(MPI_Send(const_cast<MortonIndex*>(&myLeftLimite), sizeof(myLeftLimite), MPI_BYTE, + mpiComm.global().processId()+1, 0, + mpiComm.global().getComm()), __LINE__); + } + FLOG(std::cout << "My last index is " << leftLimite << "\n"); + FLOG(std::cout << "My left limite is " << myLeftLimite << "\n"); + + // Put the data into the tree + FP2PParticleContainer<FReal> myParticlesInContainer; + for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){ + myParticlesInContainer.push(myParticles[idxPart].position, + myParticles[idxPart].physicalValue); + } + GroupOctreeClass groupedTree(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), groupSize, + &myParticlesInContainer, true, leftLimite); + + timer.tac(); + std::cerr << "Done " << "(@Creating and Inserting Particles = " << timer.elapsed() << "s)." << std::endl; + + int operationsToProceed = FFmmP2M | FFmmM2M | FFmmM2L | FFmmL2L | FFmmL2P | FFmmP2P; + { // ----------------------------------------------------- + //std::cout << "\nChebyshev FMM (ORDER="<< ORDER << ") ... " << std::endl; + + const MatrixKernelClass MatrixKernel; + // Create Matrix Kernel + GroupKernelClass groupkernel(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel); + // Run the algorithm + GroupAlgorithm groupalgo(mpiComm.global(), &groupedTree,&groupkernel); + mpiComm.global().barrier(); + timer.tic(); + starpu_fxt_start_profiling(); + groupalgo.execute(operationsToProceed); + mpiComm.global().barrier(); + starpu_fxt_stop_profiling(); + timer.tac(); + timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), timer.elapsed()); + //std::cout << "Done " << "(@Algorithm = " << timer.elapsed() << "s)." << std::endl; + } // ----------------------------------------------------- + + + if(FParameters::existParameter(argc, argv, LocalOptionNoValidate.options) == false){ + typedef FP2PParticleContainer<FReal> ContainerClass; + typedef FSimpleLeaf<FReal, ContainerClass > LeafClass; + typedef FChebCell<FReal,ORDER> CellClass; + typedef FOctree<FReal, CellClass,ContainerClass,LeafClass> OctreeClass; + typedef FChebSymKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; + typedef FFmmAlgorithmThreadProc<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; + + const FReal epsi = 1E-10; + + OctreeClass treeCheck(TreeHeight, SubTreeHeight,loader.getBoxWidth(),loader.getCenterOfBox()); + + for(FSize idxPart = 0 ; idxPart < myParticles.getSize() ; ++idxPart){ + // put in tree + treeCheck.insert(myParticles[idxPart].position, + myParticles[idxPart].physicalValue); + } + + MatrixKernelClass MatrixKernel; + KernelClass kernels(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(), &MatrixKernel); + FmmClass algorithm(mpiComm.global(),&treeCheck, &kernels); + mpiComm.global().barrier(); + timer.tic(); + algorithm.execute(operationsToProceed); + mpiComm.global().barrier(); + timer.tac(); + timeAverage(mpiComm.global().processId(), mpiComm.global().processCount(), timer.elapsed()); + //std::cout << "Algo is over" << std::endl; + + groupedTree.forEachCellWithLevel([&](GroupCellClass gcell, const int level){ + const CellClass* cell = treeCheck.getCell(gcell.getMortonIndex(), level); + if(cell == nullptr){ + std::cout << "[Empty] Error cell should exist " << gcell.getMortonIndex() << "\n"; + } + else { + FMath::FAccurater<FReal> diffUp; + diffUp.add(cell->getMultipole(0), gcell.getMultipole(0), gcell.getVectorSize()); + if(diffUp.getRelativeInfNorm() > epsi || diffUp.getRelativeL2Norm() > epsi){ + std::cout << "[Up] Up is different at index " << gcell.getMortonIndex() << " level " << level << " is " << diffUp << "\n"; + } + FMath::FAccurater<FReal> diffDown; + diffDown.add(cell->getLocal(0), gcell.getLocal(0), gcell.getVectorSize()); + if(diffDown.getRelativeInfNorm() > epsi || diffDown.getRelativeL2Norm() > epsi){ + std::cout << "[Up] Down is different at index " << gcell.getMortonIndex() << " level " << level << " is " << diffDown << "\n"; + } + } + }); + + groupedTree.forEachCellLeaf<FP2PGroupParticleContainer<FReal> >([&](GroupCellClass gcell, FP2PGroupParticleContainer<FReal> * leafTarget){ + const ContainerClass* targets = treeCheck.getLeafSrc(gcell.getMortonIndex()); + if(targets == nullptr){ + std::cout << "[Empty] Error leaf should exist " << gcell.getMortonIndex() << "\n"; + } + else{ + const FReal*const gposX = leafTarget->getPositions()[0]; + const FReal*const gposY = leafTarget->getPositions()[1]; + const FReal*const gposZ = leafTarget->getPositions()[2]; + const FSize gnbPartsInLeafTarget = leafTarget->getNbParticles(); + const FReal*const gforceX = leafTarget->getForcesX(); + const FReal*const gforceY = leafTarget->getForcesY(); + const FReal*const gforceZ = leafTarget->getForcesZ(); + const FReal*const gpotential = leafTarget->getPotentials(); + + const FReal*const posX = targets->getPositions()[0]; + const FReal*const posY = targets->getPositions()[1]; + const FReal*const posZ = targets->getPositions()[2]; + const FSize nbPartsInLeafTarget = targets->getNbParticles(); + const FReal*const forceX = targets->getForcesX(); + const FReal*const forceY = targets->getForcesY(); + const FReal*const forceZ = targets->getForcesZ(); + const FReal*const potential = targets->getPotentials(); + + if(gnbPartsInLeafTarget != nbPartsInLeafTarget){ + std::cout << "[Empty] Not the same number of particles at " << gcell.getMortonIndex() + << " gnbPartsInLeafTarget " << gnbPartsInLeafTarget << " nbPartsInLeafTarget " << nbPartsInLeafTarget << "\n"; + } + else{ + FMath::FAccurater<FReal> potentialDiff; + FMath::FAccurater<FReal> fx, fy, fz; + for(FSize idxPart = 0 ; idxPart < nbPartsInLeafTarget ; ++idxPart){ + if(gposX[idxPart] != posX[idxPart] || gposY[idxPart] != posY[idxPart] + || gposZ[idxPart] != posZ[idxPart]){ + std::cout << "[Empty] Not the same particlea at " << gcell.getMortonIndex() << " idx " << idxPart + << gposX[idxPart] << " " << posX[idxPart] << " " << gposY[idxPart] << " " << posY[idxPart] + << " " << gposZ[idxPart] << " " << posZ[idxPart] << "\n"; + } + else{ + potentialDiff.add(potential[idxPart], gpotential[idxPart]); + fx.add(forceX[idxPart], gforceX[idxPart]); + fy.add(forceY[idxPart], gforceY[idxPart]); + fz.add(forceZ[idxPart], gforceZ[idxPart]); + } + } + if(potentialDiff.getRelativeInfNorm() > epsi || potentialDiff.getRelativeL2Norm() > epsi){ + std::cout << "[Up] potentialDiff is different at index " << gcell.getMortonIndex() << " is " << potentialDiff << "\n"; + } + if(fx.getRelativeInfNorm() > epsi || fx.getRelativeL2Norm() > epsi){ + std::cout << "[Up] fx is different at index " << gcell.getMortonIndex() << " is " << fx << "\n"; + } + if(fy.getRelativeInfNorm() > epsi || fy.getRelativeL2Norm() > epsi){ + std::cout << "[Up] fy is different at index " << gcell.getMortonIndex() << " is " << fy << "\n"; + } + if(fz.getRelativeInfNorm() > epsi || fz.getRelativeL2Norm() > epsi){ + std::cout << "[Up] fz is different at index " << gcell.getMortonIndex() << " is " << fz << "\n"; + } + } + } + }); + + //std::cout << "Comparing is over" << std::endl; + } + + return 0; +} + + +void timeAverage(int mpi_rank, int nproc, double elapsedTime) +{ + if(mpi_rank == 0) + { + double sumElapsedTimeMin = elapsedTime; + double sumElapsedTimeMax = elapsedTime; + for(int i = 1; i < nproc; ++i) + { + double tmp; + MPI_Recv(&tmp, 1, MPI_DOUBLE, i, 0, MPI_COMM_WORLD, MPI_STATUS_IGNORE); + if(tmp < sumElapsedTimeMin) + sumElapsedTimeMin = tmp; + if(tmp > sumElapsedTimeMax) + sumElapsedTimeMax = tmp; + } + std::cout << "Min time per node (MPI) : " << sumElapsedTimeMin << "s" << std::endl; + std::cout << "Max time per node (MPI) : " << sumElapsedTimeMax << "s" << std::endl; + } + else + { + MPI_Send(&elapsedTime, 1, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD); + } + MPI_Barrier(MPI_COMM_WORLD); +} +FSize getNbParticlesPerNode(FSize mpi_count, FSize mpi_rank, FSize total){ + if(mpi_rank < (total%mpi_count)) + return ((total - (total%mpi_count))/mpi_count)+1; + return ((total - (total%mpi_count))/mpi_count); +}