diff --git a/CMakeLists.txt b/CMakeLists.txt index a649ac06c7ba311cd4eb517b9581b49475a857e6..967aa57161ad4fd902e2e6ea7c6482633060e7c6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -70,7 +70,8 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") option( SCALFMM_USE_MIC_NATIVE "Set to ON to compile in native mode for MIC" OFF ) option( SCALFMM_ONLY_DEVEL "Set to ON to compile Development tools (only scalfmm team)" ON ) if( SCALFMM_ONLY_DEVEL ) - option( SCALFMM_USE_STARPU "Set to ON to build SCALFMM with StarPU" OFF ) + option( SCALFMM_USE_STARPU "Set to ON to build SCALFMM with StarPU" OFF ) + option( SCALFMM_BUILD_UTILS "Set to ON to build utils Tests" OFF ) endif() if( SCALFMM_USE_MPI ) try_compile(COMPILE_INTEL ${CMAKE_CURRENT_BINARY_DIR} @@ -647,7 +648,7 @@ if (MORSE_DISTRIB_DIR OR EXISTS "${CMAKE_SOURCE_DIR}/CMakeModules/morse/") # Build tools (Internal use) # ################################################################## # Build - UTILs - if( SCALFMM_ONLY_DEVEL ) + if( SCALFMM_ONLY_DEVEL AND SCALFMM_BUILD_UTILS) add_subdirectory(Utils) endif() diff --git a/Src/Containers/FOctree.hpp b/Src/Containers/FOctree.hpp index 607e3b1f7af84bfccf4781bd4f1423a7e0dd61b0..2ff347f2b34c166f70e7d5a90bafc6da18c5ba57 100644 --- a/Src/Containers/FOctree.hpp +++ b/Src/Containers/FOctree.hpp @@ -808,8 +808,8 @@ public: * @return the number of neighbors */ int getInteractionNeighbors(const CellClass* inNeighbors[343], - const FTreeCoordinate& workingCell, - const int inLevel) const{ + const FTreeCoordinate& workingCell, + const int inLevel, const int neighSeparation = 1) const{ // reset memset(inNeighbors, 0, sizeof(CellClass*) * 343); @@ -832,7 +832,7 @@ public: if(!FMath::Between(parentCell.getZ() + idxZ,0,boxLimite)) continue; // if we are not on the current cell - if( idxX || idxY || idxZ ){ + if( neighSeparation<1 || idxX || idxY || idxZ ){ const FTreeCoordinate otherParent(parentCell.getX() + idxX,parentCell.getY() + idxY,parentCell.getZ() + idxZ); const MortonIndex mortonOtherParent = otherParent.getMortonIndex(inLevel-1) << 3; // Get child @@ -848,7 +848,7 @@ public: const int zdiff = ((otherParent.getZ()<<1) | (idxCousin&1)) - workingCell.getZ(); // Test if it is a direct neighbor - if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){ + if(FMath::Abs(xdiff) > neighSeparation || FMath::Abs(ydiff) > neighSeparation || FMath::Abs(zdiff) > neighSeparation){ // add to neighbors inNeighbors[ (((xdiff+3) * 7) + (ydiff+3)) * 7 + zdiff + 3] = cells[idxCousin]; ++idxNeighbors; @@ -875,8 +875,8 @@ public: * @return the number of neighbors */ int getFullNeighborhood(const CellClass* inNeighbors[343], - const FTreeCoordinate& workingCell, - const int inLevel) const{ + const FTreeCoordinate& workingCell, + const int inLevel) const{ // reset memset(inNeighbors, 0, sizeof(CellClass*) * 343); @@ -935,10 +935,8 @@ public: * @return the number of neighbors */ int getPeriodicInteractionNeighbors(const CellClass* inNeighbors[343], - const FTreeCoordinate& workingCell, - const int inLevel, const int inDirection) const{ - // TODO : REMOVE NEXT COMMENTS - // std::cout << " Begin in getPeriodicInteractionNeighbors"<<std::endl; + const FTreeCoordinate& workingCell, + const int inLevel, const int inDirection, const int neighSeparation = 1) const{ // Then take each child of the parent's neighbors if not in directNeighbors // Father coordinate @@ -963,18 +961,14 @@ public: const int endY = (TestPeriodicCondition(inDirection, DirPlusY) || parentCell.getY() != boxLimite - 1 ?1:0); const int startZ = (TestPeriodicCondition(inDirection, DirMinusZ) || parentCell.getZ() != 0 ?-1:0); const int endZ = (TestPeriodicCondition(inDirection, DirPlusZ) || parentCell.getZ() != boxLimite - 1 ?1:0); - // TODO : REMOVE NEXT COMMENTS - // std::cout << " -- startX " << startX << " endX "<< endX<< std::endl ; - // std::cout << " -- startY " << startY << " endX "<< endY<< std::endl ; - // std::cout << " -- startZ " << startZ << " endX "<< endZ<< std::endl ; - // std::cout << " boxLimite "<< boxLimite<<std::endl; + int idxNeighbors = 0; // We test all cells around for(int idxX = startX ; idxX <= endX ; ++idxX){ for(int idxY = startY ; idxY <= endY ; ++idxY){ for(int idxZ = startZ ; idxZ <= endZ ; ++idxZ){ // if we are not on the current cell - if( idxX || idxY || idxZ ){ + if(neighSeparation<1 || idxX || idxY || idxZ ){ const FTreeCoordinate otherParent(parentCell.getX() + idxX,parentCell.getY() + idxY,parentCell.getZ() + idxZ); FTreeCoordinate otherParentInBox(otherParent); @@ -1018,15 +1012,10 @@ public: const int zdiff = ((otherParent.getZ()<<1) | (idxCousin&1)) - workingCell.getZ(); // Test if it is a direct neighbor - if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){ + if(FMath::Abs(xdiff) > neighSeparation || FMath::Abs(ydiff) > neighSeparation || FMath::Abs(zdiff) > neighSeparation){ // add to neighbors - // TODO : REMOVE NEXT COMMENTS - // std::cout << " Voisin numero "<< idxNeighbors - // << " indexinTab "<< (((xdiff+3) * 7) + (ydiff+3)) * 7 + zdiff + 3 - // << " idxXousin " << idxCousin<< std::endl; inNeighbors[ (((xdiff+3) * 7) + (ydiff+3)) * 7 + zdiff + 3] = cells[idxCousin]; ++idxNeighbors; - } } } diff --git a/Src/Containers/FTreeCoordinate.hpp b/Src/Containers/FTreeCoordinate.hpp index f84fc8e1e429cb237b07453016cfbaf29fa7226e..40d805d27e0a6d22398f7f9bc77305c58c849e96 100644 --- a/Src/Containers/FTreeCoordinate.hpp +++ b/Src/Containers/FTreeCoordinate.hpp @@ -342,7 +342,8 @@ public: return idxNeig; } - int getInteractionNeighbors(const int inLevel, MortonIndex inNeighbors[189], int inNeighborsPosition[189]) const{ + int getInteractionNeighbors(const int inLevel, MortonIndex inNeighbors[/*189+26+1*/216], int inNeighborsPosition[/*189+26+1*/216], + const int neighSeparation = 1) const{ // Then take each child of the parent's neighbors if not in directNeighbors // Father coordinate const FTreeCoordinate parentCell(this->getX()>>1,this->getY()>>1,this->getZ()>>1); @@ -362,7 +363,7 @@ public: if(!FMath::Between(parentCell.getZ() + idxZ,0,limite)) continue; // if we are not on the current cell - if( idxX || idxY || idxZ ){ + if(neighSeparation<1 || idxX || idxY || idxZ ){ const FTreeCoordinate otherParent(parentCell.getX() + idxX,parentCell.getY() + idxY,parentCell.getZ() + idxZ); const MortonIndex mortonOther = otherParent.getMortonIndex(inLevel-1); @@ -373,7 +374,7 @@ public: const int zdiff = ((otherParent.getZ()<<1) | (idxCousin&1)) - this->getZ(); // Test if it is a direct neighbor - if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){ + if(FMath::Abs(xdiff) > neighSeparation || FMath::Abs(ydiff) > neighSeparation || FMath::Abs(zdiff) > neighSeparation){ // add to neighbors inNeighborsPosition[idxNeighbors] = ((( (xdiff+3) * 7) + (ydiff+3))) * 7 + zdiff + 3; inNeighbors[idxNeighbors++] = (mortonOther << 3) | idxCousin; @@ -387,7 +388,7 @@ public: return idxNeighbors; } - int getInteractionNeighbors(const int inLevel, MortonIndex inNeighbors[189]) const{ + int getInteractionNeighbors(const int inLevel, MortonIndex inNeighbors[/*189+26+1*/216], const int neighSeparation = 1) const{ // Then take each child of the parent's neighbors if not in directNeighbors // Father coordinate const FTreeCoordinate parentCell(this->getX()>>1,this->getY()>>1,this->getZ()>>1); @@ -407,7 +408,7 @@ public: if(!FMath::Between(parentCell.getZ() + idxZ,0,limite)) continue; // if we are not on the current cell - if( idxX || idxY || idxZ ){ + if(neighSeparation<1 || idxX || idxY || idxZ ){ const FTreeCoordinate otherParent(parentCell.getX() + idxX,parentCell.getY() + idxY,parentCell.getZ() + idxZ); const MortonIndex mortonOther = otherParent.getMortonIndex(inLevel-1); @@ -418,7 +419,7 @@ public: const int zdiff = ((otherParent.getZ()<<1) | (idxCousin&1)) - this->getZ(); // Test if it is a direct neighbor - if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){ + if(FMath::Abs(xdiff) > neighSeparation || FMath::Abs(ydiff) > neighSeparation || FMath::Abs(zdiff) > neighSeparation){ // add to neighbors inNeighbors[idxNeighbors++] = (mortonOther << 3) | idxCousin; } diff --git a/Src/Core/FFmmAlgorithm.hpp b/Src/Core/FFmmAlgorithm.hpp index 5526e19afe7f30b9308aa2151def7c49a65d5aac..4804b459dbdca726f5f8b0133e1acab2af4abba3 100644 --- a/Src/Core/FFmmAlgorithm.hpp +++ b/Src/Core/FFmmAlgorithm.hpp @@ -47,6 +47,7 @@ class FFmmAlgorithm : public FAbstractAlgorithm, public FAlgorithmTimers { const int OctreeHeight; ///< The height of the given tree. + const int leafLevelSeparationCriteria; public: /** Class constructor * @@ -56,8 +57,8 @@ public: * * \except An exception is thrown if one of the arguments is NULL. */ - FFmmAlgorithm(OctreeClass* const inTree, KernelClass* const inKernels) - : tree(inTree) , kernels(inKernels), OctreeHeight(tree->getHeight()) { + FFmmAlgorithm(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeparationCriteria = 1) + : tree(inTree) , kernels(inKernels), OctreeHeight(tree->getHeight()), leafLevelSeparationCriteria(inLeafLevelSeparationCriteria) { FAssertLF(tree, "tree cannot be null"); FAssertLF(kernels, "kernels cannot be null"); @@ -194,9 +195,11 @@ protected: for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria); + // for each cells do{ - const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, separationCriteria); FLOG(computationCounter.tic()); if(counter) kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel); FLOG(computationCounter.tac()); diff --git a/Src/Core/FFmmAlgorithmPeriodic.hpp b/Src/Core/FFmmAlgorithmPeriodic.hpp index e82dfce6adef62ec20245060705f0b71df472aff..bdd5f8cca3a34931a991c33d788369fbcb957947 100644 --- a/Src/Core/FFmmAlgorithmPeriodic.hpp +++ b/Src/Core/FFmmAlgorithmPeriodic.hpp @@ -51,6 +51,7 @@ class FFmmAlgorithmPeriodic : public FAbstractAlgorithm{ const int nbLevelsAboveRoot; //< The nb of level the user ask to go above the tree (>= -1) const int offsetRealTree; //< nbLevelsAboveRoot GetFackLevel + const int leafLevelSeperationCriteria; public: /** The constructor need the octree and the kernels used for computation @@ -60,9 +61,9 @@ public: * @param inUpperLevel this parameter defines the behavior of the periodicity refer to the main doc * */ - FFmmAlgorithmPeriodic(OctreeClass* const inTree, const int inUpperLevel = 0) + FFmmAlgorithmPeriodic(OctreeClass* const inTree, const int inUpperLevel = 0, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), OctreeHeight(tree->getHeight()), - nbLevelsAboveRoot(inUpperLevel), offsetRealTree(inUpperLevel + 3) { + nbLevelsAboveRoot(inUpperLevel), offsetRealTree(inUpperLevel + 3), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); FAssertLF(-1 <= inUpperLevel, "inUpperLevel cannot be < -1"); @@ -254,9 +255,10 @@ protected: for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){ FLOG(FTic counterTimeLevel); const int fackLevel = idxLevel + offsetRealTree; + const int separationCriteria = (idxLevel != OctreeHeight-1 ? 1 : leafLevelSeperationCriteria); // for each cells do{ - const int counter = tree->getPeriodicInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, AllDirs); + const int counter = tree->getPeriodicInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, AllDirs, separationCriteria); FLOG(computationCounter.tic()); if(counter) kernels->M2L( octreeIterator.getCurrentCell() , neighbors, counter, fackLevel); FLOG(computationCounter.tac()); diff --git a/Src/Core/FFmmAlgorithmSectionTask.hpp b/Src/Core/FFmmAlgorithmSectionTask.hpp index 0a797b499044d8713e2ecb3caa34a3075fae4f64..0544d84814234404448f74643282892ea68e2921 100644 --- a/Src/Core/FFmmAlgorithmSectionTask.hpp +++ b/Src/Core/FFmmAlgorithmSectionTask.hpp @@ -49,15 +49,16 @@ class FFmmAlgorithmSectionTask : public FAbstractAlgorithm{ const int OctreeHeight; + const int leafLevelSeperationCriteria; public: /** The constructor need the octree and the kernels used for computation * @param inTree the octree to work on * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmSectionTask(OctreeClass* const inTree, KernelClass* const inKernels) + FFmmAlgorithmSectionTask(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(0), - MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()) + MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); @@ -214,9 +215,10 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); // for each cells do{ - int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, separationCriteria); if(counter){ #pragma omp task firstprivate(octreeIterator, neighbors, counter) shared(idxLevel) { diff --git a/Src/Core/FFmmAlgorithmTask.hpp b/Src/Core/FFmmAlgorithmTask.hpp index 9301a76b908dac3c70e99120c11e3cb0752f3627..84cd2c4c8e9cb57e4e3a63adbe88ed40433b40d0 100644 --- a/Src/Core/FFmmAlgorithmTask.hpp +++ b/Src/Core/FFmmAlgorithmTask.hpp @@ -49,15 +49,16 @@ class FFmmAlgorithmTask : public FAbstractAlgorithm, public FAlgorithmTimers { const int OctreeHeight; + const int leafLevelSeperationCriteria; public: /** The constructor need the octree and the kernels used for computation * @param inTree the octree to work on * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmTask(OctreeClass* const inTree, KernelClass* const inKernels) + FFmmAlgorithmTask(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), - MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()) + MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); @@ -228,9 +229,10 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); // for each cells do{ - int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, separationCriteria); if(counter){ #pragma omp task firstprivate(octreeIterator, neighbors, counter) shared(idxLevel) { diff --git a/Src/Core/FFmmAlgorithmThread.hpp b/Src/Core/FFmmAlgorithmThread.hpp index 529fc14883c876d46ca6ce3ac70e8957f4309c8f..5f850aaafa045991f725a96d998de61f1baebcb1 100644 --- a/Src/Core/FFmmAlgorithmThread.hpp +++ b/Src/Core/FFmmAlgorithmThread.hpp @@ -62,6 +62,8 @@ class FFmmAlgorithmThread : public FAbstractAlgorithm, public FAlgorithmTimers{ const bool staticSchedule; + const int leafLevelSeperationCriteria; + template <class NumType> NumType getChunkSize(const NumType inSize) const { if(staticSchedule){ @@ -83,10 +85,10 @@ public: * \except An exception is thrown if one of the arguments is NULL. */ FFmmAlgorithmThread(OctreeClass* const inTree, KernelClass* const inKernels, - const bool inStaticSchedule = true) + const bool inStaticSchedule = true, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), iterArray(nullptr), leafsNumber(0), MaxThreads(omp_get_max_threads()), OctreeHeight(tree->getHeight()), - staticSchedule(inStaticSchedule) { + staticSchedule(inStaticSchedule), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); @@ -278,6 +280,7 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); int numberOfCells = 0; // for each cells do{ @@ -297,7 +300,7 @@ protected: #pragma omp for schedule(dynamic, chunkSize) nowait for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ - const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCell].getCurrentGlobalCoordinate(),idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, separationCriteria); if(counter) myThreadkernels->M2L( iterArray[idxCell].getCurrentCell() , neighbors, counter, idxLevel); } diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp index c96c035f2584e25bde2235fdc12d6e18214f39e6..2115736ace96d97dff2070ba412594951c826fc9 100644 --- a/Src/Core/FFmmAlgorithmThreadProc.hpp +++ b/Src/Core/FFmmAlgorithmThreadProc.hpp @@ -81,6 +81,7 @@ class FFmmAlgorithmThreadProc : public FAbstractAlgorithm, public FAlgorithmTime const int OctreeHeight; //<Height of the tree + const int leafLevelSeperationCriteria; /** An interval is the morton index interval * that a proc use (it holds data in this interval) @@ -134,10 +135,12 @@ public: * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmThreadProc(const FMpi::FComm& inComm, OctreeClass* const inTree, KernelClass* const inKernels) + FFmmAlgorithmThreadProc(const FMpi::FComm& inComm, OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), comm(inComm), iterArray(nullptr),iterArrayComm(nullptr),numberOfLeafs(0), MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()), - OctreeHeight(tree->getHeight()),intervals(new Interval[inComm.processCount()]), + OctreeHeight(tree->getHeight()), + leafLevelSeperationCriteria(inLeafLevelSeperationCriteria), + intervals(new Interval[inComm.processCount()]), workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) { FAssertLF(tree, "tree cannot be null"); @@ -622,6 +625,9 @@ protected: typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator); // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ + + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); + if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -646,10 +652,10 @@ protected: // Which cell potentialy needs other data and in the same time // are potentialy needed by other - MortonIndex neighborsIndexes[189]; + MortonIndex neighborsIndexes[/*189+26+1*/216]; for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ // Find the M2L neigbors of a cell - const int counter = iterArrayLocal[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndexes); + const int counter = iterArrayLocal[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndexes, separationCriteria); memset(alreadySent, false, sizeof(bool) * nbProcess); bool needOther = false; @@ -778,6 +784,8 @@ protected: // Now we can compute all the data // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); + if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -807,7 +815,7 @@ protected: const int nbCellToCompute = FMath::Min(chunckSize, numberOfCells-idxCell); for(int idxCellToCompute = idxCell ; idxCellToCompute < idxCell+nbCellToCompute ; ++idxCellToCompute){ - const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCellToCompute].getCurrentGlobalCoordinate(), idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCellToCompute].getCurrentGlobalCoordinate(), idxLevel, separationCriteria); if(counter) myThreadkernels->M2L( iterArray[idxCellToCompute].getCurrentCell() , neighbors, counter, idxLevel); } } @@ -843,6 +851,8 @@ protected: // compute the second time // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); + if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -900,15 +910,15 @@ protected: #pragma omp parallel { KernelClass * const myThreadkernels = kernels[omp_get_thread_num()]; - MortonIndex neighborsIndex[189]; - int neighborsPosition[189]; + MortonIndex neighborsIndex[/*189+26+1*/216]; + int neighborsPosition[/*189+26+1*/216]; const CellClass* neighbors[343]; #pragma omp for schedule(static) nowait for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ // compute indexes memset(neighbors, 0, 343 * sizeof(CellClass*)); - const int counterNeighbors = iterArray[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndex, neighborsPosition); + const int counterNeighbors = iterArray[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndex, neighborsPosition, separationCriteria); int counter = 0; // does we receive this index from someone? diff --git a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp index 54605f98e3dc902e74d0c47c2b44b9af756f14f6..e2a0ab116633fcc4e5cb503c9ba97e0f52735eb2 100644 --- a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp +++ b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp @@ -83,6 +83,8 @@ class FFmmAlgorithmThreadProcPeriodic : public FAbstractAlgorithm { const int OctreeHeight; + const int leafLevelSeperationCriteria; + public: struct Interval{ MortonIndex leftIndex; @@ -139,11 +141,13 @@ public: * An assert is launched if one of the arguments is null */ FFmmAlgorithmThreadProcPeriodic(const FMpi::FComm& inComm, OctreeClass* const inTree, - const int inUpperLevel = 2) + const int inUpperLevel = 2, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), comm(inComm), nbLevelsAboveRoot(inUpperLevel), offsetRealTree(inUpperLevel + 3), numberOfLeafs(0), MaxThreads(omp_get_max_threads()), nbProcess(inComm.processCount()), idProcess(inComm.processId()), - OctreeHeight(tree->getHeight()),intervals(new Interval[inComm.processCount()]), + OctreeHeight(tree->getHeight()), + leafLevelSeperationCriteria(inLeafLevelSeperationCriteria), + intervals(new Interval[inComm.processCount()]), workingIntervalsPerLevel(new Interval[inComm.processCount() * tree->getHeight()]) { FAssertLF(tree, "tree cannot be null"); @@ -752,6 +756,7 @@ protected: typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator); // for each levels for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){ + if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -776,13 +781,13 @@ protected: // Which cell potentialy needs other data and in the same time // are potentialy needed by other - int neighborsPosition[189]; - MortonIndex neighborsIndexes[189]; + int neighborsPosition[/*189+26+1*/216]; + MortonIndex neighborsIndexes[/*189+26+1*/216]; for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ // Find the M2L neigbors of a cell const int counter = getPeriodicInteractionNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, - neighborsIndexes, neighborsPosition, AllDirs); + neighborsIndexes, neighborsPosition, AllDirs, leafLevelSeperationCriteria); memset(alreadySent, false, sizeof(bool) * nbProcess); bool needOther = false; @@ -907,6 +912,9 @@ protected: // for each levels for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){ const int fackLevel = idxLevel + offsetRealTree; + + const int separationCriteria = (idxLevel != OctreeHeight-1 ? 1 : leafLevelSeperationCriteria); + if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -936,10 +944,9 @@ protected: const int nbCellToCompute = FMath::Min(chunckSize, numberOfCells-idxCell); for(int idxCellToCompute = idxCell ; idxCellToCompute < idxCell+nbCellToCompute ; ++idxCellToCompute){ - const int counter = tree-> - getPeriodicInteractionNeighbors(neighbors, + const int counter = tree->getPeriodicInteractionNeighbors(neighbors, iterArray[idxCellToCompute].getCurrentGlobalCoordinate(), - idxLevel, AllDirs); + idxLevel, AllDirs, separationCriteria); if(counter) myThreadkernels->M2L( iterArray[idxCellToCompute].getCurrentCell() , neighbors, counter, fackLevel); @@ -973,6 +980,9 @@ protected: // for each levels for(int idxLevel = 1 ; idxLevel < OctreeHeight ; ++idxLevel ){ const int fackLevel = idxLevel + offsetRealTree; + + const int separationCriteria = (fackLevel != OctreeHeight-1 ? 1 : leafLevelSeperationCriteria); + if(!procHasWorkAtLevel(idxLevel, idProcess)){ avoidGotoLeftIterator.moveDown(); octreeIterator = avoidGotoLeftIterator; @@ -1030,15 +1040,15 @@ protected: #pragma omp parallel { KernelClass * const myThreadkernels = kernels[omp_get_thread_num()]; - MortonIndex neighborsIndex[189]; - int neighborsPosition[189]; + MortonIndex neighborsIndex[/*189+26+1*/216]; + int neighborsPosition[/*189+26+1*/216]; const CellClass* neighbors[343]; #pragma omp for schedule(static) nowait for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ // compute indexes memset(neighbors, 0, 343 * sizeof(CellClass*)); - const int counterNeighbors = getPeriodicInteractionNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, neighborsIndex, neighborsPosition, AllDirs); + const int counterNeighbors = getPeriodicInteractionNeighbors(iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, neighborsIndex, neighborsPosition, AllDirs, separationCriteria); //const int counterNeighbors = iterArray[idxCell].getCurrentGlobalCoordinate().getInteractionNeighbors(idxLevel, neighborsIndex, neighborsPosition); int counter = 0; @@ -1758,7 +1768,8 @@ protected: } - int getPeriodicInteractionNeighbors(const FTreeCoordinate& workingCell,const int inLevel, MortonIndex inNeighbors[189], int inNeighborsPosition[189], const int inDirection) const{ + int getPeriodicInteractionNeighbors(const FTreeCoordinate& workingCell,const int inLevel, MortonIndex inNeighbors[/*189+26+1*/216], int inNeighborsPosition[/*189+26+1*/216], + const int inDirection, const int neighSeparation) const{ // Then take each child of the parent's neighbors if not in directNeighbors // Father coordinate @@ -1770,7 +1781,7 @@ protected: // This is not on a border we can use normal interaction list method if( !(parentCell.getX() == 0 || parentCell.getY() == 0 || parentCell.getZ() == 0 || parentCell.getX() == boxLimite - 1 || parentCell.getY() == boxLimite - 1 || parentCell.getZ() == boxLimite - 1 ) ) { - return getInteractionNeighbors( workingCell, inLevel, inNeighbors, inNeighborsPosition); + return getInteractionNeighbors( workingCell, inLevel, inNeighbors, inNeighborsPosition, neighSeparation); } const int startX = (TestPeriodicCondition(inDirection, DirMinusX) || parentCell.getX() != 0 ?-1:0); @@ -1821,7 +1832,7 @@ protected: const int zdiff = ((otherParent.getZ()<<1) | (idxCousin&1)) - workingCell.getZ(); // Test if it is a direct neighbor - if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){ + if(FMath::Abs(xdiff) > neighSeparation || FMath::Abs(ydiff) > neighSeparation || FMath::Abs(zdiff) > neighSeparation){ // add to neighbors inNeighborsPosition[idxNeighbors] = (((xdiff+3) * 7) + (ydiff+3)) * 7 + zdiff + 3; inNeighbors[idxNeighbors++] = (mortonOtherParent << 3) | idxCousin; @@ -1835,7 +1846,8 @@ protected: return idxNeighbors; } - int getInteractionNeighbors(const FTreeCoordinate& workingCell,const int inLevel, MortonIndex inNeighbors[189], int inNeighborsPosition[189]) const{ + int getInteractionNeighbors(const FTreeCoordinate& workingCell,const int inLevel, MortonIndex inNeighbors[/*189+26+1*/216], int inNeighborsPosition[/*189+26+1*/216], + const int neighSeparation) const{ // Then take each child of the parent's neighbors if not in directNeighbors // Father coordinate @@ -1860,7 +1872,7 @@ protected: const int zdiff = ((otherParent.getZ()<<1) | (idxCousin&1)) - workingCell.getZ(); // Test if it is a direct neighbor - if(FMath::Abs(xdiff) > 1 || FMath::Abs(ydiff) > 1 || FMath::Abs(zdiff) > 1){ + if(FMath::Abs(xdiff) > neighSeparation || FMath::Abs(ydiff) > neighSeparation || FMath::Abs(zdiff) > neighSeparation){ // add to neighbors inNeighborsPosition[idxNeighbors] = ((( (xdiff+3) * 7) + (ydiff+3))) * 7 + zdiff + 3; inNeighbors[idxNeighbors++] = (mortonOtherParent << 3) | idxCousin; diff --git a/Src/Core/FFmmAlgorithmThreadTsm.hpp b/Src/Core/FFmmAlgorithmThreadTsm.hpp index 86434c6746c2f7b8e34c28be425be59a0f42fa24..6bb6a268a25c1f58c012b7909d14d61b0121b213 100644 --- a/Src/Core/FFmmAlgorithmThreadTsm.hpp +++ b/Src/Core/FFmmAlgorithmThreadTsm.hpp @@ -55,15 +55,17 @@ class FFmmAlgorithmThreadTsm : public FAbstractAlgorithm, public FAlgorithmTimer const int OctreeHeight; + const int leafLevelSeperationCriteria; + public: /** The constructor need the octree and the kernels used for computation * @param inTree the octree to work on * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmThreadTsm(OctreeClass* const inTree, KernelClass* const inKernels) + FFmmAlgorithmThreadTsm(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) : tree(inTree) , kernels(nullptr), iterArray(nullptr), - MaxThreads(omp_get_max_threads()) , OctreeHeight(tree->getHeight()) { + MaxThreads(omp_get_max_threads()) , OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria) { FAssertLF(tree, "tree cannot be null"); @@ -248,6 +250,7 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); int numberOfCells = 0; // for each cells @@ -270,7 +273,7 @@ protected: for(int idxCell = 0 ; idxCell < numberOfCells ; ++idxCell){ CellClass* const currentCell = iterArray[idxCell].getCurrentCell(); if(currentCell->hasTargetsChild()){ - const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCell].getCurrentGlobalCoordinate(),idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, iterArray[idxCell].getCurrentGlobalCoordinate(), idxLevel, separationCriteria); if( counter ){ int counterWithSrc = 0; for(int idxRealNeighbors = 0 ; idxRealNeighbors < 343 ; ++idxRealNeighbors ){ diff --git a/Src/Core/FFmmAlgorithmTsm.hpp b/Src/Core/FFmmAlgorithmTsm.hpp index 2937a880bd59841a9e18e02e5086ea2c79a7f672..80f238d68465a3ec2905c8892ce30797e6ea322e 100644 --- a/Src/Core/FFmmAlgorithmTsm.hpp +++ b/Src/Core/FFmmAlgorithmTsm.hpp @@ -46,6 +46,8 @@ class FFmmAlgorithmTsm : public FAbstractAlgorithm{ const int OctreeHeight; + const int leafLevelSeperationCriteria; + FLOG(FTic counterTime); //< In case of debug: to count the elapsed time FLOG(FTic computationCounter); //< In case of debug: to count computation time @@ -55,8 +57,8 @@ public: * @param inKernels the kernels to call * An assert is launched if one of the arguments is null */ - FFmmAlgorithmTsm(OctreeClass* const inTree, KernelClass* const inKernels) - : tree(inTree) , kernels(inKernels) , OctreeHeight(tree->getHeight()){ + FFmmAlgorithmTsm(OctreeClass* const inTree, KernelClass* const inKernels, const int inLeafLevelSeperationCriteria = 1) + : tree(inTree) , kernels(inKernels) , OctreeHeight(tree->getHeight()), leafLevelSeperationCriteria(inLeafLevelSeperationCriteria){ FAssertLF(tree, "tree cannot be null"); FAssertLF(kernels, "kernels cannot be null"); @@ -198,13 +200,14 @@ protected: // for each levels for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ FLOG(FTic counterTimeLevel); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeperationCriteria); // for each cells do{ FLOG(computationCounter.tic()); CellClass* const currentCell = octreeIterator.getCurrentCell(); if(currentCell->hasTargetsChild()){ - const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(),idxLevel); + const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(),idxLevel, separationCriteria); if( counter ){ int counterWithSrc = 0; for(int idxRealNeighbors = 0 ; idxRealNeighbors < 343 ; ++idxRealNeighbors ){ diff --git a/Src/GroupTree/Core/FGroupOfParticlesDyn.hpp b/Src/GroupTree/Core/FGroupOfParticlesDyn.hpp index 8b374b9e253c5f6b629e4aebda1d2ed5d16027bd..82d6022f246027bfad728d43ce068850b2c4c9f2 100644 --- a/Src/GroupTree/Core/FGroupOfParticlesDyn.hpp +++ b/Src/GroupTree/Core/FGroupOfParticlesDyn.hpp @@ -248,8 +248,8 @@ public: for(int idxLeafPtr = 0 ; idxLeafPtr < blockHeader->blockIndexesTableSize ; ++idxLeafPtr){ if(blockIndexesTable[idxLeafPtr] != LeafIsEmptyFlag){ const int id = blockIndexesTable[idxLeafPtr]; - ParticlesAttachedClass leaf(symbPart + leafHeader[id].offSetSymb, - (downPart?downPart + leafHeader[id].offSetDown : nullptr)); + ParticlesAttachedClass leaf( (leafHeader[id].sizeSymb? symbPart + leafHeader[id].offSetSymb : nullptr), + (downPart && leafHeader[id].sizeDown ?downPart + leafHeader[id].offSetDown : nullptr) ); function(&leaf); } } @@ -261,8 +261,8 @@ public: ParticlesAttachedClass getLeaf(const MortonIndex leafIndex){ if(blockIndexesTable[leafIndex - blockHeader->startingIndex] != LeafIsEmptyFlag){ const int id = blockIndexesTable[leafIndex - blockHeader->startingIndex]; - return ParticlesAttachedClass(symbPart + leafHeader[id].offSetSymb, - (downPart?downPart + leafHeader[id].offSetDown : nullptr)); + return ParticlesAttachedClass((leafHeader[id].sizeSymb? symbPart + leafHeader[id].offSetSymb : nullptr), + (downPart && leafHeader[id].sizeDown ?downPart + leafHeader[id].offSetDown : nullptr) ); } return ParticlesAttachedClass(); } @@ -271,7 +271,9 @@ public: unsigned char* getLeafSymbBuffer(const MortonIndex leafIndex){ if(blockIndexesTable[leafIndex - blockHeader->startingIndex] != LeafIsEmptyFlag){ const int id = blockIndexesTable[leafIndex - blockHeader->startingIndex]; - return (symbPart + leafHeader[id].offSetSymb); + if(leafHeader[id].sizeSymb){ + return (symbPart + leafHeader[id].offSetSymb); + } } return nullptr; } @@ -280,7 +282,9 @@ public: unsigned char* getLeafDownBuffer(const MortonIndex leafIndex){ if(blockIndexesTable[leafIndex - blockHeader->startingIndex] != LeafIsEmptyFlag){ const int id = blockIndexesTable[leafIndex - blockHeader->startingIndex]; - return (downPart?downPart + leafHeader[id].offSetDown : nullptr); + if(leafHeader[id].sizeDown){ + return (downPart?downPart + leafHeader[id].offSetDown : nullptr); + } } return nullptr; } diff --git a/Src/GroupTree/Core/FGroupSeqAlgorithm.hpp b/Src/GroupTree/Core/FGroupSeqAlgorithm.hpp index e261f4c8996941b1335e8ead8bc56469a162de0f..fadf2a150c795d51c7b5f142a6cb5c65f3c11629 100644 --- a/Src/GroupTree/Core/FGroupSeqAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupSeqAlgorithm.hpp @@ -17,7 +17,7 @@ #include <vector> template <class OctreeClass, class CellContainerClass, class CellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass> -class FGroupSeqAlgorithm { +class FGroupSeqAlgorithm : public FAbstractAlgorithm { protected: const int MaxThreads; //< The number of threads OctreeClass*const tree; //< The Tree @@ -28,13 +28,19 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(kernels, "kernels cannot be null"); + FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); + FLOG(FLog::Controller << "FGroupSeqAlgorithm (Max Thread " << MaxThreads << ")\n"); } ~FGroupSeqAlgorithm(){ } - void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ +protected: + /** + * Runs the complete algorithm. + */ + void executeCore(const unsigned operationsToProceed) override { FLOG( FLog::Controller << "\tStart FGroupSeqAlgorithm\n" ); if(operationsToProceed & FFmmP2M) bottomPass(); @@ -45,10 +51,11 @@ public: if(operationsToProceed & FFmmL2L) downardPass(); - if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ) directPass(); + if( (operationsToProceed & FFmmP2P) || (operationsToProceed & FFmmL2P) ){ + directPass((operationsToProceed & FFmmP2P), (operationsToProceed & FFmmL2P)); + } } -protected: void bottomPass(){ FLOG( FTic timer; ); typename OctreeClass::ParticleGroupIterator iterParticles = tree->leavesBegin(); @@ -83,7 +90,7 @@ protected: void upwardPass(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-2 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); @@ -133,7 +140,7 @@ protected: void transferPass(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); @@ -238,7 +245,7 @@ protected: void downardPass(){ FLOG( FTic timer; ); - for(int idxLevel = 2 ; idxLevel <= tree->getHeight()-2 ; ++idxLevel){ + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); @@ -284,9 +291,9 @@ protected: FLOG( FLog::Controller << "\t\t downardPass in " << timer.tacAndElapsed() << "s\n" ); } - void directPass(){ + void directPass(const bool p2pEnabled, const bool l2pEnabled){ FLOG( FTic timer; ); - { + if(l2pEnabled){ typename OctreeClass::ParticleGroupIterator iterParticles = tree->leavesBegin(); const typename OctreeClass::ParticleGroupIterator endParticles = tree->leavesEnd(); @@ -314,7 +321,7 @@ protected: FAssertLF(iterParticles == endParticles && iterCells == endCells); } - { + if(p2pEnabled){ typename OctreeClass::ParticleGroupIterator iterParticles = tree->leavesBegin(); const typename OctreeClass::ParticleGroupIterator endParticles = tree->leavesEnd(); diff --git a/Src/GroupTree/Core/FGroupTaskAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskAlgorithm.hpp index cf1b0bcab5696a13f6770999125e98d922eee29c..36cafc45f781d34adf0d1593611bd39fcd31a330 100644 --- a/Src/GroupTree/Core/FGroupTaskAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupTaskAlgorithm.hpp @@ -19,7 +19,7 @@ #include <omp.h> template <class OctreeClass, class CellContainerClass, class CellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass> -class FGroupTaskAlgorithm { +class FGroupTaskAlgorithm : public FAbstractAlgorithm { protected: template <class OtherBlockClass> struct BlockInteractions{ @@ -40,6 +40,8 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(inKernels, "kernels cannot be null"); + FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); + kernels = new KernelClass*[MaxThreads]; #pragma omp parallel for schedule(static) num_threads(MaxThreads) for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ @@ -61,7 +63,11 @@ public: delete[] kernels; } - void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ +protected: + /** + * Runs the complete algorithm. + */ + void executeCore(const unsigned operationsToProceed) override { FLOG( FLog::Controller << "\tStart FGroupTaskAlgorithm\n" ); #pragma omp parallel num_threads(MaxThreads) @@ -101,7 +107,6 @@ public: } } -protected: /** * This function is creating the interactions vector between blocks. * It fills externalInteractionsAllLevel and externalInteractionsLeafLevel. @@ -308,7 +313,7 @@ protected: void upwardPass(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-2 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); @@ -388,7 +393,7 @@ protected: void transferPass(){ FLOG( FTic timer; ); FLOG( FTic timerInBlock; FTic timerOutBlock; ); - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ FLOG( timerInBlock.tic() ); { typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); @@ -499,7 +504,7 @@ protected: void downardPass(){ FLOG( FTic timer; ); - for(int idxLevel = 2 ; idxLevel <= tree->getHeight()-2 ; ++idxLevel){ + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); diff --git a/Src/GroupTree/Core/FGroupTaskDepAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskDepAlgorithm.hpp index a7ebd08cb2634436a6d79ae48cf565e9d7cbee18..80f7241799d9abb39f065bad3e5b4e35bc028254 100644 --- a/Src/GroupTree/Core/FGroupTaskDepAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupTaskDepAlgorithm.hpp @@ -21,7 +21,7 @@ template <class OctreeClass, class CellContainerClass, class CellClass, class SymboleCellClass, class PoleCellClass, class LocalCellClass, class KernelClass, class ParticleGroupClass, class ParticleContainerClass> -class FGroupTaskDepAlgorithm { +class FGroupTaskDepAlgorithm : public FAbstractAlgorithm { protected: template <class OtherBlockClass> struct BlockInteractions{ @@ -42,6 +42,8 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(inKernels, "kernels cannot be null"); + FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); + kernels = new KernelClass*[MaxThreads]; #pragma omp parallel for schedule(static) num_threads(MaxThreads) for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){ @@ -63,7 +65,11 @@ public: delete[] kernels; } - void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ +protected: + /** + * Runs the complete algorithm. + */ + void executeCore(const unsigned operationsToProceed) override { FLOG( FLog::Controller << "\tStart FGroupTaskDepAlgorithm\n" ); #pragma omp parallel num_threads(MaxThreads) @@ -96,7 +102,7 @@ public: } } -protected: + /** * This function is creating the interactions vector between blocks. * It fills externalInteractionsAllLevel and externalInteractionsLeafLevel. @@ -304,7 +310,7 @@ protected: void upwardPass(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-2 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); @@ -388,7 +394,7 @@ protected: void transferPass(){ FLOG( FTic timer; ); FLOG( FTic timerInBlock; FTic timerOutBlock; ); - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ FLOG( timerInBlock.tic() ); { typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); @@ -502,7 +508,7 @@ protected: void downardPass(){ FLOG( FTic timer; ); - for(int idxLevel = 2 ; idxLevel <= tree->getHeight()-2 ; ++idxLevel){ + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ typename OctreeClass::CellGroupIterator iterCells = tree->cellsBegin(idxLevel); const typename OctreeClass::CellGroupIterator endCells = tree->cellsEnd(idxLevel); diff --git a/Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp index ff31b802f0cec54577fe51839a1f853cf72f3c91..abc6fa653cccae2c7c8431975b904a42331894f9 100644 --- a/Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupTaskStarpuAlgorithm.hpp @@ -48,7 +48,7 @@ template <class OctreeClass, class CellContainerClass, class KernelClass, class , class StarPUOpenClWrapperClass = FStarPUOpenClWrapper<KernelClass, FOpenCLDeviceWrapper<KernelClass>> #endif > -class FGroupTaskStarPUAlgorithm { +class FGroupTaskStarPUAlgorithm : public FAbstractAlgorithm { protected: typedef FGroupTaskStarPUAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass #ifdef SCALFMM_ENABLE_CUDA_KERNEL @@ -134,6 +134,8 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(inKernels, "kernels cannot be null"); + FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); + struct starpu_conf conf; FAssertLF(starpu_conf_init(&conf) == 0); FStarPUFmmPriorities::Controller().init(&conf, tree->getHeight(), inKernels); @@ -235,7 +237,11 @@ public: starpu_shutdown(); } - void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ +protected: + /** + * Runs the complete algorithm. + */ + void executeCore(const unsigned operationsToProceed) override { FLOG( FLog::Controller << "\tStart FGroupTaskStarPUAlgorithm\n" ); const bool directOnly = (tree->getHeight() <= 2); @@ -263,7 +269,7 @@ public: starpu_pause(); } -protected: + void initCodelet(){ memset(&p2m_cl, 0, sizeof(p2m_cl)); #ifdef STARPU_USE_CPU @@ -738,7 +744,7 @@ protected: void upwardPass(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-2 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ int idxSubGroup = 0; for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ @@ -799,7 +805,7 @@ protected: void transferPass(){ FLOG( FTic timer; ); FLOG( FTic timerInBlock; FTic timerOutBlock; ); - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ FLOG( timerInBlock.tic() ); for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ starpu_insert_task(&m2l_cl_in, @@ -848,7 +854,7 @@ protected: void downardPass(){ FLOG( FTic timer; ); - for(int idxLevel = 2 ; idxLevel <= tree->getHeight()-2 ; ++idxLevel){ + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ int idxSubGroup = 0; for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ diff --git a/Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp b/Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp index a2d4a1ed0f49ff5666c875f451881f697f398880..c6301bec83d7fa20233dfdce3b4db2230371491e 100644 --- a/Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp +++ b/Src/GroupTree/Core/FGroupTaskStarpuMpiAlgorithm.hpp @@ -54,7 +54,7 @@ template <class OctreeClass, class CellContainerClass, class KernelClass, class , class StarPUOpenClWrapperClass = FStarPUOpenClWrapper<KernelClass, FOpenCLDeviceWrapper<KernelClass>> #endif > -class FGroupTaskStarPUMpiAlgorithm { +class FGroupTaskStarPUMpiAlgorithm : public FAbstractAlgorithm { protected: typedef FGroupTaskStarPUMpiAlgorithm<OctreeClass, CellContainerClass, KernelClass, ParticleGroupClass, StarPUCpuWrapperClass #ifdef SCALFMM_ENABLE_CUDA_KERNEL @@ -151,6 +151,8 @@ public: FAssertLF(tree, "tree cannot be null"); FAssertLF(inKernels, "kernels cannot be null"); + FAbstractAlgorithm::setNbLevelsInTree(tree->getHeight()); + struct starpu_conf conf; FAssertLF(starpu_conf_init(&conf) == 0); FStarPUFmmPriorities::Controller().init(&conf, tree->getHeight(), inKernels); @@ -254,7 +256,11 @@ public: starpu_shutdown(); } - void execute(const unsigned operationsToProceed = FFmmNearAndFarFields){ +protected: + /** + * Runs the complete algorithm. + */ + void executeCore(const unsigned operationsToProceed) override { FLOG( FLog::Controller << "\tStart FGroupTaskStarPUMpiAlgorithm\n" ); const bool directOnly = (tree->getHeight() <= 2); @@ -291,7 +297,7 @@ public: starpu_pause(); } -protected: + void initCodelet(){ memset(&p2m_cl, 0, sizeof(p2m_cl)); #ifdef STARPU_USE_CPU @@ -921,7 +927,7 @@ protected: FAssertLF(tree->getHeight() == int(remoteCellGroups.size())); const bool directOnly = (tree->getHeight() <= 2); if(!directOnly){ - for(int idxLevel = 0 ; idxLevel < tree->getHeight() ; ++idxLevel){ + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel){ for(int idxHandle = 0 ; idxHandle < int(remoteCellGroups[idxLevel].size()) ; ++idxHandle){ if(remoteCellGroups[idxLevel][idxHandle].ptrSymb){ FAssertLF(remoteCellGroups[idxLevel][idxHandle].ptrUp); @@ -1328,7 +1334,7 @@ protected: void upwardPass(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-2 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FMath::Min(tree->getHeight() - 2, FAbstractAlgorithm::lowerWorkingLevel - 1) ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ int idxSubGroup = 0; for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) @@ -1507,7 +1513,7 @@ protected: void transferPassMpi(){ FLOG( FTic timer; ); - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ for(int idxInteraction = 0; idxInteraction < int(externalInteractionsAllLevelMpi[idxLevel][idxGroup].size()) ; ++idxInteraction){ const int interactionid = externalInteractionsAllLevelMpi[idxLevel][idxGroup][idxInteraction].otherBlockId; @@ -1537,7 +1543,7 @@ protected: void transferPass(){ FLOG( FTic timer; ); FLOG( FTic timerInBlock; FTic timerOutBlock; ); - for(int idxLevel = tree->getHeight()-1 ; idxLevel >= 2 ; --idxLevel){ + for(int idxLevel = FAbstractAlgorithm::lowerWorkingLevel-1 ; idxLevel >= FAbstractAlgorithm::upperWorkingLevel ; --idxLevel){ FLOG( timerInBlock.tic() ); for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){ starpu_insert_task(&m2l_cl_in, @@ -1586,7 +1592,7 @@ protected: void downardPass(){ FLOG( FTic timer; ); - for(int idxLevel = 2 ; idxLevel <= tree->getHeight()-2 ; ++idxLevel){ + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel - 1 ; ++idxLevel){ ///////////////////////////////////////////////////////////// // Exchange for MPI ///////////////////////////////////////////////////////////// diff --git a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp index 0558ae5cdc3535c7104c71aef6d2855d653d61a0..5f3ef5f9ce435daa70170196e6d8985d10576f3a 100644 --- a/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp +++ b/Src/Kernels/Interpolation/FInterpMatrixKernel.hpp @@ -433,6 +433,107 @@ struct FInterpMatrixKernelLJ : FInterpAbstractMatrixKernel<FReal> } }; + +/// One over (a+r^2) +template <class FReal> +struct FInterpMatrixKernelAPLUSRR : FInterpAbstractMatrixKernel<FReal> +{ + static const KERNEL_FUNCTION_TYPE Type = NON_HOMOGENEOUS; + static const unsigned int NCMP = 1; //< number of components + static const unsigned int NPV = 1; //< dim of physical values + static const unsigned int NPOT = 1; //< dim of potentials + static const unsigned int NRHS = 1; //< dim of mult exp + static const unsigned int NLHS = 1; //< dim of loc exp + + const FReal CoreWidth; + + FInterpMatrixKernelAPLUSRR(const FReal inCoreWidth = .25) + : CoreWidth(inCoreWidth) + {} + + // copy ctor + FInterpMatrixKernelAPLUSRR(const FInterpMatrixKernelAPLUSRR& other) + : CoreWidth(other.CoreWidth) + {} + + static const char* getID() { return "ONE_OVER_A_PLUS_RR"; } + + // returns position in reduced storage + int getPosition(const unsigned int) const + {return 0;} + + // evaluate interaction + template <class ValueClass> + ValueClass evaluate(const ValueClass& x1, const ValueClass& y1, const ValueClass& z1, + const ValueClass& x2, const ValueClass& y2, const ValueClass& z2) const + { + const ValueClass diffx = (x1-x2); + const ValueClass diffy = (y1-y2); + const ValueClass diffz = (z1-z2); + return FMath::One<ValueClass>() / (FMath::ConvertTo<ValueClass,FReal>(CoreWidth) + // WHY FReal?? + diffx*diffx + + diffy*diffy + + diffz*diffz); + } + + // evaluate interaction (blockwise) + template <class ValueClass> + void evaluateBlock(const ValueClass& x1, const ValueClass& y1, const ValueClass& z1, + const ValueClass& x2, const ValueClass& y2, const ValueClass& z2, ValueClass* block) const + { + block[0]=this->evaluate(x1,y1,z1,x2,y2,z2); + } + + // evaluate interaction and derivative (blockwise) + template <class ValueClass> + void evaluateBlockAndDerivative(const ValueClass& x1, const ValueClass& y1, const ValueClass& z1, + const ValueClass& x2, const ValueClass& y2, const ValueClass& z2, + ValueClass block[1], ValueClass blockDerivative[3]) const + { + const ValueClass diffx = (x1-x2); + const ValueClass diffy = (y1-y2); + const ValueClass diffz = (z1-z2); + const ValueClass r2 = (diffx*diffx + + diffy*diffy + + diffz*diffz); + const ValueClass one_over_a_plus_r2 = FMath::One<ValueClass>() / (FMath::ConvertTo<ValueClass,FReal>(CoreWidth)+r2); + const ValueClass one_over_a_plus_r2_squared = one_over_a_plus_r2*one_over_a_plus_r2; + + block[0] = one_over_a_plus_r2; + + // TODO Fix derivative + const ValueClass coef = FMath::ConvertTo<ValueClass,FReal>(-2.) * one_over_a_plus_r2_squared; + blockDerivative[0] = coef * diffx; + blockDerivative[1] = coef * diffy; + blockDerivative[2] = coef * diffz; + + } + + FReal getScaleFactor(const FReal, const int) const + { + // return 1 because non homogeneous kernel functions cannot be scaled!!! + return FReal(1.0); + } + + FReal getScaleFactor(const FReal) const + { + // return 1 because non homogeneous kernel functions cannot be scaled!!! + return FReal(1.0); + } + + FReal evaluate(const FPoint<FReal>& p1, const FPoint<FReal>& p2) const{ + return evaluate<FReal>(p1.getX(), p1.getY(), p1.getZ(), p2.getX(), p2.getY(), p2.getZ()); + } + void evaluateBlock(const FPoint<FReal>& p1, const FPoint<FReal>& p2, FReal* block) const{ + evaluateBlock<FReal>(p1.getX(), p1.getY(), p1.getZ(), p2.getX(), p2.getY(), p2.getZ(), block); + } + void evaluateBlockAndDerivative(const FPoint<FReal>& p1, const FPoint<FReal>& p2, + FReal block[1], FReal blockDerivative[3]) const { + evaluateBlockAndDerivative<FReal>(p1.getX(), p1.getY(), p1.getZ(), p2.getX(), p2.getY(), p2.getZ(), block, blockDerivative); + } +}; + + //////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////// // diff --git a/Src/Kernels/Uniform/FUnifKernel.hpp b/Src/Kernels/Uniform/FUnifKernel.hpp index 44ad33a64df11b57f6d7c5b92a8c49a364cb7ca7..d7d003b1a3ad1ecdcc01543e558bb0cadd7175ad 100644 --- a/Src/Kernels/Uniform/FUnifKernel.hpp +++ b/Src/Kernels/Uniform/FUnifKernel.hpp @@ -60,6 +60,8 @@ class FUnifKernel /// Needed for M2L operator const M2LHandlerClass M2LHandler; + /// Leaf level separation criterion + const int LeafLevelSeparationCriterion; public: /** @@ -70,12 +72,15 @@ public: FUnifKernel(const int inTreeHeight, const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, - const MatrixKernelClass *const inMatrixKernel) + const MatrixKernelClass *const inMatrixKernel, + const int inLeafLevelSeparationCriterion = 1) : FAbstractUnifKernel< FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>(inTreeHeight,inBoxWidth,inBoxCenter), MatrixKernel(inMatrixKernel), M2LHandler(MatrixKernel, inTreeHeight, - inBoxWidth) + inBoxWidth, + inLeafLevelSeparationCriterion), + LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { } @@ -186,7 +191,13 @@ public: ContainerClass* const NeighborSourceParticles[27], const int /* size */) { - DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); + // Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf + if(LeafLevelSeparationCriterion==1) + DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2P(TargetParticles,NeighborSourceParticles,MatrixKernel); + // Nearfield interactions are only computed within the target leaf + if(LeafLevelSeparationCriterion==0) + DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2PRemote(TargetParticles,NeighborSourceParticles,0,MatrixKernel); + // If criterion equals -1 then no P2P need to be performed. } @@ -194,7 +205,13 @@ public: ContainerClass* const FRestrict inTargets, const ContainerClass* const FRestrict /*inSources*/, ContainerClass* const inNeighbors[27], const int /*inSize*/) { - DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); + // Standard FMM separation criterion, i.e. max 27 neighbor clusters per leaf + if(LeafLevelSeparationCriterion==1) + DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,27,MatrixKernel); + // Nearfield interactions are only computed within the target leaf + if(LeafLevelSeparationCriterion==0) + DirectInteractionComputer<FReal,MatrixKernelClass::NCMP, NVALS>::P2PRemote(inTargets,inNeighbors,0,MatrixKernel); + // If criterion equals -1 then no P2P need to be performed. } }; diff --git a/Src/Kernels/Uniform/FUnifM2LHandler.hpp b/Src/Kernels/Uniform/FUnifM2LHandler.hpp index 92a2abfab88eaf2f01d1b3dad58a0bceee9c7fe7..0c38e1861998931213f0df9accd0c32022b07c83 100644 --- a/Src/Kernels/Uniform/FUnifM2LHandler.hpp +++ b/Src/Kernels/Uniform/FUnifM2LHandler.hpp @@ -38,14 +38,14 @@ /*! Precomputation of the 316 interactions by evaluation of the matrix kernel on the uniform grid and transformation into Fourier space. PB: Compute() does not belong to the M2LHandler like it does in the Chebyshev kernel. This allows much nicer specialization of the M2LHandler class with respect to the homogeneity of the kernel of interaction like in the ChebyshevSym kernel.*/ template < class FReal,int ORDER, typename MatrixKernelClass> -static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, FComplex<FReal>* &FC) +static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, FComplex<FReal>* &FC, const int SeparationCriterion = 1) { // allocate memory and store compressed M2L operators if (FC) throw std::runtime_error("M2L operators are already set"); // dimensions of operators const unsigned int order = ORDER; const unsigned int nnodes = TensorTraits<ORDER>::nnodes; - const unsigned int ninteractions = 316; + const unsigned int ninteractions = 316+26*(SeparationCriterion<1 ? 1 : 0) + 1*(SeparationCriterion<0 ? 1 : 0); typedef FUnifTensor<FReal,ORDER> TensorType; // interpolation points of source (Y) and target (X) cell @@ -53,7 +53,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel // set roots of target cell (X) TensorType::setRoots(FPoint<FReal>(0.,0.,0.), CellWidth, X); - // allocate memory and compute 316 m2l operators + // allocate memory and compute 316 m2l operators (342 if separation equals 0, 343 if separation equals -1) FReal *_C; FComplex<FReal> *_FC; @@ -78,7 +78,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel for (int i=-3; i<=3; ++i) { for (int j=-3; j<=3; ++j) { for (int k=-3; k<=3; ++k) { - if (abs(i)>1 || abs(j)>1 || abs(k)>1) { + if (abs(i)>SeparationCriterion || abs(j)>SeparationCriterion || abs(k)>SeparationCriterion) { // set roots of source cell (Y) const FPoint<FReal> cy(CellWidth*FReal(i), CellWidth*FReal(j), CellWidth*FReal(k)); FUnifTensor<FReal,order>::setRoots(cy, CellWidth, Y); @@ -110,7 +110,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel } } if (counter != ninteractions) - throw std::runtime_error("Number of interactions must correspond to 316"); + throw std::runtime_error("Number of interactions must correspond to " + std::to_string(ninteractions)); // Free _C delete [] _C; @@ -126,7 +126,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel for (int j=-3; j<=3; ++j) for (int k=-3; k<=3; ++k) { const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3); - if (abs(i)>1 || abs(j)>1 || abs(k)>1) { + if (abs(i)>SeparationCriterion || abs(j)>SeparationCriterion || abs(k)>SeparationCriterion) { FBlas::c_copy(opt_rc, reinterpret_cast<FReal*>(_FC + counter*rc), reinterpret_cast<FReal*>(FC + idx*opt_rc)); counter++; @@ -136,7 +136,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal Cel } if (counter != ninteractions) - throw std::runtime_error("Number of interactions must correspond to 316"); + throw std::runtime_error("Number of interactions must correspond to " + std::to_string(ninteractions)); delete [] _FC; } @@ -182,6 +182,8 @@ class FUnifM2LHandler<FReal, ORDER,HOMOGENEOUS> DftClass Dft; const unsigned int opt_rc; // specific to real valued kernel + /// Leaf level separation criterion + const int LeafLevelSeparationCriterion; static const std::string getFileName() { @@ -195,8 +197,8 @@ class FUnifM2LHandler<FReal, ORDER,HOMOGENEOUS> public: template <typename MatrixKernelClass> - FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal) - : FC(nullptr), Dft(), opt_rc(rc/2+1) + FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const int inLeafLevelSeparationCriterion = 1) + : FC(nullptr), Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -214,7 +216,7 @@ public: * Copy constructor */ FUnifM2LHandler(const FUnifM2LHandler& other) - : FC(other.FC), Dft(), opt_rc(other.opt_rc) + : FC(other.FC), Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -239,7 +241,7 @@ public: // Compute matrix of interactions const FReal ReferenceCellWidth = FReal(2.); FComplex<FReal>* pFC = NULL; - Compute<FReal,order>(MatrixKernel,ReferenceCellWidth,pFC); + Compute<FReal,order>(MatrixKernel,ReferenceCellWidth,pFC,LeafLevelSeparationCriterion); FC.assign(pFC); // Compute memory usage @@ -250,7 +252,10 @@ public: std::cout << "Compute and set M2L operators ("<< long(sizeM2L/**1e-6*/) <<" B) in " << time.tacAndElapsed() << "sec." << std::endl; } - + + unsigned long long getMemory() const { + return 343*opt_rc*sizeof(FComplex<FReal>); + } /** * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be @@ -341,6 +346,8 @@ class FUnifM2LHandler<FReal,ORDER,NON_HOMOGENEOUS> DftClass Dft; const unsigned int opt_rc; // specific to real valued kernel + /// Leaf level separation criterion + const int LeafLevelSeparationCriterion; static const std::string getFileName() { @@ -354,10 +361,10 @@ class FUnifM2LHandler<FReal,ORDER,NON_HOMOGENEOUS> public: template <typename MatrixKernelClass> - FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth) + FUnifM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth, const int inLeafLevelSeparationCriterion = 1) : TreeHeight(inTreeHeight), RootCellWidth(inRootCellWidth), - Dft(), opt_rc(rc/2+1) + Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -383,7 +390,7 @@ public: : FC(other.FC), TreeHeight(other.TreeHeight), RootCellWidth(other.RootCellWidth), - Dft(), opt_rc(other.opt_rc) + Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -414,9 +421,12 @@ public: CellWidth /= FReal(2.); // at level 2 for (unsigned int l=2; l<TreeHeight; ++l) { + // Determine separation criteria wrt level + const int SeparationCriterion = (l != TreeHeight-1 ? 1 : LeafLevelSeparationCriterion); + // check if already set if (FC[l]) throw std::runtime_error("M2L operator already set"); - Compute<FReal,order>(MatrixKernel,CellWidth,FC[l]); + Compute<FReal,order>(MatrixKernel,CellWidth,FC[l],SeparationCriterion); CellWidth /= FReal(2.); // at level l+1 } @@ -429,6 +439,9 @@ public: << time.tacAndElapsed() << "sec." << std::endl; } + unsigned long long getMemory() const { + return (TreeHeight-2)*343*opt_rc*sizeof(FComplex<FReal>); + } /** * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be diff --git a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp index 3becc11741dc3d466f2ccd34644cb7ec694d322b..4ce8f472abc4c24385cbd8e4654a2a5b6822f8ee 100644 --- a/Src/Kernels/Uniform/FUnifTensorialKernel.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialKernel.hpp @@ -84,6 +84,9 @@ protected://PB: for OptiDis /// Needed for M2L operator const M2LHandlerClass M2LHandler; + /// Leaf level separation criterion + const int LeafLevelSeparationCriterion; + public: /** * The constructor initializes all constant attributes and it reads the @@ -94,13 +97,16 @@ public: const FReal inBoxWidth, const FPoint<FReal>& inBoxCenter, const MatrixKernelClass *const inMatrixKernel, - const FReal inBoxWidthExtension) + const FReal inBoxWidthExtension, + const int inLeafLevelSeparationCriterion = 1) : FAbstractUnifKernel< FReal, CellClass, ContainerClass, MatrixKernelClass, ORDER, NVALS>(inTreeHeight,inBoxWidth,inBoxCenter,inBoxWidthExtension), MatrixKernel(inMatrixKernel), M2LHandler(MatrixKernel, inTreeHeight, inBoxWidth, - inBoxWidthExtension) + inBoxWidthExtension, + inLeafLevelSeparationCriterion), + LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { } diff --git a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp index 65a8a138359274b624dadaa23643a665b80302fc..cf92d7b28ee1ad494630091d21848dfe68eb4ea0 100644 --- a/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp +++ b/Src/Kernels/Uniform/FUnifTensorialM2LHandler.hpp @@ -42,12 +42,13 @@ template < class FReal, int ORDER, class MatrixKernelClass> static void Compute(const MatrixKernelClass *const MatrixKernel, const FReal CellWidth, const FReal CellWidthExtension, - FComplex<FReal>** &FC) + FComplex<FReal>** &FC, + const int SeparationCriterion = 1) { // dimensions of operators const unsigned int order = ORDER; const unsigned int nnodes = TensorTraits<ORDER>::nnodes; - const unsigned int ninteractions = 316; + const unsigned int ninteractions = 316+26*(SeparationCriterion<1 ? 1 : 0) + 1*(SeparationCriterion<0 ? 1 : 0); const unsigned int ncmp = MatrixKernelClass::NCMP; // utils @@ -90,11 +91,14 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, unsigned int perm[rc]; TensorType::setStoragePermutation(perm); + // Allocate intermediate interaction block + FReal* block = new FReal[ncmp]; + unsigned int counter = 0; for (int i=-3; i<=3; ++i) { for (int j=-3; j<=3; ++j) { for (int k=-3; k<=3; ++k) { - if (abs(i)>1 || abs(j)>1 || abs(k)>1) { + if (abs(i)>SeparationCriterion || abs(j)>SeparationCriterion || abs(k)>SeparationCriterion) { // set roots of source cell (Y) const FPoint<FReal> cy(CellWidth*FReal(i), CellWidth*FReal(j), CellWidth*FReal(k)); FUnifTensor<FReal,order>::setRoots(cy, ExtendedCellWidth, Y); @@ -104,8 +108,6 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, for(unsigned int m=0; m<2*order-1; ++m) for(unsigned int n=0; n<2*order-1; ++n){ // Compute current M2L interaction (block matrix) - FReal* block; - block = new FReal[ncmp]; MatrixKernel->evaluateBlock(X[node_ids_pairs[ido][0]], Y[node_ids_pairs[ido][1]], block); @@ -133,6 +135,9 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, if (counter != ninteractions) throw std::runtime_error("Number of interactions must correspond to 316"); + // Free block + delete [] block; + // Free _C for (unsigned int d=0; d<ncmp; ++d) delete [] _C[d]; @@ -149,7 +154,7 @@ static void Compute(const MatrixKernelClass *const MatrixKernel, for (int j=-3; j<=3; ++j) for (int k=-3; k<=3; ++k) { const unsigned int idx = (i+3)*7*7 + (j+3)*7 + (k+3); - if (abs(i)>1 || abs(j)>1 || abs(k)>1) { + if (abs(i)>SeparationCriterion || abs(j)>SeparationCriterion || abs(k)>SeparationCriterion) { for (unsigned int d=0; d<ncmp; ++d) FBlas::c_copy(opt_rc, reinterpret_cast<FReal*>(_FC[d] + counter*rc), reinterpret_cast<FReal*>(FC[d] + idx*opt_rc)); @@ -207,6 +212,8 @@ class FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,HOMOGENEOUS> DftClass Dft; const unsigned int opt_rc; // specific to real valued kernel + /// Leaf level separation criterion + const int LeafLevelSeparationCriterion; static const std::string getFileName() { @@ -219,9 +226,9 @@ class FUnifTensorialM2LHandler<FReal, ORDER,MatrixKernelClass,HOMOGENEOUS> public: - FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const FReal inCellWidthExtension) + FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int, const FReal, const FReal inCellWidthExtension, const int inLeafLevelSeparationCriterion = 1) : CellWidthExtension(inCellWidthExtension), - Dft(), opt_rc(rc/2+1) + Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -245,7 +252,7 @@ public: FUnifTensorialM2LHandler(const FUnifTensorialM2LHandler& other) : FC(other.FC), CellWidthExtension(other.CellWidthExtension), - Dft(), opt_rc(other.opt_rc) + Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -278,7 +285,7 @@ public: // but it NEEDS to match the numerator of the scale factor in matrix kernel! // Therefore box width extension is not yet supported for homog kernels const FReal ReferenceCellWidth = FReal(2.); - Compute<FReal,order>(MatrixKernel,ReferenceCellWidth, 0., FC); + Compute<FReal,order>(MatrixKernel,ReferenceCellWidth, 0., FC, LeafLevelSeparationCriterion); // Compute memory usage unsigned long sizeM2L = 343*ncmp*opt_rc*sizeof(FComplex<FReal>); @@ -288,6 +295,10 @@ public: << time.tacAndElapsed() << "sec." << std::endl; } + unsigned long long getMemory() const { + return 343*ncmp*opt_rc*sizeof(FComplex<FReal>); + } + /** * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be * seen as part of the L2L operation. @@ -396,6 +407,8 @@ class FUnifTensorialM2LHandler<FReal,ORDER,MatrixKernelClass,NON_HOMOGENEOUS> DftClass Dft; const unsigned int opt_rc; // specific to real valued kernel + /// Leaf level separation criterion + const int LeafLevelSeparationCriterion; static const std::string getFileName() { @@ -408,11 +421,11 @@ class FUnifTensorialM2LHandler<FReal,ORDER,MatrixKernelClass,NON_HOMOGENEOUS> public: - FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth, const FReal inCellWidthExtension) + FUnifTensorialM2LHandler(const MatrixKernelClass *const MatrixKernel, const unsigned int inTreeHeight, const FReal inRootCellWidth, const FReal inCellWidthExtension, const int inLeafLevelSeparationCriterion = 1) : TreeHeight(inTreeHeight), RootCellWidth(inRootCellWidth), CellWidthExtension(inCellWidthExtension), - Dft(), opt_rc(rc/2+1) + Dft(), opt_rc(rc/2+1), LeafLevelSeparationCriterion(inLeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -441,7 +454,7 @@ public: TreeHeight(other.TreeHeight), RootCellWidth(other.RootCellWidth), CellWidthExtension(other.CellWidthExtension), - Dft(), opt_rc(other.opt_rc) + Dft(), opt_rc(other.opt_rc), LeafLevelSeparationCriterion(other.LeafLevelSeparationCriterion) { // init DFT const int steps[dimfft] = {rc}; @@ -471,10 +484,12 @@ public: FReal CellWidth = RootCellWidth / FReal(2.); // at level 1 CellWidth /= FReal(2.); // at level 2 for (unsigned int l=2; l<TreeHeight; ++l) { + // Determine separation criteria wrt level + const int SeparationCriterion = (l != TreeHeight-1 ? 1 : LeafLevelSeparationCriterion); // check if already set for (unsigned int d=0; d<ncmp; ++d) if (FC[l][d]) throw std::runtime_error("M2L operator already set"); - Compute<FReal,order>(MatrixKernel,CellWidth,CellWidthExtension,FC[l]); + Compute<FReal,order>(MatrixKernel,CellWidth,CellWidthExtension,FC[l],SeparationCriterion); CellWidth /= FReal(2.); // at level l+1 } @@ -486,6 +501,10 @@ public: << time.tacAndElapsed() << "sec." << std::endl; } + unsigned long long getMemory() const { + return (TreeHeight-2)*343*ncmp*opt_rc*sizeof(FComplex<FReal>); + } + /** * Expands potentials \f$x+=IDFT(X)\f$ of a target cell. This operation can be * seen as part of the L2L operation. diff --git a/Src/Utils/FParameterNames.hpp b/Src/Utils/FParameterNames.hpp index 2825a4bcf828a4638b8622095758f91dd3b658b9..6fcfcf4741994f4624adb53a1b170a2a272a1a2b 100644 --- a/Src/Utils/FParameterNames.hpp +++ b/Src/Utils/FParameterNames.hpp @@ -182,6 +182,11 @@ static const FParameterNames PhysicalValue = { "The physical value of the particles." }; +static const FParameterNames SeparationCriterion = { + {"-sep", "--separation-criterion"} , + "Specify number of clusters separing 2 well-separated clusters." +}; + /** To print a list of parameters */ inline void PrintUsedOptions(const std::vector<FParameterNames>& options){ std::cout << ">> Here is the list of the parameters you can pass to this application :\n"; diff --git a/Tests/Kernels/testSmoothUnifAlgorithm.cpp b/Tests/Kernels/testSmoothUnifAlgorithm.cpp new file mode 100644 index 0000000000000000000000000000000000000000..72dee1005bab053e4ffc5b121fd3e3260a26bdcd --- /dev/null +++ b/Tests/Kernels/testSmoothUnifAlgorithm.cpp @@ -0,0 +1,237 @@ +// =================================================================================== +// Copyright ScalFmm 2011 INRIA, Olivier Coulaud, Berenger Bramas, Matthias Messner +// olivier.coulaud@inria.fr, berenger.bramas@inria.fr +// This software is a computer program whose purpose is to compute the FMM. +// +// This software is governed by the CeCILL-C and LGPL licenses and +// abiding by the rules of distribution of free software. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public and CeCILL-C Licenses for more details. +// "http://www.cecill.info". +// "http://www.gnu.org/licenses". +// =================================================================================== + + +/** + *@author Pierre Blanchard + * + * **/ +// ==== CMAKE ===== +// @FUSE_FFT +// ================ +// Keep in private GIT +// @SCALFMM_PRIVATE + + +#include <iostream> + +#include <cstdio> +#include <cstdlib> + +#include "Files/FFmaGenericLoader.hpp" + + +#include "Kernels/Uniform/FUnifCell.hpp" +#include "Kernels/Interpolation/FInterpMatrixKernel.hpp" +#include "Kernels/Uniform/FUnifKernel.hpp" + +#include "Components/FSimpleLeaf.hpp" +#include "Kernels/P2P/FP2PParticleContainerIndexed.hpp" + +#include "Utils/FParameters.hpp" +#include "Utils/FMemUtils.hpp" + +#include "Containers/FOctree.hpp" +#include "Containers/FVector.hpp" + +#include "Core/FFmmAlgorithm.hpp" +#include "Core/FFmmAlgorithmThread.hpp" + +#include "../../Src/Utils/FParameterNames.hpp" + +/** + * This program runs the FMM Algorithm with the Uniform kernel + * and a separation criterion of -1 (i.e. no nearfield and only farfield interaction) + * and compares the results with a direct computation. + * The matrix kernel has to be smooth at the origin, e.g. 1/r^2. + */ + +// Simply create particles and try the kernels +int main(int argc, char* argv[]) +{ + FHelpDescribeAndExit(argc, argv, + "Test Uniform kernel and compare it with the direct computation.", + FParameterDefinitions::OctreeHeight,FParameterDefinitions::NbThreads, + FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::InputFile); + + typedef double FReal; + const char* const filename = FParameters::getStr(argc,argv,FParameterDefinitions::InputFile.options, "../Data/test20k.fma"); + const unsigned int TreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeHeight.options, 3); + const unsigned int SubTreeHeight = FParameters::getValue(argc, argv, FParameterDefinitions::OctreeSubHeight.options, 2); + const unsigned int NbThreads = FParameters::getValue(argc, argv, FParameterDefinitions::NbThreads.options, 1); + +#ifdef _OPENMP + omp_set_num_threads(NbThreads); + std::cout << "\n>> Using " << omp_get_max_threads() << " threads.\n" << std::endl; +#else + std::cout << "\n>> Sequential version.\n" << std::endl; +#endif + + // init timer + FTic time; + + // interaction kernel evaluator +// typedef FInterpMatrixKernelR<FReal> MatrixKernelClass; + typedef FInterpMatrixKernelAPLUSRR<FReal> MatrixKernelClass; + const MatrixKernelClass MatrixKernel; + + // init particles position and physical value + struct TestParticle{ + FPoint<FReal> position; + FReal forces[3]; + FReal physicalValue; + FReal potential; + }; + + // open particle file + FFmaGenericLoader<FReal> loader(filename); + if(!loader.isOpen()) throw std::runtime_error("Particle file couldn't be opened!"); + + TestParticle* const particles = new TestParticle[loader.getNumberOfParticles()]; + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + FPoint<FReal> position; + FReal physicalValue = 0.0; + loader.fillParticle(&position,&physicalValue); + // get copy + particles[idxPart].position = position; + particles[idxPart].physicalValue = physicalValue; + particles[idxPart].potential = 0.0; + particles[idxPart].forces[0] = 0.0; + particles[idxPart].forces[1] = 0.0; + particles[idxPart].forces[2] = 0.0; + } + + //////////////////////////////////////////////////////////////////// + + { // begin direct computation + std::cout << "\nDirect computation ... " << std::endl; + time.tic(); + { + for(FSize idxTarget = 0 ; idxTarget < loader.getNumberOfParticles() ; ++idxTarget){ + for(FSize idxOther = idxTarget + 1 ; idxOther < loader.getNumberOfParticles() ; ++idxOther){ + FP2P::MutualParticles(particles[idxTarget].position.getX(), particles[idxTarget].position.getY(), + particles[idxTarget].position.getZ(), particles[idxTarget].physicalValue, + &particles[idxTarget].forces[0], &particles[idxTarget].forces[1], + &particles[idxTarget].forces[2], &particles[idxTarget].potential, + particles[idxOther].position.getX(), particles[idxOther].position.getY(), + particles[idxOther].position.getZ(), particles[idxOther].physicalValue, + &particles[idxOther].forces[0], &particles[idxOther].forces[1], + &particles[idxOther].forces[2], &particles[idxOther].potential, + &MatrixKernel); + } + } + } + time.tac(); + std::cout << "Done " << "(@Direct computation = " + << time.elapsed() << "s)." << std::endl; + + } // end direct computation + + //////////////////////////////////////////////////////////////////// + + { // begin Lagrange kernel + + // accuracy + const unsigned int ORDER = 5 ; + + // typedefs + typedef FP2PParticleContainerIndexed<FReal> ContainerClass; + typedef FSimpleLeaf<FReal, ContainerClass > LeafClass; + typedef FUnifCell<FReal,ORDER> CellClass; + typedef FOctree<FReal, CellClass,ContainerClass,LeafClass> OctreeClass; + typedef FUnifKernel<FReal,CellClass,ContainerClass,MatrixKernelClass,ORDER> KernelClass; + typedef FFmmAlgorithm<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; + // typedef FFmmAlgorithmThread<OctreeClass,CellClass,ContainerClass,KernelClass,LeafClass> FmmClass; + + // init oct-tree + OctreeClass tree(TreeHeight, SubTreeHeight, loader.getBoxWidth(), loader.getCenterOfBox()); + + // Separation criterion for the leaf clusters + const int LeafLevelSeparationCriterion = FParameters::getValue(argc, argv, FParameterDefinitions::SeparationCriterion.options, 1); + + + { // ----------------------------------------------------- + std::cout << "Creating & Inserting " << loader.getNumberOfParticles() + << " particles ..." << std::endl; + std::cout << "\tHeight : " << TreeHeight << " \t sub-height : " << SubTreeHeight << std::endl; + time.tic(); + + for(FSize idxPart = 0 ; idxPart < loader.getNumberOfParticles() ; ++idxPart){ + // put in tree + tree.insert(particles[idxPart].position, idxPart, particles[idxPart].physicalValue); + } + + time.tac(); + std::cout << "Done " << "(@Creating and Inserting Particles = " + << time.elapsed() << "s)." << std::endl; + } // ----------------------------------------------------- + + { // ----------------------------------------------------- + std::cout << "\nLagrange/Uniform grid FMM (ORDER="<< ORDER << ") ... " << std::endl; + time.tic(); + KernelClass* kernels = new KernelClass(TreeHeight, loader.getBoxWidth(), loader.getCenterOfBox(),&MatrixKernel, LeafLevelSeparationCriterion); + FmmClass algorithm(&tree, kernels, LeafLevelSeparationCriterion); + algorithm.execute(); + time.tac(); + std::cout << "Done " << "(@Algorithm = " << time.elapsed() << "s)." << std::endl; + } // ----------------------------------------------------- + + + { // ----------------------------------------------------- + std::cout << "\nError computation ... " << std::endl; + FMath::FAccurater<FReal> potentialDiff; + FMath::FAccurater<FReal> fx, fy, fz; + + FReal checkPotential[20000]; + + { // Check that each particle has been summed with all other + + tree.forEachLeaf([&](LeafClass* leaf){ + const FReal*const potentials = leaf->getTargets()->getPotentials(); + const FReal*const forcesX = leaf->getTargets()->getForcesX(); + const FReal*const forcesY = leaf->getTargets()->getForcesY(); + const FReal*const forcesZ = leaf->getTargets()->getForcesZ(); + const FSize nbParticlesInLeaf = leaf->getTargets()->getNbParticles(); + const FVector<FSize>& indexes = leaf->getTargets()->getIndexes(); + + for(FSize idxPart = 0 ; idxPart < nbParticlesInLeaf ; ++idxPart){ + const FSize indexPartOrig = indexes[idxPart]; + //PB: store potential in nbParticles array + checkPotential[indexPartOrig]=potentials[idxPart]; + + potentialDiff.add(particles[indexPartOrig].potential,potentials[idxPart]); + fx.add(particles[indexPartOrig].forces[0],forcesX[idxPart]); + fy.add(particles[indexPartOrig].forces[1],forcesY[idxPart]); + fz.add(particles[indexPartOrig].forces[2],forcesZ[idxPart]); + } + }); + } + + // Print for information + std::cout << "Potential " << potentialDiff << std::endl; + std::cout << "Fx " << fx << std::endl; + std::cout << "Fy " << fy << std::endl; + std::cout << "Fz " << fz << std::endl; + } // ----------------------------------------------------- + + } // end Lagrange kernel + + std::cout << "Memory used at the end " << FMemStats::controler.getCurrentAllocated() << " Bytes (" << FMemStats::controler.getCurrentAllocatedMB() << "MB)\n"; + std::cout << "Max memory used " << FMemStats::controler.getMaxAllocated() << " Bytes (" << FMemStats::controler.getMaxAllocatedMB() << "MB)\n"; + std::cout << "Total memory used " << FMemStats::controler.getTotalAllocated() << " Bytes (" << FMemStats::controler.getTotalAllocatedMB() << "MB)\n"; + + return 0; +} diff --git a/Tests/noDist/AlgoLoaderThread.hpp b/Tests/noDist/AlgoLoaderThread.hpp index 7ccec611b719e7026bbfd314fe1623eae954020c..1e68345211c0f9d6c59af3747b920e9859f0a941 100644 --- a/Tests/noDist/AlgoLoaderThread.hpp +++ b/Tests/noDist/AlgoLoaderThread.hpp @@ -24,17 +24,21 @@ public: TreeLoader& _treeLoader; KernelLoader& _kernelLoader; - AlgoLoaderThread(FPerfTestParams& /*params*/, + bool _omp_static_schedule; + + AlgoLoaderThread(FPerfTestParams& params, TreeLoader& treeLoader, KernelLoader& kernelLoader) : _treeLoader(treeLoader), - _kernelLoader(kernelLoader) { + _kernelLoader(kernelLoader), + _omp_static_schedule(params.omp_static_schedule) { } void run() { - FMMClass algo(&(_treeLoader._tree), &(_kernelLoader._kernel), false); + FMMClass algo(&(_treeLoader._tree), &(_kernelLoader._kernel), + _omp_static_schedule); algo.execute(); } }; diff --git a/Tests/noDist/PerfTest.cpp b/Tests/noDist/PerfTest.cpp index 9f694d905f3599a216ef275448d8839b732334f6..25ddbf9a944efe17f6a93fdb1516f171713b0ea7 100644 --- a/Tests/noDist/PerfTest.cpp +++ b/Tests/noDist/PerfTest.cpp @@ -58,8 +58,8 @@ int main (int argc, char** argv) FParameterDefinitions::OctreeHeight, FParameterDefinitions::OctreeSubHeight, FParameterDefinitions::NbThreads, - {{"--algo"},"Algorithm to run (costzones, basic-static," - " basic-dynamic, task)"}); + {{"--algo"},"Algorithm to run (costzones, basic, task)"}, + {{"--schedule"},"OpenMP scheduling policy (static, dynamic)."}); FPerfTestParams params; { using namespace FParameterDefinitions; @@ -70,15 +70,20 @@ int main (int argc, char** argv) params.subTreeHeight = getValue(argc, argv, OctreeSubHeight.options, 2); params.nbThreads = getValue(argc, argv, NbThreads.options, 1); params.algo = getStr(argc,argv,{"--algo"},"task"); + params.omp_static_schedule = + getStr(argc,argv,{"--schedule"},"dynamic") == std::string("static"); } + omp_set_num_threads(params.nbThreads); - if( "basic-dynamic" == params.algo ) { + if( "basic" == params.algo ) { runperf<TreeLoaderFCheb<>, KernelLoaderFChebSym, AlgoLoaderThread>(params); } else if( "task" == params.algo ) { runperf<TreeLoaderFCheb<>, KernelLoaderFChebSym, AlgoLoaderTask>(params); } else if ( "costzones" == params.algo ) { runperf<TreeLoaderFCheb<>, KernelLoaderFChebSym, AlgoLoaderCostZones>(params); + } else { + std::cout << "Unknown algorithm: " << params.algo << std::endl; } diff --git a/Tests/noDist/PerfTestUtils.hpp b/Tests/noDist/PerfTestUtils.hpp index a3b632250af3e3167abf26c32bb2265bcea711d6..6c300207d3a8dbfb8da57b7fd7ca316ffe95c6a3 100644 --- a/Tests/noDist/PerfTestUtils.hpp +++ b/Tests/noDist/PerfTestUtils.hpp @@ -17,6 +17,7 @@ struct FPerfTestParams { int nbThreads = 1; ///< Maximum number of threads (when used). std::string filename = ""; ///< Particles file. std::string algo = "task"; ///< Algorithm to run. + bool omp_static_schedule = false; ///< OpenMP static or dynamic schedule. }; diff --git a/Tests/noDist/testFmmAlgorithmBalancedArgs.hpp b/Tests/noDist/testFmmAlgorithmBalancedArgs.hpp index 3faea507e7e03abdbc0f57a83946ec4e4b9a6061..8f1a3e75d5143223ed6f54f5ba476a5413a79902 100644 --- a/Tests/noDist/testFmmAlgorithmBalancedArgs.hpp +++ b/Tests/noDist/testFmmAlgorithmBalancedArgs.hpp @@ -5,102 +5,117 @@ #ifndef _LOADFMAANDRUNFMMARGS_HPP_ #define _LOADFMAANDRUNFMMARGS_HPP_ -#include <sys/ioctl.h> +#include <string> + +#include "Utils/FParameters.hpp" +#include "Utils/FParameterNames.hpp" -#include "tclap/CmdLine.h" -#include "tclap/LinuxOutput.hpp" -#include "tclap/CompletionVisitor.h" class loadFMAAndRunFMMArgs { - using vs = std::vector<std::string>; - const int _treeHeightInit = 5; const int _subTreeHeightInit = 1; const int _zoneCountInit = 4; const int _verboseInit = 0; - const std::string _inFileNameInit = ""; - const std::string _outFileNameInit = "balancetest"; - const std::string _outFileNameExtInit = "csv"; - - TCLAP::CmdLine - _cmd { "Loads an FMA file into a tree and runs a pseudo FMM algorithm " - "through it to compute load balancing.", ' ', "0.0"}; - - TCLAP::LinuxOutput _cmdFormat; - - TCLAP::BashCompletion::Visitor - _compVistor { &_cmd, - { { &_inFileName, {TCLAP::BashCompletion::FILE, "!*.fma" }}, - {&_outFileExt, {"csv"}}}}; - - TCLAP::SwitchArg - _compArg { "", - "completion", - "Show completion arguments", - _cmd, false, &_compVistor}; - - TCLAP::MultiSwitchArg - _verboseArg { "v", - "verbose", - "Activate verbosity", - _cmd, _verboseInit}; - - - TCLAP::ValueArg <int> - _subTreeHeight { "", vs{"subtree-height", "sth"}, - "Subtree height.", false, _subTreeHeightInit, "sub-tree height", _cmd}; - - TCLAP::ValueArg <int> - _treeHeight { "H", vs{"tree-height", "th"}, - "Tree height.", true, _treeHeightInit, "tree height", _cmd}; - - TCLAP::ValueArg <int> - _zoneCount { "z", vs{"zone-count"}, - "Number of zones to create", true, _zoneCountInit, "zone count", _cmd}; - - TCLAP::ValueArg <std::string> - _outFileExt { "x", "output-file-extension", - "Output files extension. One file is created for each level in the" - " tree. Each file has a 'basename_$nbZones$z.$i$.extension' " - "extension where $i is the level. Default value is " - + _outFileNameExtInit + ".", - false, _outFileNameExtInit, "suffix", _cmd}; - - TCLAP::ValueArg <std::string> - _outFileName { "o", "output-file-basename", - "Output files' basename. One file is created for each level in " - "the tree. Each file has a level-in-tree based extension.", - false, _outFileNameInit, "basename", _cmd}; - - TCLAP::ValueArg <std::string> - _inFileName { "f", "input-file", - "Input file name.", true, _inFileNameInit, "filename", _cmd}; - + const char* _inFileNameInit = ""; + const char* _outFileNameInit = "balancetest"; + const char* _outFileNameExtInit = "csv"; + + int _argc; + char** _argv; + + const FParameterNames OutputFileBasename = + {{"--output-file-basename", "-fout-base"}, + "Output files' basename. One file is created for each level in " + "the tree. Each file has a level-in-tree based extension."}; + + const FParameterNames OutputFileExtension = + {{"--output-file-extention", "-fout-ext"}, + ("Output files extension. One file is created for each level in the" + " tree. Each file has a 'basename_$nbZones$z.$i$.extension' " + "extension where $i is the level. Default value is " + + std::string(_outFileNameExtInit) + ".").c_str()}; + + const FParameterNames ZoneCount = + {{"--zone-count","-z"},"Number of zones to create."}; + public: - int treeHeight() const {return _treeHeight.getValue();} - int subTreeHeight() const {return _subTreeHeight.getValue();} - int zoneCount() const {return _zoneCount.getValue();} - int verboseLevel() const {return _verboseArg.getValue();} - const std::string& inFileName() const {return _inFileName.getValue();} - const std::string& outFileName() const {return _outFileName.getValue();} + int treeHeight() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + return getValue(_argc, _argv, OctreeHeight.options, _treeHeightInit); + } + + int subTreeHeight() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + return getValue(_argc, _argv, OctreeSubHeight.options, + _subTreeHeightInit); + } + + int zoneCount() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + return getValue(_argc, _argv, ZoneCount.options, _zoneCountInit); + } + + int verboseLevel() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + return getValue(_argc, _argv, EnabledVerbose.options, _verboseInit); + } + + std::string inFileName() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + return getStr(_argc, _argv, InputFile.options, _inFileNameInit); + } + + std::string outFileName() const { + using namespace FParameterDefinitions; + using namespace FParameters; + + return getStr(_argc, _argv, OutputFileBasename.options, _outFileNameInit); + } + std::string outFileExt() const { - std::string ext = _outFileExt.getValue(); + using namespace FParameterDefinitions; + using namespace FParameters; + + std::string ext = getStr(_argc, _argv, OutputFileExtension.options, + _outFileNameExtInit); if ( ext.at(0) != '.' ) return '.' + ext; return ext; } - loadFMAAndRunFMMArgs(int argc, char** argv) { - int columns = 80; - struct winsize w; - if (ioctl(0, TIOCGWINSZ, &w) == 0) - columns = w.ws_col; + loadFMAAndRunFMMArgs(int argc, char** argv) : _argc(argc), _argv(argv) { + parse(); + } + + int parse() { + using namespace FParameterDefinitions; + using namespace FParameters; - _compArg.setHideDesc(true); - _cmdFormat.setTextWidth(columns); - _cmd.setOutput(&_cmdFormat); - _cmd.parse(argc,argv); + FHelpDescribeAndExit + (_argc, _argv, + "Loads an FMA file into a tree and runs a pseudo FMM algorithm " + "through it to compute load balancing.", + OctreeHeight, + OctreeSubHeight, + InputFile, + OutputFileBasename, + OutputFileExtension, + ZoneCount, + EnabledVerbose + ); + return 0; } + };