From dc1809b4bf2e3b52b153ebee47fd972d665e3b46 Mon Sep 17 00:00:00 2001 From: Berenger Bramas Date: Mon, 17 Aug 2015 08:46:57 +0200 Subject: [PATCH] update the transfer pass with or without finalize --- Src/Components/FAbstractKernels.hpp | 2 +- Src/Core/FFmmAlgorithmOmp4.hpp | 2 + Src/Core/FFmmAlgorithmSectionTask.hpp | 54 +++++++++++++++++-- Src/Core/FFmmAlgorithmTask.hpp | 29 +++++----- Src/Core/FFmmAlgorithmThreadProc.hpp | 2 +- Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp | 2 +- .../Spherical/FSphericalBlockBlasKernel.hpp | 2 +- 7 files changed, 73 insertions(+), 20 deletions(-) diff --git a/Src/Components/FAbstractKernels.hpp b/Src/Components/FAbstractKernels.hpp index 9238a9fb..323f63b0 100644 --- a/Src/Components/FAbstractKernels.hpp +++ b/Src/Components/FAbstractKernels.hpp @@ -77,7 +77,7 @@ public: * * @return false */ - bool needFinishedM2LEvent(){ + constexpr static bool NeedFinishedM2LEvent(){ return false; } /** This method can be optionally inherited diff --git a/Src/Core/FFmmAlgorithmOmp4.hpp b/Src/Core/FFmmAlgorithmOmp4.hpp index e7d7e80e..de1cf363 100644 --- a/Src/Core/FFmmAlgorithmOmp4.hpp +++ b/Src/Core/FFmmAlgorithmOmp4.hpp @@ -86,6 +86,8 @@ public: FLOG(FLog::Controller << "FFmmAlgorithmOmp4 (Max Thread " << omp_get_max_threads() << ")\n"); + FAssertLF(KernelClass::NeedFinishedM2LEvent() == false, "FFmmAlgorithmOmp4 cannot notify for M2L at level ending."); + #ifdef OPENMP_SUPPORT_PRIORITY size_t nbLeaves = 0; size_t nbParticles = 0; diff --git a/Src/Core/FFmmAlgorithmSectionTask.hpp b/Src/Core/FFmmAlgorithmSectionTask.hpp index 79863e0a..7324a8aa 100644 --- a/Src/Core/FFmmAlgorithmSectionTask.hpp +++ b/Src/Core/FFmmAlgorithmSectionTask.hpp @@ -190,7 +190,7 @@ protected: do{ // We need the current cell and the child // child is an array (of 8 child) that may be null - #pragma omp task firstprivate(octreeIterator) shared(idxLevel) + #pragma omp task firstprivate(octreeIterator,idxLevel) { kernels[omp_get_thread_num()]->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel); } @@ -218,7 +218,12 @@ protected: eztrace_start(); #endif - this->transferPassWithFinalize() ; + if(KernelClass::NeedFinishedM2LEvent()){ + this->transferPassWithFinalize() ; + } + else{ + this->transferPassWithoutFinalize() ; + } #ifdef SCALFMM_USE_EZTRACE eztrace_stop(); #endif @@ -246,7 +251,7 @@ protected: do{ const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, separationCriteria); if(counter){ - #pragma omp task firstprivate(octreeIterator, neighbors, counter) shared(idxLevel) + #pragma omp task firstprivate(octreeIterator, neighbors, counter,idxLevel) { kernels[omp_get_thread_num()]->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel); } @@ -273,6 +278,47 @@ protected: FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = " << counterTime.tacAndElapsed() << " s)\n" ); } + void transferPassWithoutFinalize(){ + FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); ); + FLOG(FTic counterTime); + + const CellClass* neighbors[343]; + + typename OctreeClass::Iterator octreeIterator(tree); + octreeIterator.moveDown(); + + for(int idxLevel = 2 ; idxLevel < FAbstractAlgorithm::upperWorkingLevel ; ++idxLevel){ + octreeIterator.moveDown(); + } + + typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator); + // + // for each levels + for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){ + FLOG(FTic counterTimeLevel); + const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria); + // for each cells + do{ + const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, separationCriteria); + if(counter){ + #pragma omp task firstprivate(octreeIterator, neighbors, counter,idxLevel) + { + kernels[omp_get_thread_num()]->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel); + } + } + + } while(octreeIterator.moveRight()); + + avoidGotoLeftIterator.moveDown(); + octreeIterator = avoidGotoLeftIterator; + + } + + #pragma omp taskwait + + FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = " << counterTime.tacAndElapsed() << " s)\n" ); + } + ///////////////////////////////////////////////////////////////////////////// // Downward ///////////////////////////////////////////////////////////////////////////// @@ -296,7 +342,7 @@ protected: FLOG(FTic counterTimeLevel); // for each cells do{ - #pragma omp task firstprivate(octreeIterator) shared(idxLevel) + #pragma omp task firstprivate(octreeIterator,idxLevel) { kernels[omp_get_thread_num()]->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel); } diff --git a/Src/Core/FFmmAlgorithmTask.hpp b/Src/Core/FFmmAlgorithmTask.hpp index 88b11dbd..eb648795 100644 --- a/Src/Core/FFmmAlgorithmTask.hpp +++ b/Src/Core/FFmmAlgorithmTask.hpp @@ -209,18 +209,23 @@ protected: ///////////////////////////////////////////////////////////////////////////// /** M2L */ - void transferPass(){ - #ifdef SCALFMM_USE_EZTRACE - - eztrace_start(); -#endif - this->transferPassWithFinalize() ; -#ifdef SCALFMM_USE_EZTRACE - eztrace_stop(); -#endif - } - - void transferPassWithOutFinalize(){ + void transferPass(){ + #ifdef SCALFMM_USE_EZTRACE + + eztrace_start(); + #endif + if(KernelClass::NeedFinishedM2LEvent()){ + this->transferPassWithFinalize() ; + } + else{ + this->transferPassWithoutFinalize() ; + } + #ifdef SCALFMM_USE_EZTRACE + eztrace_stop(); + #endif + } + + void transferPassWithoutFinalize(){ FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); ); FLOG(FTic counterTime); diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp index dee7f4cf..af16d7b8 100644 --- a/Src/Core/FFmmAlgorithmThreadProc.hpp +++ b/Src/Core/FFmmAlgorithmThreadProc.hpp @@ -859,7 +859,7 @@ protected: #pragma omp taskwait for(int idxThread = 0 ; idxThread < omp_get_num_threads() ; ++idxThread){ - #pragma omp task default(none) firstprivate(idxThread) shared(idxLevel) + #pragma omp task default(none) firstprivate(idxThread,idxLevel) { kernels[idxThread]->finishedLevelM2L(idxLevel); } diff --git a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp index ce9690d5..e6fef287 100644 --- a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp +++ b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp @@ -960,7 +960,7 @@ protected: #pragma omp taskwait for(int idxThread = 0 ; idxThread < omp_get_num_threads() ; ++idxThread){ - #pragma omp task default(none) firstprivate(idxThread) shared(idxLevel) + #pragma omp task default(none) firstprivate(idxThread,idxLevel) { kernels[idxThread]->finishedLevelM2L(fackLevel); } diff --git a/Src/Kernels/Spherical/FSphericalBlockBlasKernel.hpp b/Src/Kernels/Spherical/FSphericalBlockBlasKernel.hpp index cf5a4654..c64b7b72 100644 --- a/Src/Kernels/Spherical/FSphericalBlockBlasKernel.hpp +++ b/Src/Kernels/Spherical/FSphericalBlockBlasKernel.hpp @@ -189,7 +189,7 @@ public: } } /** the needFinishedM2LEvent method is needed */ - bool needFinishedM2LEvent(){ + constexpr static bool NeedFinishedM2LEvent(){ return true ; } /** Do we have some computation to do in the buffers */ -- GitLab