From dc1809b4bf2e3b52b153ebee47fd972d665e3b46 Mon Sep 17 00:00:00 2001
From: Berenger Bramas <Berenger.Bramas@inria.fr>
Date: Mon, 17 Aug 2015 08:46:57 +0200
Subject: [PATCH] Select the M2L transfer pass with or without finalize from KernelClass::NeedFinishedM2LEvent()

---
 Src/Components/FAbstractKernels.hpp           |  2 +-
 Src/Core/FFmmAlgorithmOmp4.hpp                |  2 +
 Src/Core/FFmmAlgorithmSectionTask.hpp         | 54 +++++++++++++++++--
 Src/Core/FFmmAlgorithmTask.hpp                | 29 +++++-----
 Src/Core/FFmmAlgorithmThreadProc.hpp          |  2 +-
 Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp  |  2 +-
 .../Spherical/FSphericalBlockBlasKernel.hpp   |  2 +-
 7 files changed, 73 insertions(+), 20 deletions(-)

diff --git a/Src/Components/FAbstractKernels.hpp b/Src/Components/FAbstractKernels.hpp
index 9238a9fbd..323f63b07 100644
--- a/Src/Components/FAbstractKernels.hpp
+++ b/Src/Components/FAbstractKernels.hpp
@@ -77,7 +77,7 @@ public:
        *
        * @return false
        */
-     bool needFinishedM2LEvent(){
+     constexpr static bool NeedFinishedM2LEvent(){
     	 	 return false;
      }
      /** This method can be optionally inherited
diff --git a/Src/Core/FFmmAlgorithmOmp4.hpp b/Src/Core/FFmmAlgorithmOmp4.hpp
index e7d7e80e0..de1cf3639 100644
--- a/Src/Core/FFmmAlgorithmOmp4.hpp
+++ b/Src/Core/FFmmAlgorithmOmp4.hpp
@@ -86,6 +86,8 @@ public:
 
         FLOG(FLog::Controller << "FFmmAlgorithmOmp4 (Max Thread " << omp_get_max_threads() << ")\n");
 
+        FAssertLF(KernelClass::NeedFinishedM2LEvent() == false, "FFmmAlgorithmOmp4 cannot notify for M2L at level ending.");
+
 #ifdef OPENMP_SUPPORT_PRIORITY
         size_t nbLeaves = 0;
         size_t nbParticles = 0;
diff --git a/Src/Core/FFmmAlgorithmSectionTask.hpp b/Src/Core/FFmmAlgorithmSectionTask.hpp
index 79863e0a0..7324a8aac 100644
--- a/Src/Core/FFmmAlgorithmSectionTask.hpp
+++ b/Src/Core/FFmmAlgorithmSectionTask.hpp
@@ -190,7 +190,7 @@ protected:
             do{
                 // We need the current cell and the child
                 // child is an array (of 8 child) that may be null
-                #pragma omp task firstprivate(octreeIterator) shared(idxLevel)
+                #pragma omp task firstprivate(octreeIterator,idxLevel)
                 {
                     kernels[omp_get_thread_num()]->M2M( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
                 }
@@ -218,7 +218,12 @@ protected:
     
     eztrace_start();
 #endif
-    this->transferPassWithFinalize() ;
+    if(KernelClass::NeedFinishedM2LEvent()){
+        this->transferPassWithFinalize() ;
+    }
+    else{
+        this->transferPassWithoutFinalize() ;
+    }
 #ifdef SCALFMM_USE_EZTRACE
     eztrace_stop();
 #endif
@@ -246,7 +251,7 @@ protected:
             do{
                 const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, separationCriteria);
                 if(counter){
-                    #pragma omp task firstprivate(octreeIterator, neighbors, counter) shared(idxLevel)
+                    #pragma omp task firstprivate(octreeIterator, neighbors, counter,idxLevel)
                     {
                         kernels[omp_get_thread_num()]->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
                     }
@@ -273,6 +278,47 @@ protected:
         FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = "  << counterTime.tacAndElapsed() << " s)\n" );
     }
 
+   void transferPassWithoutFinalize(){ // M2L pass: one task per cell, one taskwait after all levels
+        FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); );
+        FLOG(FTic counterTime);
+        // Buffer handed to tree->getInteractionNeighbors() for each cell; 343 = 7*7*7 entries.
+        const CellClass* neighbors[343];
+        // Step the iterator down once, then once more per level in [2, upperWorkingLevel).
+        typename OctreeClass::Iterator octreeIterator(tree);
+        octreeIterator.moveDown();
+
+        for(int idxLevel = 2 ; idxLevel < FAbstractAlgorithm::upperWorkingLevel ; ++idxLevel){
+            octreeIterator.moveDown();
+        }
+        // Second iterator that is only ever moved down; each new level restarts from it (see loop tail).
+        typename OctreeClass::Iterator avoidGotoLeftIterator(octreeIterator);
+        //
+        // Visit every level in [upperWorkingLevel, lowerWorkingLevel).
+        for(int idxLevel = FAbstractAlgorithm::upperWorkingLevel ; idxLevel < FAbstractAlgorithm::lowerWorkingLevel ; ++idxLevel ){
+            FLOG(FTic counterTimeLevel); // NOTE(review): counterTimeLevel is never read in this function
+            const int separationCriteria = (idxLevel != FAbstractAlgorithm::lowerWorkingLevel-1 ? 1 : leafLevelSeparationCriteria);
+            // For each cell reached by moveRight() on this level.
+            do{
+                const int counter = tree->getInteractionNeighbors(neighbors, octreeIterator.getCurrentGlobalCoordinate(), idxLevel, separationCriteria);
+                if(counter){ // firstprivate below gives the task its own copy of iterator, neighbors, counter, idxLevel
+                    #pragma omp task firstprivate(octreeIterator, neighbors, counter,idxLevel)
+                    {
+                        kernels[omp_get_thread_num()]->M2L( octreeIterator.getCurrentCell() , neighbors, counter, idxLevel);
+                    }
+                }
+
+            } while(octreeIterator.moveRight());
+            // No taskwait here: tasks of this level are not awaited before the next level is visited.
+            avoidGotoLeftIterator.moveDown();
+            octreeIterator = avoidGotoLeftIterator;
+
+        }
+        // Single synchronization point for the M2L tasks created in the loop above.
+        #pragma omp taskwait
+
+        FLOG( FLog::Controller << "\tFinished (@Downward Pass (M2L) = "  << counterTime.tacAndElapsed() << " s)\n" );
+    }
+
     /////////////////////////////////////////////////////////////////////////////
     // Downward
     /////////////////////////////////////////////////////////////////////////////
@@ -296,7 +342,7 @@ protected:
             FLOG(FTic counterTimeLevel);
             // for each cells
             do{
-                #pragma omp task firstprivate(octreeIterator) shared(idxLevel)
+                #pragma omp task firstprivate(octreeIterator,idxLevel)
                 {
                     kernels[omp_get_thread_num()]->L2L( octreeIterator.getCurrentCell() , octreeIterator.getCurrentChild(), idxLevel);
                 }
diff --git a/Src/Core/FFmmAlgorithmTask.hpp b/Src/Core/FFmmAlgorithmTask.hpp
index 88b11dbd8..eb6487954 100644
--- a/Src/Core/FFmmAlgorithmTask.hpp
+++ b/Src/Core/FFmmAlgorithmTask.hpp
@@ -209,18 +209,23 @@ protected:
 	/////////////////////////////////////////////////////////////////////////////
 
 	/** M2L  */
-	void transferPass(){
-      #ifdef SCALFMM_USE_EZTRACE
-	  
-	  eztrace_start();
-#endif
-		this->transferPassWithFinalize() ;
-#ifdef SCALFMM_USE_EZTRACE
-	  eztrace_stop();
-#endif
-	    }
-
-	void transferPassWithOutFinalize(){
+    void transferPass(){ // Runs the M2L pass variant chosen by KernelClass::NeedFinishedM2LEvent()
+  #ifdef SCALFMM_USE_EZTRACE
+      // eztrace_start()/eztrace_stop() bracket the pass only when SCALFMM_USE_EZTRACE is defined.
+      eztrace_start();
+  #endif
+      if(KernelClass::NeedFinishedM2LEvent()){
+          this->transferPassWithFinalize() ;
+      }
+      else{
+          this->transferPassWithoutFinalize() ;
+      }
+  #ifdef SCALFMM_USE_EZTRACE
+      eztrace_stop();
+  #endif
+    }
+
+    void transferPassWithoutFinalize(){
 		FLOG( FLog::Controller.write("\tStart Downward Pass (M2L)\n").write(FLog::Flush); );
 		FLOG(FTic counterTime);
 
diff --git a/Src/Core/FFmmAlgorithmThreadProc.hpp b/Src/Core/FFmmAlgorithmThreadProc.hpp
index dee7f4cfc..af16d7b8d 100644
--- a/Src/Core/FFmmAlgorithmThreadProc.hpp
+++ b/Src/Core/FFmmAlgorithmThreadProc.hpp
@@ -859,7 +859,7 @@ protected:
                     #pragma omp taskwait
 
                     for(int idxThread = 0 ; idxThread < omp_get_num_threads() ; ++idxThread){
-                        #pragma omp task  default(none) firstprivate(idxThread) shared(idxLevel)
+                        #pragma omp task  default(none) firstprivate(idxThread,idxLevel)
                         {
                             kernels[idxThread]->finishedLevelM2L(idxLevel);
                         }
diff --git a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp
index ce9690d55..e6fef2877 100644
--- a/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp
+++ b/Src/Core/FFmmAlgorithmThreadProcPeriodic.hpp
@@ -960,7 +960,7 @@ protected:
                     #pragma omp taskwait
 
                     for(int idxThread = 0 ; idxThread < omp_get_num_threads() ; ++idxThread){
-                        #pragma omp task  default(none) firstprivate(idxThread) shared(idxLevel)
+                        #pragma omp task  default(none) firstprivate(idxThread,idxLevel)
                         {
                             kernels[idxThread]->finishedLevelM2L(fackLevel);
                         }
diff --git a/Src/Kernels/Spherical/FSphericalBlockBlasKernel.hpp b/Src/Kernels/Spherical/FSphericalBlockBlasKernel.hpp
index cf5a4654c..c64b7b72b 100644
--- a/Src/Kernels/Spherical/FSphericalBlockBlasKernel.hpp
+++ b/Src/Kernels/Spherical/FSphericalBlockBlasKernel.hpp
@@ -189,7 +189,7 @@ public:
         }
     }
     /** the needFinishedM2LEvent method is needed   */
-    bool needFinishedM2LEvent(){
+    constexpr static bool NeedFinishedM2LEvent(){
    	 	 return true ;
     }
     /** Do we have some computation to do in the buffers */
-- 
GitLab