From e76a4d3171ff49de0def4f91876059c907b51e68 Mon Sep 17 00:00:00 2001
From: bramas <berenger.bramas@inria.fr>
Date: Mon, 24 Nov 2014 12:12:27 +0100
Subject: [PATCH] Prepare to use StarPU and check OMP4

---
 CMakeLists.txt                        |  1 +
 Src/Utils/FGlobal.hpp                 |  9 +++++++++
 Tests/noDist/testBlockedAlgorithm.cpp | 14 ++++++++++++--
 Tests/noDist/testBlockedChebyshev.cpp | 14 ++++++++++----
 Tests/noDist/testBlockedRotation.cpp  | 18 ++++++++++++------
 Tests/noDist/testBlockedTree.cpp      | 12 +++++++++++-
 6 files changed, 55 insertions(+), 13 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 42eb27737..934eb257e 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -64,6 +64,7 @@ OPTION( ScalFMM_USE_AVX              "Set to ON to compile with AVX support"
 OPTION( ScalFMM_USE_ASSERT           "Set to ON to enable safe tests during execution" ON  )
 OPTION( ScalFMM_USE_MIC_NATIVE       "Set to ON to compile in native mode for MIC" OFF  )
 OPTION( ScalFMM_BUILD_ONLY_LIB        "Set to ON to compile only the lib (examples are not compiled) " OFF  )
+OPTION( ScalFMM_USE_STARPU           "Set to ON to build ScalFMM with StarPU" OFF  )
 #OPTION( ScalFMM_ONLY_DEVEL             "Set to ON to compile Development tools (only scalfmm team)" OFF  )
 # Set scalfmm to default libraries
 SET(SCALFMM_LIBRARIES "")
diff --git a/Src/Utils/FGlobal.hpp b/Src/Utils/FGlobal.hpp
index 9f4f3b1a8..34d6fe71c 100755
--- a/Src/Utils/FGlobal.hpp
+++ b/Src/Utils/FGlobal.hpp
@@ -95,5 +95,14 @@ typedef long long MortonIndex;
     #endif
 #endif
 
+
+///////////////////////////////////////////////////////
+// Test OMP4: _OPENMP holds the yyyymm date of the supported OpenMP
+// specification (201307 is OpenMP 4.0); it is undefined without OpenMP.
+///////////////////////////////////////////////////////
+#if defined(_OPENMP) && _OPENMP >= 201307
+#define ScalFMM_USE_OMP4
+#endif
+
 #endif //FGLOBAL_HPP
 
diff --git a/Tests/noDist/testBlockedAlgorithm.cpp b/Tests/noDist/testBlockedAlgorithm.cpp
index 6f0b945d4..3f5ef766d 100644
--- a/Tests/noDist/testBlockedAlgorithm.cpp
+++ b/Tests/noDist/testBlockedAlgorithm.cpp
@@ -1,3 +1,5 @@
+#include "../../Src/Utils/FGlobal.hpp"
+
 #include "../../Src/GroupTree/FGroupTree.hpp"
 
 #include "../../Src/Components/FSimpleLeaf.hpp"
@@ -29,15 +31,23 @@ int main(int argc, char* argv[]){
         "The size of the block of the blocked tree"
     };
     FHelpDescribeAndExit(argc, argv, "Test the blocked tree by counting the particles.",
-                         FParameterDefinitions::OctreeHeight,
+                         FParameterDefinitions::OctreeHeight, FParameterDefinitions::NbThreads,
                          FParameterDefinitions::NbParticles, LocalOptionBlocSize);
     // Initialize the types
     typedef FTestCell                                                       GroupCellClass;
     typedef FGroupTestParticleContainer                                     GroupContainerClass;
     typedef FGroupTree< GroupCellClass, GroupContainerClass, 2, long long int>  GroupOctreeClass;
     typedef FTestKernels< GroupCellClass, GroupContainerClass >                       GroupKernelClass;
+#ifdef ScalFMM_USE_STARPU
+    typedef FGroupStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+#elif defined(ScalFMM_USE_OMP4)
+    typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+#else
     //typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
-typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+    typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+#endif
+    // Set the number of threads in every configuration: NbThreads is advertised by the help unconditionally
+    omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
 
     typedef FTestCell                   CellClass;
     typedef FTestParticleContainer      ContainerClass;
diff --git a/Tests/noDist/testBlockedChebyshev.cpp b/Tests/noDist/testBlockedChebyshev.cpp
index 9d5029992..345d0e7f3 100644
--- a/Tests/noDist/testBlockedChebyshev.cpp
+++ b/Tests/noDist/testBlockedChebyshev.cpp
@@ -1,3 +1,4 @@
+#include "../../Src/Utils/FGlobal.hpp"
 
 #include "../../Src/GroupTree/FGroupTree.hpp"
 
@@ -42,8 +43,6 @@ int main(int argc, char* argv[]){
                          FParameterDefinitions::OctreeHeight,FParameterDefinitions::InputFile,
                          FParameterDefinitions::NbParticles, FParameterDefinitions::NbThreads,
                          LocalOptionBlocSize, LocalOptionNoValidate);
-    // Set the number of threads
-    omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
 
     // Initialize the types
     static const int ORDER = 6;
@@ -52,9 +51,16 @@ int main(int argc, char* argv[]){
     typedef FP2PGroupParticleContainer<>          GroupContainerClass;
     typedef FGroupTree< GroupCellClass, GroupContainerClass, 5, FReal>  GroupOctreeClass;
     typedef FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass;
-    //typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
-    //typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+#ifdef ScalFMM_USE_STARPU
+    typedef FGroupStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+#elif defined(ScalFMM_USE_OMP4)
     typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+#else
+    //typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+    typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+#endif
+    // Set the number of threads in every configuration (as before this change), not only with OMP4
+    omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
 
     // Get params
     const int NbLevels      = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5);
diff --git a/Tests/noDist/testBlockedRotation.cpp b/Tests/noDist/testBlockedRotation.cpp
index a85d2860c..330b134e5 100644
--- a/Tests/noDist/testBlockedRotation.cpp
+++ b/Tests/noDist/testBlockedRotation.cpp
@@ -1,3 +1,5 @@
+#include "../../Src/Utils/FGlobal.hpp"
+
 #include "../../Src/GroupTree/FGroupTree.hpp"
 
 #include "../../Src/Components/FSimpleLeaf.hpp"
@@ -41,19 +43,23 @@ int main(int argc, char* argv[]){
                           FParameterDefinitions::NbThreads,
                          FParameterDefinitions::NbParticles, LocalOptionBlocSize, LocalOptionNoValidate);
 
-    // Set the number of threads
-    omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
-
-
     // Initialize the types
     static const int P = 9;
     typedef FRotationCell<P>               GroupCellClass;
     typedef FP2PGroupParticleContainer<>          GroupContainerClass;
     typedef FGroupTree< GroupCellClass, GroupContainerClass, 5, FReal>  GroupOctreeClass;
     typedef FRotationKernel< GroupCellClass, GroupContainerClass , P>   GroupKernelClass;
-    //typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
-    //typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+#ifdef ScalFMM_USE_STARPU
+    typedef FGroupStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+#elif defined(ScalFMM_USE_OMP4)
     typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+#else
+    //typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+    typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+#endif
+    // Set the number of threads in every configuration (as before this change), not only with OMP4
+    omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
+
 
     // Get params
     const int NbLevels      = FParameters::getValue(argc,argv,FParameterDefinitions::OctreeHeight.options, 5);
diff --git a/Tests/noDist/testBlockedTree.cpp b/Tests/noDist/testBlockedTree.cpp
index 9b67cefb0..2d0eb6a9d 100644
--- a/Tests/noDist/testBlockedTree.cpp
+++ b/Tests/noDist/testBlockedTree.cpp
@@ -1,3 +1,4 @@
+#include "../../Src/Utils/FGlobal.hpp"
 
 #include "../../Src/GroupTree/FGroupTree.hpp"
 
@@ -24,6 +25,8 @@
 #include "../../Src/Files/FFmaGenericLoader.hpp"
 
 #include "../../Src/GroupTree/FGroupSeqAlgorithm.hpp"
+#include "../../Src/GroupTree/FGroupTaskAlgorithm.hpp"
+#include "../../Src/GroupTree/FGroupTaskDepAlgorithm.hpp"
 #include "../../Src/GroupTree/FP2PGroupParticleContainer.hpp"
 
 #include "../../Src/Utils/FParameterNames.hpp"
@@ -84,7 +87,14 @@ int main(int argc, char* argv[]){
 
 
     typedef FRotationKernel< CellClass, FP2PGroupParticleContainer<> , P>   KernelClass;
-    typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
+#ifdef ScalFMM_USE_STARPU // NOTE(review): no FGroupStarPUAlgorithm header is included in the lines shown here - confirm before enabling
+    typedef FGroupStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
+#elif defined(ScalFMM_USE_OMP4) // defined by FGlobal.hpp when _OPENMP >= 201307
+    typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
+#else // neither ScalFMM_USE_STARPU nor ScalFMM_USE_OMP4 is defined
+    //typedef FGroupSeqAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
+    typedef FGroupTaskAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
+#endif // ScalFMM_USE_STARPU / ScalFMM_USE_OMP4
 
     KernelClass kernel(NbLevels, loader.getBoxWidth(), loader.getCenterOfBox());
     GroupAlgorithm algo(&groupedTree2,&kernel);
-- 
GitLab