From a8fe9cf6f709a41008824014b14d61d4e4ae460f Mon Sep 17 00:00:00 2001
From: bramas <berenger.bramas@inria.fr>
Date: Tue, 25 Nov 2014 15:19:11 +0100
Subject: [PATCH] Make the StarPU version work

---
 Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp | 79 ++++++++++-----------
 Tests/noDist/testBlockedAlgorithm.cpp       |  2 +-
 Tests/noDist/testBlockedChebyshev.cpp       |  2 +-
 Tests/noDist/testBlockedTree.cpp            |  2 +-
 4 files changed, 42 insertions(+), 43 deletions(-)

diff --git a/Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp b/Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp
index 5ae7ab991..36736c316 100644
--- a/Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp
+++ b/Src/GroupTree/FGroupTaskStarpuAlgorithm.hpp
@@ -42,9 +42,10 @@ protected:
     std::vector< std::vector< std::vector<BlockInteractions<CellContainerClass>>>> externalInteractionsAllLevel;
     std::vector< std::vector<BlockInteractions<ParticleGroupClass>>> externalInteractionsLeafLevel;
 
-    const int MaxThreads;         //< The number of threads
+    int MaxThreads;         //< The number of threads
     OctreeClass*const tree;       //< The Tree
     KernelClass** kernels;        //< The kernels
+    ThisClass* thisptr;
 
     std::vector<starpu_data_handle_t>* handles;
 
@@ -61,24 +62,29 @@ protected:
 
 public:
     FGroupTaskStarPUAlgorithm(OctreeClass*const inTree, KernelClass* inKernels, const int inMaxThreads = -1)
-        : MaxThreads(inMaxThreads==-1?omp_get_max_threads():inMaxThreads), tree(inTree), kernels(nullptr),
-          handles(nullptr){
+        : MaxThreads(inMaxThreads), tree(inTree), kernels(nullptr),
+          thisptr(this), handles(nullptr){ // thisptr: addressable copy of `this`, passed as &thisptr to STARPU_VALUE
         FAssertLF(tree, "tree cannot be null");
         FAssertLF(inKernels, "kernels cannot be null");
+        FAssertLF(MaxThreads <= STARPU_MAXCPUS, "number of threads too high");
 
+        struct starpu_conf conf;
+        FAssertLF(starpu_conf_init(&conf) == 0); // NOTE(review): call lives inside FAssertLF - confirm the macro still evaluates it when assertions are off
+        conf.ncpus = MaxThreads; // the default argument (-1) is forwarded unchanged; presumably StarPU reads -1 as "use its default" - TODO confirm
+        FAssertLF(starpu_init(&conf) == 0); // NOTE(review): same concern as starpu_conf_init above
+        starpu_pause(); // runtime stays paused after construction until starpu_resume() is called
+
+        MaxThreads = starpu_worker_get_count();//starpu_cpu_worker_get_count();
+
+        handles = new std::vector<starpu_data_handle_t>[tree->getHeight()+1]; // getHeight()+1 vectors of handles
         kernels = new KernelClass*[MaxThreads];
         for(int idxThread = 0 ; idxThread < MaxThreads ; ++idxThread){
             this->kernels[idxThread] = new KernelClass(*inKernels);
         }
 
-        handles = new std::vector<starpu_data_handle_t>[tree->getHeight()+1];
+        initCodelet();
 
         FLOG(FLog::Controller << "FGroupTaskStarPUAlgorithm (Max Thread " << MaxThreads << ")\n");
-
-        FAssertLF(starpu_init(NULL) == 0);
-        starpu_pause();
-
-        initCodelet();
     }
 
     ~FGroupTaskStarPUAlgorithm(){
@@ -90,6 +96,7 @@ public:
         cleanHandle();
         delete[] handles;
 
+        starpu_resume();
         starpu_shutdown();
     }
 
@@ -104,17 +111,17 @@ public:
 
         starpu_resume();
 
-  //      if( operationsToProceed & FFmmP2P ) directPass();
+        if( operationsToProceed & FFmmP2P ) directPass();
 
         if(operationsToProceed & FFmmP2M) bottomPass();
 
-  //      if(operationsToProceed & FFmmM2M) upwardPass();
+        if(operationsToProceed & FFmmM2M) upwardPass();
 
-  //      if(operationsToProceed & FFmmM2L) transferPass();
+        if(operationsToProceed & FFmmM2L) transferPass();
 
-  //      if(operationsToProceed & FFmmL2L) downardPass();
+        if(operationsToProceed & FFmmL2L) downardPass();
 
-  //      if( operationsToProceed & FFmmL2P ) mergePass();
+        if( operationsToProceed & FFmmL2P ) mergePass();
 
         starpu_task_wait_for_all();
         starpu_pause();
@@ -408,7 +415,7 @@ protected:
 
         for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
             starpu_insert_task(&p2m_cl,
-                    STARPU_VALUE, this, sizeof(ThisClass*),
+                    STARPU_VALUE, &thisptr, sizeof(ThisClass*),
                     STARPU_RW, handles[tree->getHeight()-1][idxGroup],
                     STARPU_R, handles[tree->getHeight()][idxGroup],
                     0);
@@ -482,7 +489,7 @@ protected:
                 char *arg_buffer;
                 size_t arg_buffer_size;
                 starpu_codelet_pack_args((void**)&arg_buffer, &arg_buffer_size,
-                                         STARPU_VALUE, this, sizeof(ThisClass*),
+                                         STARPU_VALUE, &thisptr, sizeof(ThisClass*),
                                          STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
                                          STARPU_VALUE, &idxLevel, sizeof(idxLevel),
                                          0);
@@ -498,11 +505,9 @@ protected:
         CellContainerClass* currentCells = reinterpret_cast<CellContainerClass*>(STARPU_VARIABLE_GET_PTR(buffers[0]));
 
         ThisClass* worker = nullptr;
-        starpu_codelet_unpack_args(cl_arg, &worker);
         int nbSubCellGroups = 0;
-        starpu_codelet_unpack_args(cl_arg, &nbSubCellGroups);
         int idxLevel = 0;
-        starpu_codelet_unpack_args(cl_arg, &idxLevel);
+        starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel);
 
         CellContainerClass* subCellGroups[9];
         memset(subCellGroups, 0, 9*sizeof(CellContainerClass*));
@@ -554,7 +559,7 @@ protected:
             FLOG( timerInBlock.tic() );
             for(int idxGroup = 0 ; idxGroup < tree->getNbCellGroupAtLevel(idxLevel) ; ++idxGroup){
                 starpu_insert_task(&m2l_cl_in,
-                        STARPU_VALUE, this, sizeof(ThisClass*),
+                        STARPU_VALUE, &thisptr, sizeof(ThisClass*),
                         STARPU_VALUE, &idxLevel, sizeof(idxLevel),
                         STARPU_RW, handles[idxLevel][idxGroup],
                         0);
@@ -568,9 +573,9 @@ protected:
                     const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsAllLevel[idxLevel][idxGroup][idxInteraction].interactions;
 
                     starpu_insert_task(&m2l_cl_inout,
-                            STARPU_VALUE, this, sizeof(ThisClass*),
+                            STARPU_VALUE, &thisptr, sizeof(ThisClass*),
                             STARPU_VALUE, &idxLevel, sizeof(idxLevel),
-                            STARPU_VALUE, outsideInteractions, sizeof(outsideInteractions),
+                            STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
                             STARPU_RW, handles[idxLevel][idxGroup],
                             STARPU_RW, handles[idxLevel][interactionid],
                             0);
@@ -587,9 +592,8 @@ protected:
         CellContainerClass* currentCells = reinterpret_cast<CellContainerClass*>(STARPU_VARIABLE_GET_PTR(buffers[0]));
 
         ThisClass* worker = nullptr;
-        starpu_codelet_unpack_args(cl_arg, &worker);
         int idxLevel = 0;
-        starpu_codelet_unpack_args(cl_arg, &idxLevel);
+        starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel);
 
         worker->transferInPassPerform(currentCells, idxLevel);
     }
@@ -631,14 +635,12 @@ protected:
 
     static void transferInoutPassCallback(void *buffers[], void *cl_arg){
         CellContainerClass* currentCells = reinterpret_cast<CellContainerClass*>(STARPU_VARIABLE_GET_PTR(buffers[0]));
-        CellContainerClass* externalCells = reinterpret_cast<CellContainerClass*>(STARPU_VARIABLE_GET_PTR(buffers[0]));
+        CellContainerClass* externalCells = reinterpret_cast<CellContainerClass*>(STARPU_VARIABLE_GET_PTR(buffers[1]));
 
         ThisClass* worker = nullptr;
-        starpu_codelet_unpack_args(cl_arg, &worker);
         int idxLevel = 0;
-        starpu_codelet_unpack_args(cl_arg, &idxLevel);
         const std::vector<OutOfBlockInteraction>* outsideInteractions;
-        starpu_codelet_unpack_args(cl_arg, &outsideInteractions);
+        starpu_codelet_unpack_args(cl_arg, &worker, &idxLevel, &outsideInteractions);
 
         worker->transferInoutPassPerform(currentCells, externalCells, idxLevel, outsideInteractions);
     }
@@ -711,7 +713,7 @@ protected:
                 char *arg_buffer;
                 size_t arg_buffer_size;
                 starpu_codelet_pack_args((void**)&arg_buffer, &arg_buffer_size,
-                                         STARPU_VALUE, this, sizeof(ThisClass*),
+                                         STARPU_VALUE, &thisptr, sizeof(ThisClass*),
                                          STARPU_VALUE, &nbSubCellGroups, sizeof(nbSubCellGroups),
                                          STARPU_VALUE, &idxLevel, sizeof(idxLevel),
                                          0);
@@ -727,11 +729,9 @@ protected:
         CellContainerClass* currentCells = reinterpret_cast<CellContainerClass*>(STARPU_VARIABLE_GET_PTR(buffers[0]));
 
         ThisClass* worker = nullptr;
-        starpu_codelet_unpack_args(cl_arg, &worker);
         int nbSubCellGroups = 0;
-        starpu_codelet_unpack_args(cl_arg, &nbSubCellGroups);
         int idxLevel = 0;
-        starpu_codelet_unpack_args(cl_arg, &idxLevel);
+        starpu_codelet_unpack_args(cl_arg, &worker, &nbSubCellGroups, &idxLevel);
 
         CellContainerClass* subCellGroups[9];
         memset(subCellGroups, 0, 9*sizeof(CellContainerClass*));
@@ -783,19 +783,19 @@ protected:
         FLOG( timerInBlock.tic() );
         for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
             starpu_insert_task(&p2p_cl_in,
-                    STARPU_VALUE, this, sizeof(ThisClass*),
+                    STARPU_VALUE, &thisptr, sizeof(ThisClass*),
                     STARPU_RW, handles[tree->getHeight()][idxGroup],
                     0);
         }
         FLOG( timerInBlock.tac() );
         FLOG( timerOutBlock.tic() );
         for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
-            for(int idxInteraction = 0; idxInteraction < int(externalInteractionsLeafLevel.size()) ; ++idxInteraction){
+            for(int idxInteraction = 0; idxInteraction < int(externalInteractionsLeafLevel[idxGroup].size()) ; ++idxInteraction){
                 const int interactionid = externalInteractionsLeafLevel[idxGroup][idxInteraction].otherBlockId;
                 const std::vector<OutOfBlockInteraction>* outsideInteractions = &externalInteractionsLeafLevel[idxGroup][idxInteraction].interactions;
-                starpu_insert_task(&p2p_cl_in,
-                        STARPU_VALUE, this, sizeof(ThisClass*),
-                        STARPU_VALUE, outsideInteractions, sizeof(outsideInteractions),
+                starpu_insert_task(&p2p_cl_inout,
+                        STARPU_VALUE, &thisptr, sizeof(ThisClass*),
+                        STARPU_VALUE, &outsideInteractions, sizeof(outsideInteractions),
                         STARPU_RW, handles[tree->getHeight()][idxGroup],
                         STARPU_RW, handles[tree->getHeight()][interactionid],
                         0);
@@ -856,9 +856,8 @@ protected:
         ParticleGroupClass* externalContainers = reinterpret_cast<ParticleGroupClass*>(STARPU_VARIABLE_GET_PTR(buffers[1]));
 
         ThisClass* worker = nullptr;
-        starpu_codelet_unpack_args(cl_arg, &worker);
         const std::vector<OutOfBlockInteraction>* outsideInteractions = nullptr;
-        starpu_codelet_unpack_args(cl_arg, &outsideInteractions);
+        starpu_codelet_unpack_args(cl_arg, &worker, &outsideInteractions);
 
         worker->directInoutPassPerform(containers, externalContainers, outsideInteractions);
     }
@@ -893,7 +892,7 @@ protected:
 
         for(int idxGroup = 0 ; idxGroup < tree->getNbParticleGroup() ; ++idxGroup){
             starpu_insert_task(&l2p_cl,
-                    STARPU_VALUE, this, sizeof(ThisClass*),
+                    STARPU_VALUE, &thisptr, sizeof(ThisClass*),
                     STARPU_R, handles[tree->getHeight()-1][idxGroup],
                     STARPU_RW, handles[tree->getHeight()][idxGroup],
                     0);
diff --git a/Tests/noDist/testBlockedAlgorithm.cpp b/Tests/noDist/testBlockedAlgorithm.cpp
index 8771c294a..fe48ea2ac 100644
--- a/Tests/noDist/testBlockedAlgorithm.cpp
+++ b/Tests/noDist/testBlockedAlgorithm.cpp
@@ -40,7 +40,7 @@ int main(int argc, char* argv[]){
     typedef FGroupTree< GroupCellClass, GroupContainerClass, 2, long long int>  GroupOctreeClass;
     typedef FTestKernels< GroupCellClass, GroupContainerClass >                       GroupKernelClass;
 #ifdef ScalFMM_USE_STARPU
-    typedef FGroupTaskStarpuAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+    typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
 #elif defined(ScalFMM_USE_OMP4)
     // Set the number of threads
     omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
diff --git a/Tests/noDist/testBlockedChebyshev.cpp b/Tests/noDist/testBlockedChebyshev.cpp
index 366a85243..afa9dc865 100644
--- a/Tests/noDist/testBlockedChebyshev.cpp
+++ b/Tests/noDist/testBlockedChebyshev.cpp
@@ -53,7 +53,7 @@ int main(int argc, char* argv[]){
     typedef FGroupTree< GroupCellClass, GroupContainerClass, 5, FReal>  GroupOctreeClass;
     typedef FChebSymKernel<GroupCellClass,GroupContainerClass,MatrixKernelClass,ORDER> GroupKernelClass;
 #ifdef ScalFMM_USE_STARPU
-    typedef FGroupTaskStarpuAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
+    typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, GroupCellClass, GroupKernelClass, typename GroupOctreeClass::ParticleGroupClass, GroupContainerClass > GroupAlgorithm;
 #elif defined(ScalFMM_USE_OMP4)
     // Set the number of threads
     omp_set_num_threads(FParameters::getValue(argc,argv,FParameterDefinitions::NbThreads.options, omp_get_max_threads()));
diff --git a/Tests/noDist/testBlockedTree.cpp b/Tests/noDist/testBlockedTree.cpp
index 27f6d5351..1b4e7544e 100644
--- a/Tests/noDist/testBlockedTree.cpp
+++ b/Tests/noDist/testBlockedTree.cpp
@@ -89,7 +89,7 @@ int main(int argc, char* argv[]){
 
     typedef FRotationKernel< CellClass, FP2PGroupParticleContainer<> , P>   KernelClass;
 #ifdef ScalFMM_USE_STARPU
-    typedef FGroupTaskStarpuAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
+    typedef FGroupTaskStarPUAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
 #elif defined(ScalFMM_USE_OMP4)
     typedef FGroupTaskDepAlgorithm<GroupOctreeClass, typename GroupOctreeClass::CellGroupClass, CellClass, KernelClass, typename GroupOctreeClass::ParticleGroupClass, FP2PGroupParticleContainer<> > GroupAlgorithm;
 #else
-- 
GitLab