From ab7de6c8c7184e32371ed53ad4b280f39d4e2107 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Tue, 1 Dec 2020 17:05:45 +0100
Subject: [PATCH] Fix profile initialization with StarPU

---
 runtime/starpu/codelets/codelet_zcallback.c   |  1 +
 runtime/starpu/control/runtime_context.c      | 21 ++++++++++++++++++-
 .../starpu/include/runtime_codelet_profile.h  |  2 ++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index cb381cfc1..40c4c24ab 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -28,6 +28,7 @@ CHAMELEON_CL_CB(dlag2z,        cti_handle_get_m(task->handles[1]), cti_handle_ge
 CHAMELEON_CL_CB(dzasum,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
 CHAMELEON_CL_CB(zaxpy,         cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[1]), 0,                                      M)
 CHAMELEON_CL_CB(zgeadd,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
+CHAMELEON_CL_CB(ztradd,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                  0.5*M*N)
 CHAMELEON_CL_CB(zlascal,       cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
 CHAMELEON_CL_CB(zgelqt,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      (4./3.)*M*N*K)
 CHAMELEON_CL_CB(zgemv,         cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      2. *M*N  )
diff --git a/runtime/starpu/control/runtime_context.c b/runtime/starpu/control/runtime_context.c
index f44ae2043..9632aef93 100644
--- a/runtime/starpu/control/runtime_context.c
+++ b/runtime/starpu/control/runtime_context.c
@@ -21,6 +21,11 @@
 #include <stdlib.h>
 #include "chameleon_starpu.h"
 
+/**
+ * @brief Store the status of some flags to know when to enable/disable them
+ */
+static int context_starpu_flags = 0;
+
 #if (STARPU_MAJOR_VERSION > 1) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION >= 3))
 /* Defined by StarPU as external function */
 #else
@@ -76,7 +81,12 @@ void RUNTIME_enable( void *runtime_ctxt, int lever )
     case CHAMELEON_DAG:
         fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_PROFILING_MODE)\n");
         break;
+    case CHAMELEON_KERNELPROFILE_MODE:
+        context_starpu_flags |= (1 << CHAMELEON_KERNELPROFILE_MODE);
+        starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
+        break;
     case CHAMELEON_PROFILING_MODE:
+        context_starpu_flags |= (1 << CHAMELEON_PROFILING_MODE);
         starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
         break;
     case CHAMELEON_BOUND:
@@ -101,7 +111,16 @@ void RUNTIME_disable( void *runtime_ctxt, int lever )
         fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_PROFILING_MODE)\n");
         break;
     case CHAMELEON_PROFILING_MODE:
-        starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
+        context_starpu_flags &= ~(1 << CHAMELEON_PROFILING_MODE); /* clear only this mode's bit */
+        if ( !context_starpu_flags ) { /* no other profiling mode still enabled */
+            starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
+        }
+        break;
+    case CHAMELEON_KERNELPROFILE_MODE:
+        context_starpu_flags &= ~(1 << CHAMELEON_KERNELPROFILE_MODE); /* clear only this mode's bit */
+        if ( !context_starpu_flags ) { /* no other profiling mode still enabled */
+            starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
+        }
         break;
     case CHAMELEON_BOUND:
         starpu_bound_stop();
diff --git a/runtime/starpu/include/runtime_codelet_profile.h b/runtime/starpu/include/runtime_codelet_profile.h
index 20d954fdd..8ff80dab0 100644
--- a/runtime/starpu/include/runtime_codelet_profile.h
+++ b/runtime/starpu/include/runtime_codelet_profile.h
@@ -22,6 +22,7 @@
 #define _runtime_codelet_profile_h_
 
 #include <math.h>
+#include <assert.h>
 
 #define CHAMELEON_CL_CB(name, _m, _n, _k, _nflops)			\
     static measure_t name##_perf[STARPU_NMAXWORKERS];                                          \
@@ -34,6 +35,7 @@
         __attribute__ ((unused)) double K = (double)(_k);                                      \
         double flops = (_nflops);                                                              \
         struct starpu_profiling_task_info *info = task->profiling_info;                        \
+        assert( info != NULL );                                                                \
         double duration = starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); \
         double speed = flops/(1000.0*duration);                                                \
         name##_perf[info->workerid].sum  += speed;                                             \
-- 
GitLab