From ab7de6c8c7184e32371ed53ad4b280f39d4e2107 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Tue, 1 Dec 2020 17:05:45 +0100
Subject: [PATCH] Fix profile initialization with StarPU

Track which of CHAMELEON_PROFILING_MODE and CHAMELEON_KERNELPROFILE_MODE
are currently enabled in a file-local bitmask, so that StarPU profiling
is switched on when either mode is enabled and switched off only once
both have been disabled.

Also add a ztradd profiling callback, and assert that the task profiling
info is available before the callback dereferences it.
---
Review notes:
 - RUNTIME_disable() clears the mode bit with `&= ~(1 << mode)`. The
   earlier `|= ~(1 << mode)` set every other bit instead, so the mask
   could never reach zero and profiling was never disabled.
 - Whitespace inside context lines was reconstructed from a flattened
   copy of this patch; if it does not apply cleanly, retry with
   `git apply --ignore-whitespace`.

 runtime/starpu/codelets/codelet_zcallback.c   |  1 +
 runtime/starpu/control/runtime_context.c      | 21 ++++++++++++++++++-
 .../starpu/include/runtime_codelet_profile.h  |  2 ++
 3 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index cb381cfc1..40c4c24ab 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -28,6 +28,7 @@ CHAMELEON_CL_CB(dlag2z, cti_handle_get_m(task->handles[1]), cti_handle_ge
 CHAMELEON_CL_CB(dzasum, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
 CHAMELEON_CL_CB(zaxpy, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[1]), 0, M)
 CHAMELEON_CL_CB(zgeadd, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
+CHAMELEON_CL_CB(ztradd, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, 0.5*M*N)
 CHAMELEON_CL_CB(zlascal, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
 CHAMELEON_CL_CB(zgelqt, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, (4./3.)*M*N*K)
 CHAMELEON_CL_CB(zgemv, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, 2. *M*N )
diff --git a/runtime/starpu/control/runtime_context.c b/runtime/starpu/control/runtime_context.c
index f44ae2043..9632aef93 100644
--- a/runtime/starpu/control/runtime_context.c
+++ b/runtime/starpu/control/runtime_context.c
@@ -21,6 +21,11 @@
 #include <stdlib.h>
 #include "chameleon_starpu.h"
 
+/**
+ * @brief Store the status of some flags to know when to enable/disable them
+ */
+static int context_starpu_flags = 0;
+
 #if (STARPU_MAJOR_VERSION > 1) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION >= 3))
 /* Defined by StarPU as external function */
 #else
@@ -76,7 +81,12 @@ void RUNTIME_enable( void *runtime_ctxt, int lever )
     case CHAMELEON_DAG:
         fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_PROFILING_MODE)\n");
         break;
+    case CHAMELEON_KERNELPROFILE_MODE:
+        context_starpu_flags |= (1 << CHAMELEON_KERNELPROFILE_MODE);
+        starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
+        break;
     case CHAMELEON_PROFILING_MODE:
+        context_starpu_flags |= (1 << CHAMELEON_PROFILING_MODE);
         starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
         break;
     case CHAMELEON_BOUND:
@@ -101,7 +111,16 @@ void RUNTIME_disable( void *runtime_ctxt, int lever )
         fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_PROFILING_MODE)\n");
         break;
     case CHAMELEON_PROFILING_MODE:
-        starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
+        context_starpu_flags &= ~(1 << CHAMELEON_PROFILING_MODE);
+        if ( !context_starpu_flags ) {
+            starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
+        }
+        break;
+    case CHAMELEON_KERNELPROFILE_MODE:
+        context_starpu_flags &= ~(1 << CHAMELEON_KERNELPROFILE_MODE);
+        if ( !context_starpu_flags ) {
+            starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
+        }
         break;
     case CHAMELEON_BOUND:
         starpu_bound_stop();
diff --git a/runtime/starpu/include/runtime_codelet_profile.h b/runtime/starpu/include/runtime_codelet_profile.h
index 20d954fdd..8ff80dab0 100644
--- a/runtime/starpu/include/runtime_codelet_profile.h
+++ b/runtime/starpu/include/runtime_codelet_profile.h
@@ -22,6 +22,7 @@
 #define _runtime_codelet_profile_h_
 
 #include <math.h>
+#include <assert.h>
 
 #define CHAMELEON_CL_CB(name, _m, _n, _k, _nflops) \
     static measure_t name##_perf[STARPU_NMAXWORKERS]; \
@@ -34,6 +35,7 @@
         __attribute__ ((unused)) double K = (double)(_k); \
         double flops = (_nflops); \
         struct starpu_profiling_task_info *info = task->profiling_info; \
+        assert( info != NULL ); \
         double duration = starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); \
         double speed = flops/(1000.0*duration); \
         name##_perf[info->workerid].sum += speed; \
-- 
GitLab