diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index cb381cfc153f21234fbc7b3fbea21f5663669627..40c4c24ab4cbd1352d516a6ebae7a8447b3e5f67 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -28,6 +28,7 @@ CHAMELEON_CL_CB(dlag2z, cti_handle_get_m(task->handles[1]), cti_handle_ge
 CHAMELEON_CL_CB(dzasum, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
 CHAMELEON_CL_CB(zaxpy, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[1]), 0, M)
 CHAMELEON_CL_CB(zgeadd, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
+CHAMELEON_CL_CB(ztradd, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, 0.5*M*N)
 CHAMELEON_CL_CB(zlascal, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
 CHAMELEON_CL_CB(zgelqt, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, (4./3.)*M*N*K)
 CHAMELEON_CL_CB(zgemv, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, 2. *M*N )
diff --git a/runtime/starpu/control/runtime_context.c b/runtime/starpu/control/runtime_context.c
index f44ae20438faee407fb7b061c03574ce28694985..9632aef93dd00fe2931832145c15820eda4d2b44 100644
--- a/runtime/starpu/control/runtime_context.c
+++ b/runtime/starpu/control/runtime_context.c
@@ -21,6 +21,11 @@
 #include <stdlib.h>
 #include "chameleon_starpu.h"
 
+/**
+ * @brief Bitmask of the profiling modes currently enabled, used to know when StarPU profiling can be disabled
+ */
+static int context_starpu_flags = 0;
+
 #if (STARPU_MAJOR_VERSION > 1) || ((STARPU_MAJOR_VERSION == 1) && (STARPU_MINOR_VERSION >= 3))
 /* Defined by StarPU as external function */
 #else
@@ -76,7 +81,12 @@ void RUNTIME_enable( void *runtime_ctxt, int lever )
     case CHAMELEON_DAG:
         fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_PROFILING_MODE)\n");
         break;
+    case CHAMELEON_KERNELPROFILE_MODE:
+        context_starpu_flags |= (1 << CHAMELEON_KERNELPROFILE_MODE);
+        starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
+        break;
     case CHAMELEON_PROFILING_MODE:
+        context_starpu_flags |= (1 << CHAMELEON_PROFILING_MODE);
         starpu_profiling_status_set(STARPU_PROFILING_ENABLE);
         break;
     case CHAMELEON_BOUND:
@@ -101,7 +111,18 @@ void RUNTIME_disable( void *runtime_ctxt, int lever )
         fprintf(stderr, "StarPU is providing DAG generation through tracing support (CHAMELEON_PROFILING_MODE)\n");
         break;
     case CHAMELEON_PROFILING_MODE:
-        starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
+        /* Clear this mode's bit; stop StarPU profiling only once no mode still needs it */
+        context_starpu_flags &= ~(1 << CHAMELEON_PROFILING_MODE);
+        if ( !context_starpu_flags ) {
+            starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
+        }
+        break;
+    case CHAMELEON_KERNELPROFILE_MODE:
+        /* Same rule as above, applied to the kernel profiling mode bit */
+        context_starpu_flags &= ~(1 << CHAMELEON_KERNELPROFILE_MODE);
+        if ( !context_starpu_flags ) {
+            starpu_profiling_status_set(STARPU_PROFILING_DISABLE);
+        }
         break;
     case CHAMELEON_BOUND:
         starpu_bound_stop();
diff --git a/runtime/starpu/include/runtime_codelet_profile.h b/runtime/starpu/include/runtime_codelet_profile.h
index 20d954fdd2cda5e2071f0c945aaf9de297051a7f..8ff80dab03cfc5a3a5a20037345efcb3712c75d6 100644
--- a/runtime/starpu/include/runtime_codelet_profile.h
+++ b/runtime/starpu/include/runtime_codelet_profile.h
@@ -22,6 +22,7 @@
 #define _runtime_codelet_profile_h_
 
 #include <math.h>
+#include <assert.h>
 
 #define CHAMELEON_CL_CB(name, _m, _n, _k, _nflops) \
 static measure_t name##_perf[STARPU_NMAXWORKERS]; \
@@ -34,6 +35,7 @@
     __attribute__ ((unused)) double K = (double)(_k); \
     double flops = (_nflops); \
     struct starpu_profiling_task_info *info = task->profiling_info; \
+    assert( info != NULL ); \
     double duration = starpu_timing_timespec_delay_us(&info->start_time, &info->end_time); \
     double speed = flops/(1000.0*duration); \
     name##_perf[info->workerid].sum += speed; \