diff --git a/includes/kaapi_trace_lib.h b/includes/kaapi_trace_lib.h index e7a8631e50d03f58a992df0effa03ff9288deb9a..53e412b42adcab7f7feaaa31de23ed28e762f2ff 100644 --- a/includes/kaapi_trace_lib.h +++ b/includes/kaapi_trace_lib.h @@ -44,4 +44,5 @@ static kmp_lock_t __kaapi_global_lock; extern kaapi_ompt_thread_info_t* __kaapi_oth_info; extern size_t __kaapi_oth_info_capacity; extern ompt_get_thread_data_t ompt_get_thread_data; +extern ompt_get_unique_id_t ompt_get_unique_id; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index d77017e448f2926fd98686aa3855c3b675d2659e..84989b7412b1b87affc3520d271ba9edc02213c2 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -3,3 +3,4 @@ find_package(OpenMP REQUIRED) set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") add_executable(hello hello.c) +add_executable(hello-task hello-task.c) diff --git a/test/hello-task.c b/test/hello-task.c new file mode 100644 index 0000000000000000000000000000000000000000..74473999d83040b605416304d50d63e739d7ed06 --- /dev/null +++ b/test/hello-task.c @@ -0,0 +1,23 @@ +#include <stdio.h> +#include <unistd.h> +#include <omp.h> + +int main() +{ +#pragma omp parallel +#pragma omp master + { + printf("Hello from %i\n", omp_get_thread_num()); + #pragma omp task + { + sleep(1); + printf("Hey there\n"); + } + #pragma omp task + { + sleep(1); + printf("Hey there another task\n"); + } + } + return 0; +} diff --git a/tool/ompt_callbacks.cpp b/tool/ompt_callbacks.cpp index f96eb6c6c05b087e1e3b49bedb1b3645c04ed3f3..23a560e577b9ce1aca3711428507ae083c37f460 100644 --- a/tool/ompt_callbacks.cpp +++ b/tool/ompt_callbacks.cpp @@ -198,16 +198,15 @@ static inline void realloc_ifrequired( size_t size ) using namespace std; -atomic<uint64_t> parallel_id(0); - void ompt_callback_thread_begin_action( ompt_thread_t thread_type, ompt_data_t *thread_data ) { - int thread_id = thread_data->value = omp_get_thread_num(); + //int thread_id = thread_data->value = omp_get_thread_num(); + int thread_id = thread_data->value = ompt_get_unique_id(); - printf("%" PRIu64 ": ompt_event_thread_begin: thread_type=%" PRIu64 "\n", thread_data->value, (uint64_t)thread_type); + printf("%" PRIu64 ": ompt_event_thread_begin: thread_id=%" PRIu64 "\n", thread_data->value, (uint64_t)thread_type); kaapi_assert ( thread_id < __kaapi_oth_info_capacity ); kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id]; #if LIBOMP_USE_NUMA @@ -230,19 +229,17 @@ at the begining of the parallel region */ sizeof(kaapi_taskstackentry_t) * koti->pstack.capacity /* */ ); koti->tstack.top = 0; -#if USE_KAAPI kaapi_tracelib_thread_start( koti->kproc ); -#endif } void ompt_callback_thread_end_action( ompt_data_t *thread_data ) { + printf("%" PRIu64 ": ompt_event_thread_end: thread_id=%" PRIu64 "\n", thread_data->value, thread_data->value); kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_data->value]; kaapi_tracelib_thread_stop( koti->kproc ); kaapi_tracelib_thread_fini( koti->kproc ); - printf("%" PRIu64 ": ompt_event_thread_end\n", thread_data->value); } void ompt_callback_parallel_begin_action ( @@ -255,14 +252,16 @@ void ompt_callback_parallel_begin_action ( ) { ompt_data_t *thread_data = ompt_get_thread_data(); - parallel_data->value = parallel_id++; + parallel_data->value = ompt_get_unique_id(); uint64_t thread_id = thread_data->value; - printf("%" PRIu64 ": omp threadid:%" PRIu64 ": ompt_event_parallel_begin: parent_task_frame=%p, requested_team_size=%" PRIu32 ", parallel_function=%p, parallel_data: %" PRIu64 "\n", thread_id, (uint64_t)omp_get_thread_num(), + printf("%" PRIu64 ": omp threadid:%" PRIu64 ": ompt_event_parallel_begin: parent_task_frame=%p, task_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", parallel_function=%p, parallel_data: %" PRIu64 "\n", thread_id, (uint64_t)omp_get_thread_num(), encountering_task_frame, + encountering_task_data->value, requested_parallelism, codeptr_ra, parallel_data->value ); +#if 0 /* TODO here: the key is the way several instances of the same parallel region are collapsed to compute statistics. @@ -300,4 +299,194 @@ void ompt_callback_parallel_begin_action ( parallel_data->value ); ++koti->tstack.top; +#endif +} + +void +implicit_task_end( + ompt_data_t *parallel_data, + ompt_data_t *task_data) +{ + uint64_t thread_id = ompt_get_thread_data()->value; + /* But because the end of implicit task is not related to on_ompt_event_parallel_end. + */ + printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", thread_id, parallel_data->value, task_data->value); + /* thread 0 ends the parallel region + But because the end of implicit task is not related to + the end of the parallel region, we define here the end of implicit task. + */ + kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id]; + int idxtop = --koti->pstack.top; + //kaapi_assert( idxtop == 0 ); + + /* reset that accumulated into */ + memset( koti->pstack.stack[idxtop].accum, 0, sizeof(koti->pstack.stack[idxtop].accum)); + kaapi_tracelib_task_end( + koti->kproc, + koti->pstack.stack[idxtop].task, + 0, 0, + koti->pstack.stack[idxtop].fdescr, + koti->pstack.t0, + koti->pstack.stack[idxtop].accum + ); +} + +void +ompt_callback_parallel_end_action( + ompt_data_t *parallel_data, + ompt_data_t *encountering_task_data, + int flags, + const void *codeptr_ra) +{ + uint64_t thread_id = ompt_get_thread_data()->value; + /* end implicit task here */ + printf("%" PRIu64 ": on_ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", thread_id, parallel_data->value, encountering_task_data->value); +#if 0 + implicit_task_end(parallel_data, encountering_task_data); + /* end of the parallel region */ + kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id]; + int idxtop = --koti->tstack.top; + kaapi_tracelib_team_stop( koti->kproc, + koti->tstack.stack[idxtop], + idxtop >0 ? koti->tstack.stack[idxtop-1]: 0, + parallel_data->value + ); + kaapi_tracelib_team_fini( koti->kproc, koti->tstack.stack[idxtop] ); +#endif +} + + +void +ompt_callback_task_create_action( + ompt_data_t *parent_task_data, /* id of parent task */ + const ompt_frame_t *parent_frame, /* frame data for parent task */ + ompt_data_t* new_task_data, /* id of created task */ + int type, + int has_dependences, + const void *codeptr_ra) /* pointer to outlined function */ +{ + uint64_t thread_id = ompt_get_thread_data()->value; + new_task_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_task_create: parent_id=%" PRIu64 ", task_id=%" PRIu64\ + ", type=%i, has_dep=%i, ptr=%" PRIu64 "\n", thread_id, parent_task_data->value, new_task_data->value, type, has_dependences, codeptr_ra); +} + + +void +ompt_callback_task_schedule_action( + ompt_data_t *prior_task_data, + ompt_task_status_t prior_task_status, + ompt_data_t *next_task_data) +{ + uint64_t thread_id = ompt_get_thread_data()->value; + if (prior_task_data->ptr) { + uint64_t prior_task_id = prior_task_data->value; + // We are ending a task + printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n", thread_id, prior_task_id ); +#if 0 + /* thread 0 ends the parallel region + But because the end of implicit task is not related to + the end of the parallel region, we define here the end of implicit task. + */ + kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id]; + int idxtop = --koti->pstack.top; + + /* reset that accumulated into */ + memset( koti->pstack.stack[idxtop].accum, 0, sizeof(koti->pstack.stack[idxtop].accum)); + kaapi_tracelib_task_end( + koti->kproc, + koti->pstack.stack[idxtop].task, + 0, + 0, + koti->pstack.stack[idxtop].fdescr, + koti->pstack.t0, + koti->pstack.stack[idxtop].accum + ); + kaapi_tracelib_thread_switchstate(koti->kproc); +#endif + } + if (next_task_data->ptr) { + uint64_t new_task_id = next_task_data->value; + // We are starting a task + printf("%" PRIu64 ": ompt_event_task_begin: task_id=%" PRIu64 ", status=%i\n", thread_id, new_task_id, prior_task_status); + /* This is code for implicit task begin. + */ +#if 0 + kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id]; + kaapi_tracelib_thread_switchstate(koti->kproc); + kaapi_descrformat_t* fdescr = kaapi_tracelib_register_fmtdescr( + 0, + // TODO: get codeptr_ra there + next_task_data, + 0, //loc + "<undef>", + libomp_filter_func + ); + int idxtop = koti->pstack.top; + koti->pstack.stack[idxtop].fdescr = fdescr; + koti->pstack.stack[idxtop].task = (void*)new_task_id; + + kaapi_tracelib_task_begin( + koti->kproc, + (kaapi_task_id_t)new_task_id, + fdescr->fmtid, + 1, + 0, 0, 0, + 0, 0, + koti->pstack.t0, + koti->pstack.top ==0 ? 0 : koti->pstack.stack[idxtop-1].accum + ); + ++koti->pstack.top; +#endif + } +} + +void +ompt_callback_implicit_task_action ( + ompt_scope_endpoint_t endpoint, + ompt_data_t *parallel_data, + ompt_data_t *task_data, + unsigned int actual_parallelism, + unsigned int index, + int flags + ) +{ + uint64_t thread_id = ompt_get_thread_data()->value; + if (endpoint == ompt_scope_begin) { + task_data->value = ompt_get_unique_id(); + printf("%" PRIu64 ": ompt_event_implicit_task_action: begin. parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", thread_id, parallel_data->value, task_data->value); + } else if (endpoint == ompt_scope_end) { + printf("%" PRIu64 ": ompt_event_implicit_task_action: end. task_id=%" PRIu64 "\n", thread_id, task_data->value); + } else { + printf("%" PRIu64 ": ompt_event_implicit_task_action: unknown endpoint. task_id=%" PRIu64 "\n", thread_id, task_data->value); + } + /* This is code for implicit task begin. + */ +#if 0 + // TODO: debug below + kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id]; + kaapi_tracelib_team_t* team = koti->tstack.stack[koti->tstack.top-1]; + kaapi_descrformat_t* fdescr = kaapi_tracelib_register_fmtdescr( + 1, + team->key, /* same key as the team. Not implicit task ? why ? [TG] */ + team->name, + 0, + 0 /* no filter: team name should be already well formed */ + ); + int idxtop = koti->pstack.top; + koti->pstack.stack[idxtop].fdescr = fdescr; + koti->pstack.stack[idxtop].task = (void*)task_data->value; + + kaapi_tracelib_task_begin( + koti->kproc, + (kaapi_task_id_t)task_data->value, + fdescr->fmtid, + 0, + 0, 0, 0, + 0, 0, + koti->pstack.t0, + 0 + ); + ++koti->pstack.top; +#endif } diff --git a/tool/ompt_callbacks.def b/tool/ompt_callbacks.def index b14066fd6b5b4d2afce257348e37a5d52d7dcc95..7ae6d690e0d3a7b2b1d14e09377f15c57f18758e 100644 --- a/tool/ompt_callbacks.def +++ b/tool/ompt_callbacks.def @@ -8,10 +8,10 @@ CALLBACK(ompt_callback_thread_begin, ompt_thread_t, ompt_data_t*) CALLBACK(ompt_callback_thread_end, ompt_data_t*) CALLBACK(ompt_callback_parallel_begin, ompt_data_t *, const ompt_frame_t *, ompt_data_t *, unsigned int, int, const void *) -UNUSED(ompt_callback_parallel_end) -UNUSED(ompt_callback_task_create) -UNUSED(ompt_callback_task_schedule) -UNUSED(ompt_callback_implicit_task) +CALLBACK(ompt_callback_parallel_end, ompt_data_t *, ompt_data_t *, int, const void *) +CALLBACK(ompt_callback_task_create, ompt_data_t*, const ompt_frame_t*, ompt_data_t*, int, int, const void *codeptr_ra) +CALLBACK(ompt_callback_task_schedule, ompt_data_t *, ompt_task_status_t, ompt_data_t *) +CALLBACK(ompt_callback_implicit_task, ompt_scope_endpoint_t, ompt_data_t *, ompt_data_t*, unsigned int, unsigned int, int) UNUSED(ompt_callback_target) UNUSED(ompt_callback_target_data_op) UNUSED(ompt_callback_target_submit) diff --git a/tool/tool.cpp b/tool/tool.cpp index cc8c77a3627db07ae6eda98fe365abcc405a0033..d34572bff28c010657ceebc9b25ffdb3949c0846 100644 --- a/tool/tool.cpp +++ b/tool/tool.cpp @@ -14,6 +14,9 @@ kaapi_ompt_thread_info_t* __kaapi_oth_info = 0; size_t __kaapi_oth_info_capacity = 256; ompt_get_thread_data_t ompt_get_thread_data; +ompt_get_unique_id_t ompt_get_unique_id; +#include <atomic> +std::atomic<uint64_t> myuid(0); void initTool(ompt_function_lookup_t lookup) { printf("init tool\n"); @@ -30,6 +33,11 @@ void initTool(ompt_function_lookup_t lookup) { printf("oth_info %p\n", __kaapi_oth_info); ompt_set_callback_t ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback"); ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data"); + ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id"); + // FIXME: restore the call to omp get unique id + ompt_get_unique_id = []() { + return ++myuid; + }; #define CALLBACK(name, ...) \ do{ \