diff --git a/runtime/src/include/30/ompt.h.var b/runtime/src/include/30/ompt.h.var index 5e17697a986f72887f7aa951d522830ba4eeba6f..b1b4fc380e22f52959280ab6392e90e2a61071af 100644 --- a/runtime/src/include/30/ompt.h.var +++ b/runtime/src/include/30/ompt.h.var @@ -177,8 +177,8 @@ \ macro (ompt_event_thread_state_begin, ompt_threadstate_callback_t, 72) /* begin state */ \ macro (ompt_event_thread_state_end, ompt_threadstate_callback_t, 73) /* end state */ \ - macro (ompt_event_thread_steal, ompt_thread_steal_callback_t, 74) /* steal event */ - + macro (ompt_event_thread_steal, ompt_thread_steal_callback_t, 74) /* steal event */ \ + macro (ompt_event_thread_schedinfo, ompt_threadstate_callback_t, 75) /* set sched info */ /***************************************************************************** diff --git a/runtime/src/include/40/ompt.h.var b/runtime/src/include/40/ompt.h.var index f4d09d93de9a59a35069bec91b3d7b16de555be7..bb5492b099f8120443351fa3d86e2e1b27d92138 100644 --- a/runtime/src/include/40/ompt.h.var +++ b/runtime/src/include/40/ompt.h.var @@ -180,9 +180,8 @@ \ macro (ompt_event_thread_state_begin, ompt_threadstate_callback_t, 72) /* begin state */ \ macro (ompt_event_thread_state_end, ompt_threadstate_callback_t, 73) /* end state */ \ - macro (ompt_event_thread_steal, ompt_thread_steal_callback_t, 74) /* steal event */ - - + macro (ompt_event_thread_steal, ompt_thread_steal_callback_t, 74) /* steal event */ \ + macro (ompt_event_thread_schedinfo, ompt_threadstate_callback_t, 75) /* set sched info */ /***************************************************************************** * data types *****************************************************************************/ diff --git a/runtime/src/include/45/ompt.h.var b/runtime/src/include/45/ompt.h.var index 59635af27cbb38f3e5b7fc39c22dfbe60d4b34a1..488f0888d72fdce99783672a83eef27f81bff89d 100644 --- a/runtime/src/include/45/ompt.h.var +++ b/runtime/src/include/45/ompt.h.var @@ -180,7 +180,8 @@ \ macro 
(ompt_event_thread_state_begin, ompt_threadstate_callback_t, 72) /* begin state */ \ macro (ompt_event_thread_state_end, ompt_threadstate_callback_t, 73) /* end state */ \ - macro (ompt_event_thread_steal, ompt_thread_steal_callback_t, 74) /* steal event */ + macro (ompt_event_thread_steal, ompt_thread_steal_callback_t, 74) /* steal event */ \ + macro (ompt_event_thread_schedinfo, ompt_threadstate_callback_t, 75) /* set sched info */ /***************************************************************************** * data types diff --git a/runtime/src/include/50/ompt.h.var b/runtime/src/include/50/ompt.h.var index dfc55fe4b3db6b39609b53d9425740b4b53b5560..f26ec5cac669053f22087ad437c2b3ec037b1913 100644 --- a/runtime/src/include/50/ompt.h.var +++ b/runtime/src/include/50/ompt.h.var @@ -180,7 +180,8 @@ \ macro (ompt_event_thread_state_begin, ompt_threadstate_callback_t, 72) /* begin state */ \ macro (ompt_event_thread_state_end, ompt_threadstate_callback_t, 73) /* end state */ \ - macro (ompt_event_thread_steal, ompt_thread_steal_callback_t, 74) /* steal event */ + macro (ompt_event_thread_steal, ompt_thread_steal_callback_t, 74) /* steal event */ \ + macro (ompt_event_thread_schedinfo, ompt_threadstate_callback_t, 75) /* set sched info */ /***************************************************************************** * data types diff --git a/runtime/src/kaapi_ompt.c b/runtime/src/kaapi_ompt.c index 1311e19418de1b95e677435b2b6b022a8d407376..e31df12044e8d2e5d1756ada532bd5a5c795a3da 100644 --- a/runtime/src/kaapi_ompt.c +++ b/runtime/src/kaapi_ompt.c @@ -827,6 +827,23 @@ on_ompt_event_task_dependence_pair( #endif } +static void +on_ompt_event_thread_schedinfo( + ompt_thread_id_t thread_id, + uint64_t state, + uint32_t cpu, + uint32_t node +) +{ +#if LOG + printf("%" PRIu64 ": ompt_event_thread_schedinfo: state id=%" PRIu64 ", cpu=%i, node=%i\n", thread_id, state, cpu, node); +#endif +#if USE_KAAPI + kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id]; + 
kaapi_tracelib_thread_state( koti->kproc, 0, cpu, node, state ); +#endif +} + static void on_ompt_event_thread_state_begin( ompt_thread_id_t thread_id, @@ -939,6 +956,7 @@ void kaapi_ompt_initialize( //ompt_set_callback(ompt_event_idle_begin, (ompt_callback_t) &on_ompt_event_idle_begin); //ompt_set_callback(ompt_event_idle_end, (ompt_callback_t) &on_ompt_event_idle_end); + ompt_set_callback(ompt_event_thread_schedinfo, (ompt_callback_t) &on_ompt_event_thread_schedinfo); ompt_set_callback(ompt_event_thread_state_begin, (ompt_callback_t) &on_ompt_event_thread_state_begin); ompt_set_callback(ompt_event_thread_state_end, (ompt_callback_t) &on_ompt_event_thread_state_end); ompt_set_callback(ompt_event_thread_steal, (ompt_callback_t) &on_ompt_event_thread_steal); diff --git a/runtime/src/kaapi_trace_lib.c b/runtime/src/kaapi_trace_lib.c index cc41bf2bba2068303e2172e5bbb117df2c67459d..d429d8d3ce6902db3cc902438164b8f4d1894923 100644 --- a/runtime/src/kaapi_trace_lib.c +++ b/runtime/src/kaapi_trace_lib.c @@ -650,7 +650,8 @@ int kaapi_tracelib_init( if (getenv("KAAPI_RECORD_MASK") !=0) { /* actual grammar: - eventno[,eventno]* + descr[,descr]* + descr -> groupname | eventno eventno is an integer less than 2^sizeof(kaapi_event_mask_type_t) grammar must be more complex using predefined set */ @@ -1234,7 +1235,7 @@ void kaapi_tracelib_thread_fini( */ void kaapi_tracelib_thread_state ( kaapi_tracelib_thread_t* kproc, - uint8_t begend, /* 0==begin, 1==end */ + uint8_t begend, /* 0==begin, 1==end, 2==running */ uint32_t cpu, uint32_t node, uint64_t state @@ -1247,7 +1248,7 @@ void kaapi_tracelib_thread_state ( evt->u.s.d1.i32[0] = cpu; evt->u.s.d1.i32[1] = node; evt->u.s.d2.i64[0] = state; - KAAPI_EVENT_PUSH(kproc,0, KAAPI_EVT_THREAD_STATE); + KAAPI_EVENT_PUSH(kproc, 0, KAAPI_EVT_THREAD_STATE); } } diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp index 35f8c77b95d6f53de7b1894e94b1b00ee95770d2..9ca49e128a34c84ec3f99b73e93cca8c7c4d81ef 100644 --- 
a/runtime/src/kmp_runtime.cpp +++ b/runtime/src/kmp_runtime.cpp @@ -1376,6 +1376,19 @@ int __kmp_fork_call(ident_t *loc, int gtid, #if KMP_NESTED_HOT_TEAMS kmp_hot_team_ptr_t **p_hot_teams; #endif +#if OMPT_SUPPORT + ompt_thread_id_t tid = GTID_TO_OMPT_THREAD_ID(gtid); +#if defined(__linux__) + kmp_int32 cpu = sched_getcpu(); +#else + kmp_int32 cpu = tid; +#endif +#if LIBOMP_USE_AFFINITY + kmp_int32 node = __kmp_cpu2node(cpu); +#else + kmp_int32 node = 0; +#endif +#endif // OMPT_SUPPORT { // KMP_TIME_BLOCK KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); @@ -1502,9 +1515,15 @@ int __kmp_fork_call(ident_t *loc, int gtid, /* OMPT implicit task begin */ my_task_id = lw_taskteam.ompt_task_info.task_id; my_parallel_id = parent_team->t.ompt_team_info.parallel_id; + ompt_thread_id_t tid = GTID_TO_OMPT_THREAD_ID(gtid); if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - GTID_TO_OMPT_THREAD_ID(gtid), my_parallel_id, my_task_id); + tid, my_parallel_id, my_task_id); + } + if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) { + ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)( + tid, (uint64_t)3, cpu, node + ); } #endif @@ -1530,11 +1549,16 @@ int __kmp_fork_call(ident_t *loc, int gtid, *exit_runtime_p = NULL; if (ompt_enabled) { #if OMPT_TRACE + if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) { + ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)( + tid, (uint64_t)3, cpu, node + ); + } lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - GTID_TO_OMPT_THREAD_ID(gtid), ompt_parallel_id, ompt_task_id); + tid, ompt_parallel_id, ompt_task_id); } __ompt_lw_taskteam_unlink(master_th); @@ -1544,7 +1568,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, if 
(ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - GTID_TO_OMPT_THREAD_ID(gtid), ompt_parallel_id, + tid, ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); } master_th->th.ompt_thread_info.state = ompt_state_overhead; @@ -1724,7 +1748,12 @@ int __kmp_fork_call(ident_t *loc, int gtid, my_task_id = lw_taskteam.ompt_task_info.task_id; if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - GTID_TO_OMPT_THREAD_ID(gtid), ompt_parallel_id, my_task_id); + tid, ompt_parallel_id, my_task_id); + } + if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) { + ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)( + tid, (uint64_t)3, cpu, node + ); } #endif @@ -1753,6 +1782,11 @@ int __kmp_fork_call(ident_t *loc, int gtid, lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; #if OMPT_TRACE + if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) { + ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)( + tid, (uint64_t)3, cpu, node + ); + } if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( GTID_TO_OMPT_THREAD_ID(gtid), ompt_parallel_id, ompt_task_id); @@ -1837,6 +1871,11 @@ int __kmp_fork_call(ident_t *loc, int gtid, ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( GTID_TO_OMPT_THREAD_ID(gtid), my_parallel_id, my_task_id); } + if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) { + ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)( + tid, (uint64_t)3, cpu, node + ); + } #endif /* OMPT state */ @@ -1863,6 +1902,11 @@ int __kmp_fork_call(ident_t *loc, int gtid, #if OMPT_TRACE lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; + if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) { + ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)( + tid, (uint64_t)3, cpu, node + ); 
+ } if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( GTID_TO_OMPT_THREAD_ID(gtid), my_parallel_id, my_task_id); @@ -5505,6 +5549,18 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { } #if OMPT_SUPPORT + ompt_thread_id_t tid = GTID_TO_OMPT_THREAD_ID(gtid); +#if defined(__linux__) + kmp_int32 cpu = sched_getcpu(); +#else + kmp_int32 cpu = GTID_TO_OMPT_THREAD_ID(gtid); +#endif +#if LIBOMP_USE_AFFINITY + kmp_int32 node = __kmp_cpu2node(cpu); +#else + kmp_int32 node = 0; +#endif + if (ompt_enabled) { this_thr->th.ompt_thread_info.state = ompt_state_overhead; this_thr->th.ompt_thread_info.wait_id = 0; @@ -5514,6 +5570,11 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { __ompt_thread_begin(ompt_thread_worker, gtid); } + if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) { + ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)( + tid, (uint64_t)3, cpu, node + ); + } } #endif @@ -5567,6 +5628,11 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { 0 /* loc is missing */ ); } + if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) { + ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)( + tid, (uint64_t)3, cpu, node + ); + } } #endif /* we were just woken up, so run our new task */ @@ -5587,7 +5653,6 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { task_info->task_id = __ompt_task_id_new(tid); } #endif - { KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); @@ -5612,6 +5677,11 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { __kmp_join_barrier(gtid); #if OMPT_SUPPORT && OMPT_TRACE if (ompt_enabled) { + if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) { + ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)( + tid, (uint64_t)3, cpu, node + ); + } if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { // don't access *pteam here: it 
may have already been freed // by the master thread behind the barrier (possible race) diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp index 54a0b2fae68b684b3e345796322d1f83b26baa9b..823bf4415bb6692a4bfe813dbc753a0a06a74993 100644 --- a/runtime/src/kmp_tasking.cpp +++ b/runtime/src/kmp_tasking.cpp @@ -2622,7 +2622,7 @@ static inline int __kmp_execute_tasks_template( #else kmp_int32 node = 0; #endif - enum { INIT_STATE, IDLE_STATE, ACTIVE_STATE }; + enum { INIT_STATE=0, IDLE_STATE=1, ACTIVE_STATE=2, UNKNOWN_STATE=3 }; kmp_int32 state = INIT_STATE; kmp_base_queue_data_t *own_queue = &task_team->tt.tt_task_queues[KMP_LEVEL_THREAD][tid].qd; diff --git a/runtime/src/ompt-event-specific.h b/runtime/src/ompt-event-specific.h index 848072145ba782037e6e1ef641faa3b5f8afa189..be168b5b6c84f7170fb8681d57b0fc264666ea97 100644 --- a/runtime/src/ompt-event-specific.h +++ b/runtime/src/ompt-event-specific.h @@ -146,6 +146,7 @@ #define ompt_event_thread_state_begin_implemented ompt_event_MAY_ALWAYS_TRACE #define ompt_event_thread_state_end_implemented ompt_event_MAY_ALWAYS_TRACE #define ompt_event_thread_steal_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_thread_schedinfo_implemented ompt_event_MAY_ALWAYS_TRACE #if OMP_40_ENABLED #define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE