diff --git a/runtime/src/include/30/ompt.h.var b/runtime/src/include/30/ompt.h.var
index 5e17697a986f72887f7aa951d522830ba4eeba6f..b1b4fc380e22f52959280ab6392e90e2a61071af 100644
--- a/runtime/src/include/30/ompt.h.var
+++ b/runtime/src/include/30/ompt.h.var
@@ -177,8 +177,8 @@
                                                                                                                 \
     macro (ompt_event_thread_state_begin,       ompt_threadstate_callback_t,   72) /* begin state */            \
     macro (ompt_event_thread_state_end,         ompt_threadstate_callback_t,   73) /* end state */              \
-    macro (ompt_event_thread_steal,             ompt_thread_steal_callback_t,  74) /* steal event */
-
+    macro (ompt_event_thread_steal,             ompt_thread_steal_callback_t,  74) /* steal event */            \
+    macro (ompt_event_thread_schedinfo,         ompt_threadstate_callback_t,   75) /* set sched info */
 
 
 /*****************************************************************************
diff --git a/runtime/src/include/40/ompt.h.var b/runtime/src/include/40/ompt.h.var
index f4d09d93de9a59a35069bec91b3d7b16de555be7..bb5492b099f8120443351fa3d86e2e1b27d92138 100644
--- a/runtime/src/include/40/ompt.h.var
+++ b/runtime/src/include/40/ompt.h.var
@@ -180,9 +180,8 @@
                                                                                                                 \
     macro (ompt_event_thread_state_begin,       ompt_threadstate_callback_t,   72) /* begin state */            \
     macro (ompt_event_thread_state_end,         ompt_threadstate_callback_t,   73) /* end state */              \
-    macro (ompt_event_thread_steal,             ompt_thread_steal_callback_t,  74) /* steal event */
-
-
+    macro (ompt_event_thread_steal,             ompt_thread_steal_callback_t,  74) /* steal event */            \
+    macro (ompt_event_thread_schedinfo,         ompt_threadstate_callback_t,   75) /* set sched info */
 /*****************************************************************************
  * data types
  *****************************************************************************/
diff --git a/runtime/src/include/45/ompt.h.var b/runtime/src/include/45/ompt.h.var
index 59635af27cbb38f3e5b7fc39c22dfbe60d4b34a1..488f0888d72fdce99783672a83eef27f81bff89d 100644
--- a/runtime/src/include/45/ompt.h.var
+++ b/runtime/src/include/45/ompt.h.var
@@ -180,7 +180,8 @@
                                                                                                                 \
     macro (ompt_event_thread_state_begin,       ompt_threadstate_callback_t,   72) /* begin state */            \
     macro (ompt_event_thread_state_end,         ompt_threadstate_callback_t,   73) /* end state */              \
-    macro (ompt_event_thread_steal,             ompt_thread_steal_callback_t,  74) /* steal event */
+    macro (ompt_event_thread_steal,             ompt_thread_steal_callback_t,  74) /* steal event */            \
+    macro (ompt_event_thread_schedinfo,         ompt_threadstate_callback_t,   75) /* set sched info */
 
 /*****************************************************************************
  * data types
diff --git a/runtime/src/include/50/ompt.h.var b/runtime/src/include/50/ompt.h.var
index dfc55fe4b3db6b39609b53d9425740b4b53b5560..f26ec5cac669053f22087ad437c2b3ec037b1913 100644
--- a/runtime/src/include/50/ompt.h.var
+++ b/runtime/src/include/50/ompt.h.var
@@ -180,7 +180,8 @@
                                                                                                                 \
     macro (ompt_event_thread_state_begin,       ompt_threadstate_callback_t,   72) /* begin state */            \
     macro (ompt_event_thread_state_end,         ompt_threadstate_callback_t,   73) /* end state */              \
-    macro (ompt_event_thread_steal,             ompt_thread_steal_callback_t,  74) /* steal event */
+    macro (ompt_event_thread_steal,             ompt_thread_steal_callback_t,  74) /* steal event */            \
+    macro (ompt_event_thread_schedinfo,         ompt_threadstate_callback_t,   75) /* set sched info */
 
 /*****************************************************************************
  * data types
diff --git a/runtime/src/kaapi_ompt.c b/runtime/src/kaapi_ompt.c
index 1311e19418de1b95e677435b2b6b022a8d407376..e31df12044e8d2e5d1756ada532bd5a5c795a3da 100644
--- a/runtime/src/kaapi_ompt.c
+++ b/runtime/src/kaapi_ompt.c
@@ -827,6 +827,23 @@ on_ompt_event_task_dependence_pair(
 #endif
 }
 
+/* OMPT callback for ompt_event_thread_schedinfo: logs the event when LOG is set and,
+ * when USE_KAAPI is set, records (cpu, node, state) through the Kaapi trace library. */
+static void
+on_ompt_event_thread_schedinfo(
+  ompt_thread_id_t thread_id, uint64_t state, uint32_t cpu, uint32_t node
+)
+{
+#if LOG
+  printf("%" PRIu64 ": ompt_event_thread_schedinfo: state id=%" PRIu64 ", cpu=%" PRIu32 ", node=%" PRIu32 "\n", thread_id, state, cpu, node);
+#endif
+#if USE_KAAPI
+  /* NOTE(review): begend is passed as 0 (begin); confirm this is the intended marker. */
+  kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id];
+  kaapi_tracelib_thread_state( koti->kproc, 0, cpu, node, state );
+#endif
+}
+
 static void
 on_ompt_event_thread_state_begin(
   ompt_thread_id_t thread_id,
@@ -939,6 +956,7 @@ void kaapi_ompt_initialize(
   //ompt_set_callback(ompt_event_idle_begin, (ompt_callback_t) &on_ompt_event_idle_begin);
   //ompt_set_callback(ompt_event_idle_end, (ompt_callback_t) &on_ompt_event_idle_end);
 
+  ompt_set_callback(ompt_event_thread_schedinfo, (ompt_callback_t) &on_ompt_event_thread_schedinfo);
   ompt_set_callback(ompt_event_thread_state_begin, (ompt_callback_t) &on_ompt_event_thread_state_begin);
   ompt_set_callback(ompt_event_thread_state_end, (ompt_callback_t) &on_ompt_event_thread_state_end);
   ompt_set_callback(ompt_event_thread_steal, (ompt_callback_t) &on_ompt_event_thread_steal);
diff --git a/runtime/src/kaapi_trace_lib.c b/runtime/src/kaapi_trace_lib.c
index cc41bf2bba2068303e2172e5bbb117df2c67459d..d429d8d3ce6902db3cc902438164b8f4d1894923 100644
--- a/runtime/src/kaapi_trace_lib.c
+++ b/runtime/src/kaapi_trace_lib.c
@@ -650,7 +650,8 @@ int kaapi_tracelib_init(
     if (getenv("KAAPI_RECORD_MASK") !=0)
     {
       /* actual grammar:
-         eventno[,eventno]*
+         descr[,descr]*
+         descr -> groupname | eventno
          eventno is an integer less than 2^sizeof(kaapi_event_mask_type_t)
          grammar must be more complex using predefined set
       */
@@ -1234,7 +1235,7 @@ void kaapi_tracelib_thread_fini(
 */
 void kaapi_tracelib_thread_state (
     kaapi_tracelib_thread_t*     kproc,
-    uint8_t                      begend, /* 0==begin, 1==end */
+    uint8_t                      begend, /* 0==begin, 1==end, 2==running */
     uint32_t                     cpu,
     uint32_t                     node,
     uint64_t                     state
@@ -1247,7 +1248,7 @@ void kaapi_tracelib_thread_state (
     evt->u.s.d1.i32[0] = cpu;
     evt->u.s.d1.i32[1] = node;
     evt->u.s.d2.i64[0] = state;
-    KAAPI_EVENT_PUSH(kproc,0, KAAPI_EVT_THREAD_STATE);
+    KAAPI_EVENT_PUSH(kproc, 0, KAAPI_EVT_THREAD_STATE);
   }
 }
 
diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp
index 35f8c77b95d6f53de7b1894e94b1b00ee95770d2..9ca49e128a34c84ec3f99b73e93cca8c7c4d81ef 100644
--- a/runtime/src/kmp_runtime.cpp
+++ b/runtime/src/kmp_runtime.cpp
@@ -1376,6 +1376,19 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 #if KMP_NESTED_HOT_TEAMS
   kmp_hot_team_ptr_t **p_hot_teams;
 #endif
+#if OMPT_SUPPORT
+  ompt_thread_id_t tid = GTID_TO_OMPT_THREAD_ID(gtid);
+#if defined(__linux__)
+  kmp_int32 cpu = sched_getcpu();
+#else
+  kmp_int32 cpu = tid;
+#endif
+#if LIBOMP_USE_AFFINITY
+  kmp_int32 node  = __kmp_cpu2node(cpu);
+#else
+  kmp_int32 node  = 0;
+#endif
+#endif // OMPT_SUPPORT
   { // KMP_TIME_BLOCK
     KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call);
     KMP_COUNT_VALUE(OMP_PARALLEL_args, argc);
@@ -1502,9 +1515,15 @@ int __kmp_fork_call(ident_t *loc, int gtid,
           /* OMPT implicit task begin */
           my_task_id = lw_taskteam.ompt_task_info.task_id;
           my_parallel_id = parent_team->t.ompt_team_info.parallel_id;
+          ompt_thread_id_t tid = GTID_TO_OMPT_THREAD_ID(gtid);
           if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
             ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
-                GTID_TO_OMPT_THREAD_ID(gtid), my_parallel_id, my_task_id);
+                tid, my_parallel_id, my_task_id);
+          }
+          if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) {
+            ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)(
+                tid, (uint64_t)3, cpu, node
+            );
           }
 #endif
 
@@ -1530,11 +1549,16 @@ int __kmp_fork_call(ident_t *loc, int gtid,
         *exit_runtime_p = NULL;
         if (ompt_enabled) {
 #if OMPT_TRACE
+          if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) {
+            ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)(
+                tid, (uint64_t)3, cpu, node
+            );
+          }
           lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
 
           if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
             ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
-                GTID_TO_OMPT_THREAD_ID(gtid), ompt_parallel_id, ompt_task_id);
+                tid, ompt_parallel_id, ompt_task_id);
           }
 
           __ompt_lw_taskteam_unlink(master_th);
@@ -1544,7 +1568,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 
           if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) {
             ompt_callbacks.ompt_callback(ompt_event_parallel_end)(
-                GTID_TO_OMPT_THREAD_ID(gtid), ompt_parallel_id,
+                tid, ompt_parallel_id,
                 ompt_task_id, OMPT_INVOKER(call_context));
           }
           master_th->th.ompt_thread_info.state = ompt_state_overhead;
@@ -1724,7 +1748,12 @@ int __kmp_fork_call(ident_t *loc, int gtid,
             my_task_id = lw_taskteam.ompt_task_info.task_id;
             if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) {
               ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
-                  GTID_TO_OMPT_THREAD_ID(gtid), ompt_parallel_id, my_task_id);
+                  tid, ompt_parallel_id, my_task_id);
+            }
+            if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) {
+              ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)(
+                  tid, (uint64_t)3, cpu, node
+              );
             }
 #endif
 
@@ -1753,6 +1782,11 @@ int __kmp_fork_call(ident_t *loc, int gtid,
             lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
 
 #if OMPT_TRACE
+            if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) {
+              ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)(
+                  tid, (uint64_t)3, cpu, node
+              );
+            }
             if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
               ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                   GTID_TO_OMPT_THREAD_ID(gtid), ompt_parallel_id, ompt_task_id);
@@ -1837,6 +1871,11 @@ int __kmp_fork_call(ident_t *loc, int gtid,
               ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(
                   GTID_TO_OMPT_THREAD_ID(gtid), my_parallel_id, my_task_id);
             }
+            if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) {
+              ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)(
+                  tid, (uint64_t)3, cpu, node
+              );
+            }
 #endif
 
             /* OMPT state */
@@ -1863,6 +1902,11 @@ int __kmp_fork_call(ident_t *loc, int gtid,
 #if OMPT_TRACE
             lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL;
 
+            if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) {
+              ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)(
+                  tid, (uint64_t)3, cpu, node
+              );
+            }
             if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
               ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)(
                   GTID_TO_OMPT_THREAD_ID(gtid), my_parallel_id, my_task_id);
@@ -5505,6 +5549,18 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
   }
 
 #if OMPT_SUPPORT
+  ompt_thread_id_t tid = GTID_TO_OMPT_THREAD_ID(gtid);
+#if defined(__linux__)
+  kmp_int32 cpu = sched_getcpu();
+#else
+  kmp_int32 cpu = GTID_TO_OMPT_THREAD_ID(gtid);
+#endif
+#if LIBOMP_USE_AFFINITY
+  kmp_int32 node  = __kmp_cpu2node(cpu);
+#else
+  kmp_int32 node  = 0;
+#endif
+
   if (ompt_enabled) {
     this_thr->th.ompt_thread_info.state = ompt_state_overhead;
     this_thr->th.ompt_thread_info.wait_id = 0;
@@ -5514,6 +5570,11 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
     if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) {
       __ompt_thread_begin(ompt_thread_worker, gtid);
     }
+    if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) {
+      ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)(
+          tid, (uint64_t)3, cpu, node
+      );
+    }
   }
 #endif
 
@@ -5567,6 +5628,11 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
               0 /* loc is missing */
               );
         }
+        if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) {
+          ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)(
+              tid, (uint64_t)3, cpu, node
+          );
+        }
       }
 #endif
       /* we were just woken up, so run our new task */
@@ -5587,7 +5653,6 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
           task_info->task_id = __ompt_task_id_new(tid);
         }
 #endif
-
         {
           KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
           KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK);
@@ -5612,6 +5677,11 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
       __kmp_join_barrier(gtid);
 #if OMPT_SUPPORT && OMPT_TRACE
       if (ompt_enabled) {
+        if (ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)) {
+          ompt_callbacks.ompt_callback(ompt_event_thread_schedinfo)(
+              tid, (uint64_t)3, cpu, node
+          );
+        }
         if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) {
           // don't access *pteam here: it may have already been freed
           // by the master thread behind the barrier (possible race)
diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp
index 54a0b2fae68b684b3e345796322d1f83b26baa9b..823bf4415bb6692a4bfe813dbc753a0a06a74993 100644
--- a/runtime/src/kmp_tasking.cpp
+++ b/runtime/src/kmp_tasking.cpp
@@ -2622,7 +2622,7 @@ static inline int __kmp_execute_tasks_template(
 #else
   kmp_int32 node  = 0;
 #endif
-  enum { INIT_STATE, IDLE_STATE, ACTIVE_STATE };
+  enum { INIT_STATE=0, IDLE_STATE=1, ACTIVE_STATE=2, UNKNOWN_STATE=3 };
   kmp_int32 state = INIT_STATE;
   kmp_base_queue_data_t *own_queue = &task_team->tt.tt_task_queues[KMP_LEVEL_THREAD][tid].qd;
 
diff --git a/runtime/src/ompt-event-specific.h b/runtime/src/ompt-event-specific.h
index 848072145ba782037e6e1ef641faa3b5f8afa189..be168b5b6c84f7170fb8681d57b0fc264666ea97 100644
--- a/runtime/src/ompt-event-specific.h
+++ b/runtime/src/ompt-event-specific.h
@@ -146,6 +146,7 @@
 #define ompt_event_thread_state_begin_implemented       ompt_event_MAY_ALWAYS_TRACE
 #define ompt_event_thread_state_end_implemented         ompt_event_MAY_ALWAYS_TRACE
 #define ompt_event_thread_steal_implemented             ompt_event_MAY_ALWAYS_TRACE
+#define ompt_event_thread_schedinfo_implemented         ompt_event_MAY_ALWAYS_TRACE
 
 #if OMP_40_ENABLED
 #define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE