diff --git a/includes/kaapi_trace_lib.h b/includes/kaapi_trace_lib.h
index e7a8631e50d03f58a992df0effa03ff9288deb9a..53e412b42adcab7f7feaaa31de23ed28e762f2ff 100644
--- a/includes/kaapi_trace_lib.h
+++ b/includes/kaapi_trace_lib.h
@@ -44,4 +44,5 @@ static kmp_lock_t                __kaapi_global_lock;
 extern kaapi_ompt_thread_info_t* __kaapi_oth_info;
 extern size_t                    __kaapi_oth_info_capacity;
 extern ompt_get_thread_data_t    ompt_get_thread_data;
+extern ompt_get_unique_id_t    ompt_get_unique_id;
 
diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt
index d77017e448f2926fd98686aa3855c3b675d2659e..84989b7412b1b87affc3520d271ba9edc02213c2 100644
--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -3,3 +3,4 @@ find_package(OpenMP REQUIRED)
 set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
 
 add_executable(hello hello.c)
+add_executable(hello-task hello-task.c)
diff --git a/test/hello-task.c b/test/hello-task.c
new file mode 100644
index 0000000000000000000000000000000000000000..74473999d83040b605416304d50d63e739d7ed06
--- /dev/null
+++ b/test/hello-task.c
@@ -0,0 +1,23 @@
+#include <stdio.h>
+#include <unistd.h>
+#include <omp.h>
+
+int main()
+{
+#pragma omp parallel
+#pragma omp master
+  {
+    printf("Hello from %i\n", omp_get_thread_num());
+    #pragma omp task
+    {
+      sleep(1);
+      printf("Hey there\n");
+    }
+    #pragma omp task
+    {
+      sleep(1);
+      printf("Hey there another task\n");
+    }
+  }
+  return 0;
+}
diff --git a/tool/ompt_callbacks.cpp b/tool/ompt_callbacks.cpp
index f96eb6c6c05b087e1e3b49bedb1b3645c04ed3f3..23a560e577b9ce1aca3711428507ae083c37f460 100644
--- a/tool/ompt_callbacks.cpp
+++ b/tool/ompt_callbacks.cpp
@@ -198,16 +198,15 @@ static inline void realloc_ifrequired( size_t size )
 
 using namespace std;
 
-atomic<uint64_t> parallel_id(0);
-
 void ompt_callback_thread_begin_action(
     ompt_thread_t thread_type,
     ompt_data_t *thread_data
     )
 {
-  int thread_id = thread_data->value = omp_get_thread_num();
+  //int thread_id = thread_data->value = omp_get_thread_num();
+  int thread_id = thread_data->value = ompt_get_unique_id();
 
-  printf("%" PRIu64 ": ompt_event_thread_begin: thread_type=%" PRIu64 "\n", thread_data->value, (uint64_t)thread_type);
+  printf("%" PRIu64 ": ompt_event_thread_begin: thread_id=%" PRIu64 "\n", thread_data->value, (uint64_t)thread_type);
   kaapi_assert ( thread_id < __kaapi_oth_info_capacity );
   kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id];
 #if LIBOMP_USE_NUMA
@@ -230,19 +229,17 @@ at the begining of the parallel region */
     sizeof(kaapi_taskstackentry_t) * koti->pstack.capacity /*  */
   );
   koti->tstack.top = 0;
-#if USE_KAAPI
   kaapi_tracelib_thread_start( koti->kproc );
-#endif
 }
 
 void ompt_callback_thread_end_action(
     ompt_data_t *thread_data
     )
 {
+  printf("%" PRIu64 ": ompt_event_thread_end: thread_id=%" PRIu64 "\n", thread_data->value, thread_data->value);
   kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_data->value];
   kaapi_tracelib_thread_stop( koti->kproc );
   kaapi_tracelib_thread_fini( koti->kproc );
-  printf("%" PRIu64 ": ompt_event_thread_end\n", thread_data->value);
 }
 
 void ompt_callback_parallel_begin_action (
@@ -255,14 +252,16 @@ void ompt_callback_parallel_begin_action (
 )
 {
   ompt_data_t *thread_data = ompt_get_thread_data();
-  parallel_data->value = parallel_id++;
+  parallel_data->value = ompt_get_unique_id();
   uint64_t thread_id = thread_data->value;
 
-  printf("%" PRIu64 ": omp threadid:%" PRIu64 ": ompt_event_parallel_begin: parent_task_frame=%p, requested_team_size=%" PRIu32 ", parallel_function=%p, parallel_data: %" PRIu64 "\n", thread_id, (uint64_t)omp_get_thread_num(),
+  printf("%" PRIu64 ": omp threadid:%" PRIu64 ": ompt_event_parallel_begin: parent_task_frame=%p, task_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", parallel_function=%p, parallel_data: %" PRIu64 "\n", thread_id, (uint64_t)omp_get_thread_num(),
       encountering_task_frame,
+      encountering_task_data->value,
       requested_parallelism, codeptr_ra,
       parallel_data->value
       );
+#if 0
 
   /* TODO here: the key is the way several instances of the same parallel region are collapsed
      to compute statistics. 
@@ -300,4 +299,194 @@ void ompt_callback_parallel_begin_action (
       parallel_data->value
       );
   ++koti->tstack.top;
+#endif
+}
+
+void
+implicit_task_end(
+  ompt_data_t *parallel_data,
+  ompt_data_t *task_data)
+{
+  uint64_t thread_id = ompt_get_thread_data()->value;
+  /* But because the end of implicit task is not related to on_ompt_event_parallel_end.
+  */
+  printf("%" PRIu64 ": ompt_event_implicit_task_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", thread_id, parallel_data->value, task_data->value);
+  /* thread 0 ends the parallel region
+     But because the end of implicit task is not related to
+     the end of the parallel region, we define here the end of implicit task.
+  */
+  kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id];
+  int idxtop = --koti->pstack.top;
+  //kaapi_assert( idxtop == 0 );
+
+  /* reset that accumulated into */
+  memset( koti->pstack.stack[idxtop].accum, 0, sizeof(koti->pstack.stack[idxtop].accum));
+  kaapi_tracelib_task_end(
+    koti->kproc,
+    koti->pstack.stack[idxtop].task,
+    0, 0,
+    koti->pstack.stack[idxtop].fdescr,
+    koti->pstack.t0,
+    koti->pstack.stack[idxtop].accum
+  );
+}
+
+void
+ompt_callback_parallel_end_action(
+  ompt_data_t *parallel_data,
+  ompt_data_t *encountering_task_data,
+  int flags,
+  const void *codeptr_ra)
+{
+  uint64_t thread_id = ompt_get_thread_data()->value;
+  /* end implicit task here */
+  printf("%" PRIu64 ": on_ompt_event_parallel_end: parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", thread_id, parallel_data->value, encountering_task_data->value);
+#if 0
+  implicit_task_end(parallel_data, encountering_task_data);
+  /* end of the parallel region */
+  kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id];
+  int idxtop = --koti->tstack.top;
+  kaapi_tracelib_team_stop( koti->kproc,
+      koti->tstack.stack[idxtop],
+      idxtop >0 ?  koti->tstack.stack[idxtop-1]: 0,
+      parallel_data->value
+  );
+  kaapi_tracelib_team_fini( koti->kproc, koti->tstack.stack[idxtop] );
+#endif
+}
+
+
+void
+ompt_callback_task_create_action(
+    ompt_data_t *parent_task_data,    /* id of parent task            */
+    const ompt_frame_t *parent_frame,  /* frame data for parent task   */
+    ompt_data_t* new_task_data,      /* id of created task           */
+    int type,
+    int has_dependences,
+    const void *codeptr_ra)               /* pointer to outlined function */
+{
+  uint64_t thread_id = ompt_get_thread_data()->value;
+  new_task_data->value = ompt_get_unique_id();
+  printf("%" PRIu64 ": ompt_task_create: parent_id=%" PRIu64 ", task_id=%" PRIu64\
+      ", type=%i, has_dep=%i, ptr=%" PRIu64 "\n", thread_id, parent_task_data->value, new_task_data->value, type, has_dependences, codeptr_ra);
+}
+
+
+void
+ompt_callback_task_schedule_action(
+    ompt_data_t *prior_task_data,
+    ompt_task_status_t prior_task_status,
+    ompt_data_t *next_task_data)
+{
+  uint64_t thread_id = ompt_get_thread_data()->value;
+  if (prior_task_data->ptr) {
+    uint64_t prior_task_id = prior_task_data->value;
+    // We are ending a task
+    printf("%" PRIu64 ": ompt_event_task_end: task_id=%" PRIu64 "\n", thread_id, prior_task_id );
+#if 0
+    /* thread 0 ends the parallel region
+       But because the end of implicit task is not related to
+       the end of the parallel region, we define here the end of implicit task.
+       */
+    kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id];
+    int idxtop = --koti->pstack.top;
+
+    /* reset that accumulated into */
+    memset( koti->pstack.stack[idxtop].accum, 0, sizeof(koti->pstack.stack[idxtop].accum));
+    kaapi_tracelib_task_end(
+        koti->kproc,
+        koti->pstack.stack[idxtop].task,
+        0,
+        0,
+        koti->pstack.stack[idxtop].fdescr,
+        koti->pstack.t0,
+        koti->pstack.stack[idxtop].accum
+        );
+    kaapi_tracelib_thread_switchstate(koti->kproc);
+#endif
+  }
+  if (next_task_data->ptr) {
+    uint64_t new_task_id = next_task_data->value;
+    // We are starting a task
+    printf("%" PRIu64 ": ompt_event_task_begin: task_id=%" PRIu64 ", status=%i\n", thread_id, new_task_id, prior_task_status);
+    /* This is code for implicit task begin.
+    */
+#if 0
+    kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id];
+    kaapi_tracelib_thread_switchstate(koti->kproc);
+    kaapi_descrformat_t* fdescr = kaapi_tracelib_register_fmtdescr(
+        0,
+        // TODO: get codeptr_ra there
+        next_task_data,
+        0, //loc
+        "<undef>",
+        libomp_filter_func
+        );
+    int idxtop = koti->pstack.top;
+    koti->pstack.stack[idxtop].fdescr = fdescr;
+    koti->pstack.stack[idxtop].task   = (void*)new_task_id;
+
+    kaapi_tracelib_task_begin(
+        koti->kproc,
+        (kaapi_task_id_t)new_task_id,
+        fdescr->fmtid,
+        1,
+        0, 0, 0,
+        0, 0,
+        koti->pstack.t0,
+        koti->pstack.top ==0 ? 0 : koti->pstack.stack[idxtop-1].accum
+        );
+    ++koti->pstack.top;
+#endif
+  }
+}
+
+void
+ompt_callback_implicit_task_action (
+    ompt_scope_endpoint_t endpoint,
+    ompt_data_t *parallel_data,
+    ompt_data_t *task_data,
+    unsigned int actual_parallelism,
+    unsigned int index,
+    int flags
+    )
+{
+  uint64_t thread_id = ompt_get_thread_data()->value;
+  if (endpoint == ompt_scope_begin) {
+    task_data->value = ompt_get_unique_id();
+    printf("%" PRIu64 ": ompt_event_implicit_task_action: begin. parallel_id=%" PRIu64 ", task_id=%" PRIu64 "\n", thread_id, parallel_data->value, task_data->value);
+  } else if (endpoint == ompt_scope_end) {
+    printf("%" PRIu64 ": ompt_event_implicit_task_action: end. task_id=%" PRIu64 "\n", thread_id, task_data->value);
+  } else {
+    printf("%" PRIu64 ": ompt_event_implicit_task_action: unknown endpoint. task_id=%" PRIu64 "\n", thread_id, task_data->value);
+  }
+  /* This is code for implicit task begin.
+  */
+#if 0
+  // TODO: debug below
+  kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id];
+  kaapi_tracelib_team_t* team = koti->tstack.stack[koti->tstack.top-1];
+  kaapi_descrformat_t* fdescr = kaapi_tracelib_register_fmtdescr(
+        1,
+        team->key, /* same key as the team. Not implicit task ? why ? [TG] */
+        team->name,
+        0,
+        0          /* no filter: team name should be already well formed */
+  );
+  int idxtop = koti->pstack.top;
+  koti->pstack.stack[idxtop].fdescr = fdescr;
+  koti->pstack.stack[idxtop].task   = (void*)task_data->value;
+
+  kaapi_tracelib_task_begin(
+    koti->kproc,
+    (kaapi_task_id_t)task_data->value,
+    fdescr->fmtid,
+    0,
+    0, 0, 0,
+    0, 0,
+    koti->pstack.t0,
+    0
+  );
+  ++koti->pstack.top;
+#endif
 }
diff --git a/tool/ompt_callbacks.def b/tool/ompt_callbacks.def
index b14066fd6b5b4d2afce257348e37a5d52d7dcc95..7ae6d690e0d3a7b2b1d14e09377f15c57f18758e 100644
--- a/tool/ompt_callbacks.def
+++ b/tool/ompt_callbacks.def
@@ -8,10 +8,10 @@
 CALLBACK(ompt_callback_thread_begin, ompt_thread_t, ompt_data_t*)
 CALLBACK(ompt_callback_thread_end, ompt_data_t*)
 CALLBACK(ompt_callback_parallel_begin, ompt_data_t *, const ompt_frame_t *, ompt_data_t *, unsigned int, int, const void *)
-UNUSED(ompt_callback_parallel_end)
-UNUSED(ompt_callback_task_create)
-UNUSED(ompt_callback_task_schedule)
-UNUSED(ompt_callback_implicit_task)
+CALLBACK(ompt_callback_parallel_end, ompt_data_t *, ompt_data_t *, int, const void *)
+CALLBACK(ompt_callback_task_create, ompt_data_t*, const ompt_frame_t*, ompt_data_t*, int, int, const void *codeptr_ra)
+CALLBACK(ompt_callback_task_schedule, ompt_data_t *, ompt_task_status_t, ompt_data_t *)
+CALLBACK(ompt_callback_implicit_task, ompt_scope_endpoint_t, ompt_data_t *, ompt_data_t*, unsigned int, unsigned int, int)
 UNUSED(ompt_callback_target)
 UNUSED(ompt_callback_target_data_op)
 UNUSED(ompt_callback_target_submit)
diff --git a/tool/tool.cpp b/tool/tool.cpp
index cc8c77a3627db07ae6eda98fe365abcc405a0033..d34572bff28c010657ceebc9b25ffdb3949c0846 100644
--- a/tool/tool.cpp
+++ b/tool/tool.cpp
@@ -14,6 +14,9 @@
 kaapi_ompt_thread_info_t* __kaapi_oth_info = 0;
 size_t                    __kaapi_oth_info_capacity = 256;
 ompt_get_thread_data_t ompt_get_thread_data;
+ompt_get_unique_id_t ompt_get_unique_id;
+#include <atomic>
+std::atomic<uint64_t> myuid(0);
 
 void initTool(ompt_function_lookup_t lookup) {
   printf("init tool\n");
@@ -30,6 +33,11 @@ void initTool(ompt_function_lookup_t lookup) {
   printf("oth_info %p\n", __kaapi_oth_info);
   ompt_set_callback_t ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
   ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data");
+  ompt_get_unique_id = (ompt_get_unique_id_t) lookup("ompt_get_unique_id");
+  // FIXME: restore the call to omp get unique id
+  ompt_get_unique_id = []() {
+    return ++myuid;
+  };
 
 #define CALLBACK(name, ...)                       \
   do{                                                           \