diff --git a/includes/kaapi_trace.h b/includes/kaapi_trace.h
index f2db61a3ac8d08ea479ba41d2e62553917d316c7..3f653e724cd5622924473e79de7cc6c6332b54c3 100644
--- a/includes/kaapi_trace.h
+++ b/includes/kaapi_trace.h
@@ -739,7 +739,7 @@ extern kaapi_tracelib_team_t* kaapi_tracelib_team_init(
   kaapi_tracelib_thread_t* kproc,
   void*                    key,
   kaapi_tracelib_team_t*   parent_team,
-  void*                  (*routine)(),
+  const void*              routine,
   const char*              psource,
   const char*              name,
   char*                  (*filter_name)(char*, int, const char*, const char*)
diff --git a/includes/kaapi_trace_lib.h b/includes/kaapi_trace_lib.h
index 5138858ef0bd9253d944c28ccc05b2d02b858f1f..e7a8631e50d03f58a992df0effa03ff9288deb9a 100644
--- a/includes/kaapi_trace_lib.h
+++ b/includes/kaapi_trace_lib.h
@@ -1,3 +1,4 @@
+#include <omp-tools.h>
 #include "kaapi_trace.h"
 
 /* Return human readable name from encoding of the psource information */
@@ -42,4 +43,5 @@ static kmp_lock_t                __kaapi_global_lock;
 #endif
 extern kaapi_ompt_thread_info_t* __kaapi_oth_info;
 extern size_t                    __kaapi_oth_info_capacity;
+extern ompt_get_thread_data_t    ompt_get_thread_data;
 
diff --git a/test/hello.c b/test/hello.c
index a7c30437795b220186248c32772b3aec9a24916d..52494a01bd4d8db565464196b366a26649775ab1 100644
--- a/test/hello.c
+++ b/test/hello.c
@@ -7,5 +7,9 @@ int main()
   {
     printf("Hello from %i\n", omp_get_thread_num());
   }
+#pragma omp parallel
+  {
+    printf("Parallel 2, hello from %i\n", omp_get_thread_num());
+  }
   return 0;
 }
diff --git a/tool/ompt_callbacks.cpp b/tool/ompt_callbacks.cpp
index 064a89e83aa0c867cca51934dce9cbc28a6fc84f..f96eb6c6c05b087e1e3b49bedb1b3645c04ed3f3 100644
--- a/tool/ompt_callbacks.cpp
+++ b/tool/ompt_callbacks.cpp
@@ -81,9 +81,94 @@ static ompt_get_parallel_id_t ompt_get_parallel_id;
 
 
 
+/* Copy src to dst[*len..] until `stop` or NUL is met, never writing at or past
+   dst[cap]. The scan goes on once dst is full so that the caller can continue
+   parsing. Returns the address of the character that ended the scan. */
+static const char* libomp_filter_append(
+    char* dst, int cap, int* len, const char* src, char stop)
+{
+  while ((*src != 0) && (*src != stop))
+  {
+    if (*len < cap)
+      dst[(*len)++] = *src;
+    ++src;
+  }
+  return src;
+}
+
+/* Return human readable name from encoding of the psource information.
+   Input format:  ";../../plasma/src/timing.inc;run;129;15;;"
+   Output format: "func: <funcname>\nfile: <filename>\nline: <linenumber>"
+   <filename> is the last component of the path; <funcname> is replaced by
+   `name` when `name` is not null.
+
+   output : destination buffer, NUL terminated whenever it is returned
+   size   : capacity of output in bytes, terminating NUL included
+   psource: location encoding, may be null
+   name   : optional function name, used instead of the one in psource
+
+   Returns output, or 0 when nothing can be written: null output, size < 2, or
+   no usable psource together with a null name. If psource is null, empty, or
+   has no ';' after its first character, only `name` is copied. Text that does
+   not fit in output is truncated.
+*/
+char* libomp_filter_func(char* output, int size, const char* psource, const char* name)
+{
+  const char* file;      /* start of the file name inside the path field */
+  const char* field = 0; /* cursor over the ';' separated fields of psource */
+  int         cap;       /* writable characters, final NUL excluded */
+  int         len = 0;   /* characters written so far */
+
+  if ((output == 0) || (size <= 1)) return 0;
+  cap = size-1;
+
+  /* The path field starts after the first character and must be closed by a
+     ';'. An empty psource is rejected first so that psource+1 stays inside
+     the string. */
+  if ((psource != 0) && (psource[0] != 0))
+    field = strchr(psource+1, ';');
+  if (field == 0)
+  {
+    /* no usable location: fall back to the caller supplied name, bounded by cap */
+    if (name == 0) return 0;
+    libomp_filter_append(output, cap, &len, name, 0);
+    output[len] = 0;
+    return output;
+  }
+
+  /* Walk back from the end of the path to the previous '/' or ';' so that only
+     the last path component is reported. */
+  file = field-1;
+  while (file != psource)
+  {
+    --file;
+    if ((*file == '/') || (*file == ';')) break;
+  }
+  ++file;
+
+  /* Function name: `name` wins over the psource field, which is then skipped
+     by scanning it with a zero capacity. */
+  ++field;
+  libomp_filter_append(output, cap, &len, "func: ", 0);
+  if (name != 0)
+    libomp_filter_append(output, cap, &len, name, 0);
+  field = libomp_filter_append(output, (name != 0) ? 0 : cap, &len, field, ';');
+  if (*field == ';') ++field; /* on a truncated psource, stay on its NUL */
+
+  /* File name, up to the ';' found above. */
+  libomp_filter_append(output, cap, &len, "\nfile: ", 0);
+  libomp_filter_append(output, cap, &len, file, ';');
+
+  /* Line number: the field that follows the function name. */
+  libomp_filter_append(output, cap, &len, "\nline: ", 0);
+  libomp_filter_append(output, cap, &len, field, ';');
+
+  /* len never exceeds cap == size-1, so the terminator always fits. */
+  output[len] = 0;
+  return output;
+}
 
 
-#if USE_KAAPI
 /*
 */
 static inline void realloc_ifrequired( size_t size )
@@ -110,10 +195,11 @@ static inline void realloc_ifrequired( size_t size )
   exit(0);
 #endif
 }
-#endif
 
 using namespace std;
 
+atomic<uint64_t> parallel_id(0); /* counter read and post-incremented into parallel_data->value by ompt_callback_parallel_begin_action */
+
 void ompt_callback_thread_begin_action(
     ompt_thread_t thread_type,
     ompt_data_t *thread_data
@@ -158,3 +244,60 @@ void ompt_callback_thread_end_action(
   kaapi_tracelib_thread_fini( koti->kproc );
   printf("%" PRIu64 ": ompt_event_thread_end\n", thread_data->value);
 }
+
+/* OMPT parallel-begin callback: gives the new region a fresh id, then creates
+   and starts a kaapi trace team for it in the calling thread's context. */
+void ompt_callback_parallel_begin_action (
+  ompt_data_t *encountering_task_data,
+  const ompt_frame_t *encountering_task_frame,
+  ompt_data_t *parallel_data,
+  unsigned int requested_parallelism,
+  int flags,
+  const void *codeptr_ra
+)
+{
+  /* Thread data is queried first, then the region id is drawn from the counter. */
+  ompt_data_t *self = ompt_get_thread_data();
+  parallel_data->value = parallel_id++;
+  const uint64_t tid = self->value;
+
+  printf("%" PRIu64 ": omp threadid:%" PRIu64 ": ompt_event_parallel_begin: parent_task_frame=%p, requested_team_size=%" PRIu32 ", parallel_function=%p, parallel_data: %" PRIu64 "\n",
+      tid, (uint64_t)omp_get_thread_num(),
+      encountering_task_frame, requested_parallelism,
+      codeptr_ra, parallel_data->value);
+
+  /* TODO here: the key is the way several instances of the same parallel region
+     are collapsed to compute statistics. The default is to use the parallel
+     function pointer as the key.
+
+     Each kaapi_tracelib_team_t is a per-thread data structure capturing the
+     performance counters of a parallel region. The set of teams forming the OMP
+     parallel region is computed when the library terminates, so that overhead
+     is reported at the end, after the computation. The relationship between a
+     team and its parent team is known only on the master team thread.
+     Statistics across instances are also computed at library termination. */
+  kaapi_ompt_thread_info_t* info = &__kaapi_oth_info[tid];
+  void* region_key = const_cast<void *>(codeptr_ra);
+#if LIBOMP_USE_NUMA
+  /* fix numaid here: at thread_begin the affinity seems not good with libomp */
+  info->kproc->numaid = __kmp_cpu2node(sched_getcpu());
+#endif
+
+  /* The parent is the team on top of this thread's team stack, if any. */
+  kaapi_tracelib_team_t* enclosing =
+      (info->tstack.top != 0) ? info->tstack.stack[info->tstack.top-1] : 0;
+  kaapi_tracelib_team_t* created = kaapi_tracelib_team_init(
+      info->kproc, region_key, enclosing, codeptr_ra,
+      0, /* psource */
+      0, /* another possible name in place of the default name encoded in psource */
+      libomp_filter_func);
+
+  /* Store the team in the free top slot; the depth is bumped after the start. */
+  info->tstack.stack[info->tstack.top] = created;
+  /* The parent passed to the start call is read from the stack again here. */
+  kaapi_tracelib_team_t* parent_now = 0;
+  if (info->tstack.top > 0)
+    parent_now = info->tstack.stack[info->tstack.top-1];
+  kaapi_tracelib_team_start(info->kproc, created, parent_now, parallel_data->value);
+  ++info->tstack.top;
+}
diff --git a/tool/ompt_callbacks.def b/tool/ompt_callbacks.def
index 8b08953c57b51ef56c0e4c3b3ffd5e10c85687ed..b14066fd6b5b4d2afce257348e37a5d52d7dcc95 100644
--- a/tool/ompt_callbacks.def
+++ b/tool/ompt_callbacks.def
@@ -7,7 +7,7 @@
 
 CALLBACK(ompt_callback_thread_begin, ompt_thread_t, ompt_data_t*)
 CALLBACK(ompt_callback_thread_end, ompt_data_t*)
-UNUSED(ompt_callback_parallel_begin)
+CALLBACK(ompt_callback_parallel_begin, ompt_data_t *, const ompt_frame_t *, ompt_data_t *, unsigned int, int, const void *)
 UNUSED(ompt_callback_parallel_end)
 UNUSED(ompt_callback_task_create)
 UNUSED(ompt_callback_task_schedule)
diff --git a/tool/tool.cpp b/tool/tool.cpp
index be11c9683f0cad8f9d842d63d258c50dbe4e1f27..cc8c77a3627db07ae6eda98fe365abcc405a0033 100644
--- a/tool/tool.cpp
+++ b/tool/tool.cpp
@@ -13,6 +13,7 @@
 
 kaapi_ompt_thread_info_t* __kaapi_oth_info = 0;
 size_t                    __kaapi_oth_info_capacity = 256;
+ompt_get_thread_data_t ompt_get_thread_data; /* assigned in initTool() from lookup("ompt_get_thread_data") */
 
 void initTool(ompt_function_lookup_t lookup) {
   printf("init tool\n");
@@ -28,6 +29,7 @@ void initTool(ompt_function_lookup_t lookup) {
     printf("[OMP-TRACE] kaapi tracing version: %s\n",get_kaapi_version());
   printf("oth_info %p\n", __kaapi_oth_info);
   ompt_set_callback_t ompt_set_callback = (ompt_set_callback_t) lookup("ompt_set_callback");
+  ompt_get_thread_data = (ompt_get_thread_data_t) lookup("ompt_get_thread_data");
 
 #define CALLBACK(name, ...)                       \
   do{                                                           \
diff --git a/tracelib/kaapi_trace_lib.c b/tracelib/kaapi_trace_lib.c
index 516b598816f913a3992716e082376d6b58e4a9b1..2d7f1bbbb66b5f3b68d45a5d62bf372394eba395 100644
--- a/tracelib/kaapi_trace_lib.c
+++ b/tracelib/kaapi_trace_lib.c
@@ -1402,7 +1402,7 @@ kaapi_tracelib_team_t* kaapi_tracelib_team_init(
     kaapi_tracelib_thread_t* kproc,
     void* key,
     kaapi_tracelib_team_t* parent_team,
-    void* (*routine)(),
+    const void* routine,
     const char* psource,
     const char* name,
     char* (*filter_name)(char*, int, const char*, const char*)