diff --git a/runtime/src/kaapi_trace.h b/runtime/src/kaapi_trace.h
index 263afe37a7f49e2b09bcaa63970e62a47ef1b56a..f2db61a3ac8d08ea479ba41d2e62553917d316c7 100644
--- a/runtime/src/kaapi_trace.h
+++ b/runtime/src/kaapi_trace.h
@@ -242,8 +242,8 @@ typedef struct kaapi_named_perfctr {
 #define KAAPI_EVT_TASK_END           3     /* end execution of tasks, d0: task, d1: numaid */
 #define KAAPI_EVT_TASK_SUCC          4     /* T0 has successor T1 */
 #define KAAPI_EVT_TASK_ACCESS        5     /* d0: task, d1: mode, d2: pointer */
-#define KAAPI_EVT_COMP_DAG_BEG       6     /* begin of computing the dag */
-#define KAAPI_EVT_COMP_DAG_END       7     /* end of computing the dag */
+#define KAAPI_EVT_COMP_DAG           6     /* computing the dag, i0[] = 1 iff beg, = 0 iff end, d1: key */
+/*#define KAAPI_EVT_FREE0            7*/
 #define KAAPI_EVT_UNDEFINED_0        8
 #define KAAPI_EVT_UNDEFINED_1        9
 #define KAAPI_EVT_SCHED_IDLE_BEG     10    /* begin when k-processor begins to be idle and try to steal */
@@ -268,21 +268,21 @@ typedef struct kaapi_named_perfctr {
 #define KAAPI_EVT_OFFLOAD_DTOD_END   29
 #define KAAPI_EVT_OFFLOAD_KERNEL_BEG 30
 #define KAAPI_EVT_OFFLOAD_KERNEL_END 31
-#define KAAPI_EVT_PARALLEL_BEG       32
-#define KAAPI_EVT_PARALLEL_END       33
-#define KAAPI_EVT_TASKWAIT_BEG       34
-#define KAAPI_EVT_TASKWAIT_END       35
-#define KAAPI_EVT_TASKGROUP_BEG      36
-#define KAAPI_EVT_TASKGROUP_END      37
+#define KAAPI_EVT_PARALLEL           32 /* i0[] = 1 iff beg, = 0 iff end, d1: parallel_id */
+/*#define KAAPI_EVT_FREE0            33*/
+#define KAAPI_EVT_TASKWAIT           34 /* i0[] = 1 iff beg, = 0 iff end, d1: task_id */
+/*#define KAAPI_EVT_FREE0            35*/
+#define KAAPI_EVT_TASKGROUP          36 /* trace reader treats i0[] = 1 as beg, any other value as end */
+/*#define KAAPI_EVT_FREE0            37*/
 #define KAAPI_EVT_PERFCOUNTER        38 /* format <perf id (0, 1, 2..)>, <value> */
 #define KAAPI_EVT_TASK_PERFCOUNTER   39 /* d0=task; d1.i8[0..2]: perf counter id; d2, d3: values */
                                         /* several KAAPI_EVT_TASK_PERFCOUNTER may follow KAAPI_EVT_TASK_END */
 #define KAAPI_EVT_LOCK_BEG           40
 #define KAAPI_EVT_LOCK_END           41
-#define KAAPI_EVT_YIELD_BEG          42
-#define KAAPI_EVT_YIELD_END          43
-#define KAAPI_EVT_BARRIER_BEG        44    /* d0: running task */
-#define KAAPI_EVT_BARRIER_END        45    /* d0: running  task */
+#define KAAPI_EVT_YIELD              42    /* i0[] = 1 iff beg, = 0 iff end */
+/*#define KAAPI_EVT_FREE0            43*/
+#define KAAPI_EVT_BARRIER            44    /* i0[] = 1 iff beg, = 0 iff end, d1: running task */
+/*#define KAAPI_EVT_FREE0            45*/
 #define KAAPI_EVT_TASK_STEAL         46    /* d0: executed task, d1: original task */
 #define KAAPI_EVT_LOOP_BEGIN         47    /* d0: workshare id, d1: sched type, d2: iteration count */
 #define KAAPI_EVT_LOOP_END           48    /* d0: workshare id */
@@ -321,28 +321,23 @@ typedef uint64_t kaapi_event_mask_type_t;
      | KAAPI_EVT_MASK(KAAPI_EVT_TASK_ACCESS) \
      | KAAPI_EVT_MASK(KAAPI_EVT_TASK_END) \
      | KAAPI_EVT_MASK(KAAPI_EVT_TASK_STEAL) \
-     | KAAPI_EVT_MASK(KAAPI_EVT_COMP_DAG_BEG) \
-     | KAAPI_EVT_MASK(KAAPI_EVT_COMP_DAG_END) \
+     | KAAPI_EVT_MASK(KAAPI_EVT_COMP_DAG) \
      | KAAPI_EVT_MASK(KAAPI_EVT_OFFLOAD_KERNEL_BEG) \
      | KAAPI_EVT_MASK(KAAPI_EVT_OFFLOAD_KERNEL_END) \
     )
     
 #define KAAPI_EVT_MASK_OMP \
-    (  KAAPI_EVT_MASK(KAAPI_EVT_PARALLEL_BEG) \
-     | KAAPI_EVT_MASK(KAAPI_EVT_PARALLEL_END) \
-     | KAAPI_EVT_MASK(KAAPI_EVT_BARRIER_BEG) \
-     | KAAPI_EVT_MASK(KAAPI_EVT_BARRIER_END) \
-     | KAAPI_EVT_MASK(KAAPI_EVT_YIELD_BEG) \
-     | KAAPI_EVT_MASK(KAAPI_EVT_YIELD_END) \
-     | KAAPI_EVT_MASK(KAAPI_EVT_TASKWAIT_BEG) \
-     | KAAPI_EVT_MASK(KAAPI_EVT_TASKWAIT_END) \
-     | KAAPI_EVT_MASK(KAAPI_EVT_TASKGROUP_BEG) \
-     | KAAPI_EVT_MASK(KAAPI_EVT_TASKGROUP_END) \
+    (  KAAPI_EVT_MASK(KAAPI_EVT_PARALLEL) \
+     | KAAPI_EVT_MASK(KAAPI_EVT_BARRIER) \
+     | KAAPI_EVT_MASK(KAAPI_EVT_YIELD) \
+     | KAAPI_EVT_MASK(KAAPI_EVT_TASKWAIT) \
+     | KAAPI_EVT_MASK(KAAPI_EVT_TASKGROUP) \
      | KAAPI_EVT_MASK(KAAPI_EVT_LOOP_BEGIN) \
      | KAAPI_EVT_MASK(KAAPI_EVT_LOOP_END) \
      | KAAPI_EVT_MASK(KAAPI_EVT_LOOP_NEXT) \
      | KAAPI_EVT_MASK(KAAPI_EVT_LOOP_MDATA) \
      | KAAPI_EVT_MASK(KAAPI_EVT_TASK_ATTR) \
+     | KAAPI_EVT_MASK(KAAPI_EVT_COMP_DAG) \
     )
 
 #define KAAPI_EVT_MASK_SCHED \
diff --git a/runtime/src/kaapi_trace_lib.c b/runtime/src/kaapi_trace_lib.c
index 5a1af882c970bf7ccb094dc00422af1a89c2036a..5c5945c4b19fb6cda907e8913375da6747e240c7 100644
--- a/runtime/src/kaapi_trace_lib.c
+++ b/runtime/src/kaapi_trace_lib.c
@@ -1443,7 +1443,7 @@ void kaapi_tracelib_team_start(
     int                      parallel_id
 )
 {
-  KAAPI_EVENT_PUSH1( kproc, 0, KAAPI_EVT_PARALLEL_BEG, parallel_id );
+  KAAPI_EVENT_PUSH2( kproc, 0, KAAPI_EVT_PARALLEL, 1, parallel_id );
 //Here information is correct
 //printf("%i/%i:: parallel on numa:%i <-> %i\n", kproc->kid, sched_getcpu(), kproc->numaid, __kmp_cpu2node(sched_getcpu()));
   kaapi_tracelib_thread_read(kproc, kproc->perfset, &team->perf0[0]);
@@ -1461,7 +1461,7 @@ void kaapi_tracelib_team_stop(
     int                      parallel_id
 )
 {
-  KAAPI_EVENT_PUSH1( kproc, 0, KAAPI_EVT_PARALLEL_END, parallel_id );
+  KAAPI_EVENT_PUSH2( kproc, 0, KAAPI_EVT_PARALLEL, 0, parallel_id );
   kaapi_perf_counter_t tmp[ kaapi_tracelib_thread_idsetsize(kproc) ];
   kaapi_perf_counter_t* readbuff;
   if (parent)
@@ -1832,7 +1832,7 @@ void kaapi_tracelib_barrier_begin(
     uint64_t                     task
 )
 {
-  KAAPI_EVENT_PUSH1(kproc, 0, KAAPI_EVT_BARRIER_BEG, task );
+  KAAPI_EVENT_PUSH2(kproc, 0, KAAPI_EVT_BARRIER, 1, task );
 }
 
 
@@ -1843,7 +1843,7 @@ void kaapi_tracelib_barrier_end(
     uint64_t                     task
 )
 {
-  KAAPI_EVENT_PUSH1(kproc, 0, KAAPI_EVT_BARRIER_END, task );
+  KAAPI_EVENT_PUSH2(kproc, 0, KAAPI_EVT_BARRIER, 0, task );
 }
 
 
diff --git a/runtime/src/kaapi_trace_simulator.cpp b/runtime/src/kaapi_trace_simulator.cpp
index f5b94d16f427cd58a67d20e75e7f092a220347fb..b7c9563eb681e556ed0118a2af35eff321e74f26 100644
--- a/runtime/src/kaapi_trace_simulator.cpp
+++ b/runtime/src/kaapi_trace_simulator.cpp
@@ -724,8 +724,7 @@ static void processor_simulate_event(
       ++proc->active;
     break;
 
-    case KAAPI_EVT_COMP_DAG_BEG:
-    case KAAPI_EVT_COMP_DAG_END:
+    case KAAPI_EVT_COMP_DAG:
     break;
 
     /* processing request */
@@ -743,16 +742,12 @@ static void processor_simulate_event(
       proc->perfctr[KAAPI_EVENT_DATA(event,0,i)] = event->u.data[1].u;
     break;
 
-    case KAAPI_EVT_PARALLEL_BEG :
-    case KAAPI_EVT_PARALLEL_END :
-    case KAAPI_EVT_TASKWAIT_BEG :
-    case KAAPI_EVT_TASKWAIT_END :
-    case KAAPI_EVT_TASKGROUP_BEG:
-    case KAAPI_EVT_TASKGROUP_END:
+    case KAAPI_EVT_PARALLEL :
+    case KAAPI_EVT_TASKWAIT :
+    case KAAPI_EVT_TASKGROUP:
     break;
 
-    case KAAPI_EVT_BARRIER_BEG:
-    case KAAPI_EVT_BARRIER_END:
+    case KAAPI_EVT_BARRIER:
     case KAAPI_EVT_LOOP_BEGIN:
     case KAAPI_EVT_LOOP_END  :
     case KAAPI_EVT_LOOP_NEXT :
diff --git a/runtime/src/katracereader.cpp b/runtime/src/katracereader.cpp
index 57bf96dd2a8a99288287361cb1639a3a06b9cc4b..967dc9569f8c1fdc3db548ae47cbeadd69b09d33 100644
--- a/runtime/src/katracereader.cpp
+++ b/runtime/src/katracereader.cpp
@@ -323,11 +323,9 @@ static void callback_print_event(
     break;
 
     /* unroll graph for static schedule */
-    case KAAPI_EVT_COMP_DAG_BEG:
-    break;
-
-    case KAAPI_EVT_COMP_DAG_END:
-      std::cout << ", delay:" << KAAPI_EVENT_DATA(event,0,i);
+    case KAAPI_EVT_COMP_DAG:
+      std::cout << (KAAPI_EVENT_DATA(event,0,i) == 1 ? "BEGIN":"END")
+                << ", key:" << KAAPI_EVENT_DATA(event,1,u);
     break;
 
     /* idle = steal state */
@@ -389,25 +387,21 @@ static void callback_print_event(
                 << ">, value: " << KAAPI_EVENT_DATA(event,1,u);
       break;
 
-    case KAAPI_EVT_PARALLEL_BEG :
-      std::cout << " parallel_id:" << KAAPI_EVENT_DATA(event,0,i);
-    break;
-    case KAAPI_EVT_PARALLEL_END :
-      std::cout << " parallel_id:" << KAAPI_EVENT_DATA(event,0,i);
+    case KAAPI_EVT_PARALLEL :
+      std::cout << (KAAPI_EVENT_DATA(event,0,i) == 1 ? "BEGIN":"END")
+                << ", parallel_id:" << KAAPI_EVENT_DATA(event,1,u);
     break;
 
 
-    case KAAPI_EVT_TASKWAIT_BEG :
-    case KAAPI_EVT_TASKWAIT_END :
-    case KAAPI_EVT_TASKGROUP_BEG:
-    case KAAPI_EVT_TASKGROUP_END:
+    case KAAPI_EVT_TASKWAIT :
+    case KAAPI_EVT_TASKGROUP:
     case KAAPI_EVT_LOCK_BEG:
     case KAAPI_EVT_LOCK_END:
     break;
 
-    case KAAPI_EVT_BARRIER_BEG:
-    case KAAPI_EVT_BARRIER_END:
-      std::cout << " task: " << KAAPI_EVENT_DATA(event,0,u);
+    case KAAPI_EVT_BARRIER:
+      std::cout << (KAAPI_EVENT_DATA(event,0,i) == 1 ? " BEGIN" : "END")
+                << ", task: " << KAAPI_EVENT_DATA(event,1,u);
     break;
 
     case KAAPI_EVT_LOOP_BEGIN:
@@ -1036,32 +1030,32 @@ static void callback_display_paje_event(
     break;
 
     /* unroll graph for static schedule */
-    case KAAPI_EVT_COMP_DAG_BEG:
-      kaapi_trace_poti_PushState(d0, name, "STATE", "db");
-    break;
-    case KAAPI_EVT_COMP_DAG_END:
-      kaapi_trace_poti_PopState (d0, name, "STATE");
+    case KAAPI_EVT_COMP_DAG:
+      if (KAAPI_EVENT_DATA(event,0,i) == 1)
+        kaapi_trace_poti_PushState(d0, name, "STATE", "db");
+      else
+        kaapi_trace_poti_PopState (d0, name, "STATE");
     break;
 
-    case KAAPI_EVT_PARALLEL_BEG :
-      kaapi_trace_poti_PushState(d0, name, "STATE", "pi");
-    break;
-    case KAAPI_EVT_PARALLEL_END :
-      kaapi_trace_poti_PopState (d0, name, "STATE");
+    case KAAPI_EVT_PARALLEL:
+      if (KAAPI_EVENT_DATA(event,0,i) == 1)
+        kaapi_trace_poti_PushState(d0, name, "STATE", "pi");
+      else
+        kaapi_trace_poti_PopState (d0, name, "STATE");
     break;
 
-    case KAAPI_EVT_TASKWAIT_BEG:
-      kaapi_trace_poti_PushState(d0, name, "STATE", "ps");
-    break;
-    case KAAPI_EVT_TASKWAIT_END:
-      kaapi_trace_poti_PopState (d0, name, "STATE");
+    case KAAPI_EVT_TASKWAIT:
+      if (KAAPI_EVENT_DATA(event,0,i) == 1)
+        kaapi_trace_poti_PushState(d0, name, "STATE", "ps");
+      else
+        kaapi_trace_poti_PopState (d0, name, "STATE");
     break;
 
-    case KAAPI_EVT_TASKGROUP_BEG  :
-      kaapi_trace_poti_PushState(d0, name, "STATE", "pf");
-    break;
-    case KAAPI_EVT_TASKGROUP_END  :
-      kaapi_trace_poti_PopState (d0, name, "STATE");
+    case KAAPI_EVT_TASKGROUP  :
+      if (KAAPI_EVENT_DATA(event,0,i) == 1)
+        kaapi_trace_poti_PushState(d0, name, "STATE", "pf");
+      else
+        kaapi_trace_poti_PopState (d0, name, "STATE");
     break;
     
     case KAAPI_EVT_LOCK_BEG  :
@@ -1982,60 +1976,57 @@ printf("pred[id:%i/%p] = {id:%i}/%p\n", succ->id, (void*)succ->addr, task->id, (
       break;
 
     /* unroll graph for static schedule */
-    case KAAPI_EVT_COMP_DAG_BEG:
-    case KAAPI_EVT_COMP_DAG_END:
+    case KAAPI_EVT_COMP_DAG:
     break;
 
-    case KAAPI_EVT_TASKWAIT_BEG :
-    case KAAPI_EVT_TASKWAIT_END :
-    case KAAPI_EVT_TASKGROUP_BEG:
-    case KAAPI_EVT_TASKGROUP_END:
+    case KAAPI_EVT_TASKWAIT :
+    case KAAPI_EVT_TASKGROUP:
     break;
 
-    case KAAPI_EVT_PARALLEL_BEG :
+    case KAAPI_EVT_PARALLEL :
     {
-      uintptr_t parallel_id = KAAPI_EVENT_DATA(event,0,i);
-      if (rastello_parallel_region.size() <= (size_t)parallel_id)
-      {
-        rastello_parallel_region.reserve( parallel_id * 2);
-        rastello_parallel_region.resize( parallel_id +1 );
+      int begend = KAAPI_EVENT_DATA(event,0,i);
+      uintptr_t parallel_id = KAAPI_EVENT_DATA(event,1,u);
+      if (begend ==1)
+      { /* begin */
+        if (rastello_parallel_region.size() <= (size_t)parallel_id)
+        {
+          rastello_parallel_region.reserve( parallel_id * 2);
+          rastello_parallel_region.resize( parallel_id +1 );
+        }
+        rastello_parallel_region_t* rpr = (rastello_parallel_region_t*)rastello_parallel_region[parallel_id];
+        if (rpr == 0)
+        {
+          /* allocation */
+          rpr = rprf->create(parallel_id);
+          rastello_parallel_region[parallel_id] = rpr;
+        }
+        /* start is the min over all kprocs */
+        if (rpr->start ==0)
+          rpr->start = event->date;
+        else if (rpr->start > event->date)
+          rpr->start = event->date;
+        ++rpr->nproc;
+
+        if (rastello_front_parallel_region.size() <= kid)
+          rastello_front_parallel_region.resize(kid+1);
+        rastello_front_parallel_region[kid] = rpr;
       }
-      rastello_parallel_region_t* rpr = (rastello_parallel_region_t*)rastello_parallel_region[parallel_id];
-      if (rpr == 0)
-      {
-        /* allocation */
-        rpr = rprf->create(parallel_id);
-        rastello_parallel_region[parallel_id] = rpr;
+      else
+      { /* end */
+        rastello_parallel_region_t* rpr = (rastello_parallel_region_t*)rastello_parallel_region[parallel_id];
+        /* stop is the max over all kprocs */
+        if (rpr->stop < event->date)
+          rpr->stop = event->date;
+        kaapi_assert( rpr->nproc >= 0);
       }
-      /* start is the min over all kprocs */
-      if (rpr->start ==0)
-        rpr->start = event->date;
-      else if (rpr->start > event->date)
-        rpr->start = event->date;
-      ++rpr->nproc;
-
-      if (rastello_front_parallel_region.size() <= kid)
-        rastello_front_parallel_region.resize(kid+1);
-      rastello_front_parallel_region[kid] = rpr;
-    } break;
-
-    case KAAPI_EVT_PARALLEL_END   :
-    {
-      uintptr_t parallel_id = KAAPI_EVENT_DATA(event,0,i);
-      rastello_parallel_region_t* rpr = (rastello_parallel_region_t*)rastello_parallel_region[parallel_id];
-      /* stop is the max over all kprocs */
-      if (rpr->stop < event->date)
-        rpr->stop = event->date;
-      kaapi_assert( rpr->nproc >= 0);
     } break;
 
     case KAAPI_EVT_LOCK_BEG     :
     case KAAPI_EVT_LOCK_END     :
       break;
 
-    case KAAPI_EVT_BARRIER_BEG:
-    break;
-    case KAAPI_EVT_BARRIER_END:
+    case KAAPI_EVT_BARRIER:
     break;
 
     case KAAPI_EVT_LOOP_BEGIN: /* both: a state creation and an event */
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index 259bfe68d616f0bd27be99e54c242441db8b9f8f..628cad49230a60f860ff370a1089e27d002eaf72 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -2353,7 +2353,7 @@ struct kmp_taskdata { /* aligned during dynamic allocation       */
   kmp_int32 td_untied_count; /* untied task active parts counter        */
   ident_t *td_ident; /* task identifier                         */
 #if OMPT_SUPPORT
-  intptr_t td_keys[2];
+  intptr_t td_keys[2];    /* user level values */
 #endif
 #if LIBOMP_USE_AFFINITY
   kmp_uint8 td_aff_kind; // Affinity kind & tag & strict for the task