diff --git a/includes/kaapi_trace.h b/includes/kaapi_trace.h
index 53051f2ee6b547fcd8cef16f8a68ee9fc55c3942..3d0c5bf924106d4b8c67ade6221645481764e116 100644
--- a/includes/kaapi_trace.h
+++ b/includes/kaapi_trace.h
@@ -443,8 +443,9 @@ typedef struct kaapi_event_t {
       kaapi_event_data_t d1;
       kaapi_event_data_t d2;
       kaapi_event_data_t d3;
+      kaapi_event_data_t d4;
     } s;
-    kaapi_event_data_t data[4];
+    kaapi_event_data_t data[5];
   } u;
 } kaapi_event_t;
 
@@ -778,6 +779,7 @@ extern kaapi_task_id_t kaapi_tracelib_newtask_id(void);
 extern void kaapi_tracelib_task_begin(
     kaapi_tracelib_thread_t*     kproc,
     kaapi_task_id_t              task,
+    kaapi_task_id_t              parent_task,
     uint64_t                     fmtid,
     int8_t                       isexplicit,
     int8_t                       kind,
diff --git a/tool/ompt_callbacks.cpp b/tool/ompt_callbacks.cpp
index 1caec3adf34172c75da6d5a03a0b32050ca5fa5e..f13e3817ea7c07964290d345bfc3739b7b0c15a1 100644
--- a/tool/ompt_callbacks.cpp
+++ b/tool/ompt_callbacks.cpp
@@ -87,6 +87,7 @@ typedef struct {
 
 typedef struct tikki_task_id_s {
   uint64_t id;
+  uint64_t pid;
   const void *task_ptr;
   ompt_dependence_t *deps;
   int ndeps;
@@ -455,18 +456,19 @@ ompt_callback_task_create_action(
   tikki_task_id_t *task = (tikki_task_id_t *)malloc(sizeof(tikki_task_id_t));
   new_task_data->ptr = task;
   task->id = ompt_get_unique_id();
+  task->pid = (parent_task_data && parent_task_data->ptr) ? ((tikki_task_id_t*)parent_task_data->ptr)->id : (uint64_t)-1; /* resolved here rather than under #if LOG so non-logging builds also record the parent; -1 means no known parent */
   task->task_ptr = codeptr_ra;
   task->ndeps = 0;
   task->name = next_name;
   task->datainfo = next_data_info;
-  next_name = 0;
 #if LOG
   if (parent_task_data) {
     tikki_task_id_t *parent_task = (tikki_task_id_t*)parent_task_data->ptr;
+    task->pid = parent_task->id;
     printf("%" PRIu64 ": ompt_task_create: parent_id=%" PRIu64 ", task_id=%" PRIu64\
-           ", type=%i, has_dep=%i, ptr=%p\n", thread_id,
+           ", type=%i, has_dep=%i, ptr=%p, name=%s\n", thread_id,
            parent_task->id, task->id, type,
-           has_dependences, task->task_ptr);
+           has_dependences, task->task_ptr, next_name ==0 ? "<no name>" : next_name);
   } else {
     // NOTE: while not occurring with the current libOMP's master, I've seen
     // this happening in previous commits.
@@ -476,6 +478,7 @@ ompt_callback_task_create_action(
            has_dependences, (uint64_t)task->task_ptr);
   }
 #endif
+  next_name = 0;
 }
 
 
@@ -553,6 +556,7 @@ ompt_callback_task_schedule_action(
     kaapi_tracelib_task_begin(
         koti->kproc,
         (kaapi_task_id_t)next_task->id,
+        (kaapi_task_id_t)next_task->pid,
         fdescr->fmtid,
         1,
         0, 0, 0,
@@ -603,6 +607,7 @@ ompt_callback_implicit_task_action (
   {
     tikki_task_id_t *task = (tikki_task_id_t *)malloc(sizeof(tikki_task_id_t));
     task->id = ompt_get_unique_id();
+    task->pid = -1;
     task->task_ptr = 0;
     task_data->ptr = task;
     task->ndeps = 0;
@@ -628,6 +633,7 @@ ompt_callback_implicit_task_action (
     kaapi_tracelib_task_begin(
         koti->kproc,
         (kaapi_task_id_t)task->id,
+        (kaapi_task_id_t)task->pid,
         fdescr->fmtid,
         0,
         0, 0, 0,
@@ -679,6 +685,7 @@ ompt_callback_implicit_task_action (
   kaapi_tracelib_task_begin(
     koti->kproc,
     (kaapi_task_id_t)task_data->value,
+    (kaapi_task_id_t)-1,
     fdescr->fmtid,
     0,
     0, 0, 0,
diff --git a/tracelib/kaapi_trace_lib.c b/tracelib/kaapi_trace_lib.c
index 5082d5058b3ee0e3c36fb0e130daa7767478547d..37766f0c766f42a33c077825c761201d205e1326 100644
--- a/tracelib/kaapi_trace_lib.c
+++ b/tracelib/kaapi_trace_lib.c
@@ -1514,6 +1514,7 @@ kaapi_task_id_t kaapi_tracelib_newtask_id(void)
 void kaapi_tracelib_task_begin(
     kaapi_tracelib_thread_t*     kproc,
     kaapi_task_id_t              task,
+    kaapi_task_id_t              parent_task,
     uint64_t                     fmtid,
     int8_t                       isexplicit,
     int8_t                       kind,
@@ -1551,11 +1552,12 @@ void kaapi_tracelib_task_begin(
   if (evt)
   {
     evt->u.s.d0.i = (uintptr_t)task;
-    evt->u.s.d1.u = fmtid;
-    evt->u.s.d2.i8[0] = isexplicit;
-    evt->u.s.d2.i8[1] = kind;
-    evt->u.s.d2.i8[2] = strict;
-    evt->u.s.d3.u     = tag;
+    evt->u.s.d1.i = (uintptr_t)parent_task;
+    evt->u.s.d2.u = fmtid;
+    evt->u.s.d3.i8[0] = isexplicit;
+    evt->u.s.d3.i8[1] = kind;
+    evt->u.s.d3.i8[2] = strict;
+    evt->u.s.d4.u     = tag;
     KAAPI_EVENT_PUSH(kproc,0, KAAPI_EVT_TASK_BEG);
   }
 //  if (rsrc & 0x1) /* dram only tracing */
diff --git a/ukilli/ukilli.cpp b/ukilli/ukilli.cpp
index 690964623da6daaa31bd28e0fee88ae6cdff3776..804d3093bff83973f80934890de5798ef63014f4 100644
--- a/ukilli/ukilli.cpp
+++ b/ukilli/ukilli.cpp
@@ -101,6 +101,9 @@ typedef void (*kaapi_fnc_event)( int, const char** );
 #define DOT_OPTIONS_NODATA     0x1
 #define DOT_OPTIONS_NOLABEL    0x2
 #define DOT_OPTIONS_CREGION    0x4
+#define DOT_OPTIONS_PARENTCHILD_NOCLUSTER 0x8
+#define DOT_OPTIONS_PARENTCHILD_NO 0x10
+
 struct katracereader_options {
   std::string output; /* output file */
   unsigned int stealevent;
@@ -271,13 +274,14 @@ static void callback_print_event(
     /* standard task exec */
     case KAAPI_EVT_TASK_BEG:
       std::cout << "@:" << KAAPI_EVENT_DATA(event,0,p)
-                << ", fmtid:" << KAAPI_EVENT_DATA(event,1,u)
-                << ", explicit:" << (int)KAAPI_EVENT_DATA(event,2,i8)[0]
-                << ", affkind:" << int2affinitykind(KAAPI_EVENT_DATA(event,2,i8)[1]);
-      if (KAAPI_EVENT_DATA(event,2,i8)[1] >0)
+                << ", parent@:" << KAAPI_EVENT_DATA(event,1,p)
+                << ", fmtid:" << KAAPI_EVENT_DATA(event,2,u)
+                << ", explicit:" << (int)KAAPI_EVENT_DATA(event,3,i8)[0]
+                << ", affkind:" << int2affinitykind(KAAPI_EVENT_DATA(event,3,i8)[1]);
+      if (KAAPI_EVENT_DATA(event,3,i8)[1] >0)
         std::cout
-                << ", strict:" << KAAPI_EVENT_DATA(event,2,i8)[2]
-                << ", tag:" << KAAPI_EVENT_DATA(event,3,u);
+                << ", strict:" << (int)KAAPI_EVENT_DATA(event,3,i8)[2] /* cast: an int8_t would be streamed as a character */
+                << ", tag:" << KAAPI_EVENT_DATA(event,4,u);
     break;
 
     case KAAPI_EVT_TASK_STEAL:
@@ -936,10 +940,10 @@ static void callback_display_paje_event(
       break;
 
     case KAAPI_EVT_TASK_BEG:
-      if (KAAPI_EVENT_DATA(event,1,u) !=0)
+      if (KAAPI_EVENT_DATA(event,2,u) !=0)
       {
         char stateid[64];
-        snprintf(stateid, 64, "t%lu", (unsigned long)KAAPI_EVENT_DATA(event,1,u));
+        snprintf(stateid, 64, "t%lu", (unsigned long)KAAPI_EVENT_DATA(event,2,u)); /* fmtid now lives in data slot 2, same slot the guard above tests */
         kaapi_trace_poti_PushState (d0, name, "STATE", stateid);
       }
       else
@@ -1339,6 +1343,7 @@ struct task_info : public state_t {
   }
   uint64_t               addr;
   uint64_t               id;
+  uint64_t               pid;
   int                    kid;
   uint64_t               ct;
   uint64_t               fmtid;
@@ -1764,9 +1769,9 @@ static void callback_display_rastello(
 
     case KAAPI_EVT_TASK_BEG:
     {
-      if (KAAPI_EVENT_DATA(event,1,u) == 0) /* no format == internal kaapi tasks */
+      if (KAAPI_EVENT_DATA(event,2,u) == 0) /* no format == internal kaapi tasks */
         break;
-      if (KAAPI_EVENT_DATA(event,2,i8)[0] == 0) /* no explicit == forget them */
+      if (KAAPI_EVENT_DATA(event,3,i8)[0] == 0) /* no explicit == forget them */
       {
         if (rpr == 0) {
           printf("*** Begining implicit task without parallel region\n");
@@ -1775,11 +1780,11 @@ static void callback_display_rastello(
 
         uint64_t addr = KAAPI_EVENT_DATA(event,0,u);
         rpr->container_implicit_task.insert(  addr );
-//printf("%" PRIu64 " Implicit Task: %p fmtid: %i\n", event->date, KAAPI_EVENT_DATA(event,0,p), (int)KAAPI_EVENT_DATA(event,1,u));
+//printf("%" PRIu64 " Implicit Task: %p fmtid: %i\n", event->date, KAAPI_EVENT_DATA(event,0,p), (int)KAAPI_EVENT_DATA(event,2,u));
         break;
       }
 
-//printf("%" PRIu64 " Implicit Task: %p fmtid: %i\n", event->date, KAAPI_EVENT_DATA(event,0,p), (int)KAAPI_EVENT_DATA(event,1,u));
+//printf("%" PRIu64 " Implicit Task: %p fmtid: %i\n", event->date, KAAPI_EVENT_DATA(event,0,p), (int)KAAPI_EVENT_DATA(event,2,u));
       std::map<uint64_t,kproc_t>::iterator kp = parallel_region_t::container_kproc.find( event->kid );
       if (kp == parallel_region_t::container_kproc.end())
       {
@@ -1792,13 +1797,14 @@ static void callback_display_rastello(
 #if DEBUG_RAST
 printf("%i:: task begin:id:%i/%p [old:%p]\n", kid, ti->id, (void*)ti->addr, (void*)addr );
 #endif
-      ti->fmtid= KAAPI_EVENT_DATA(event,1,u);
+      ti->pid= KAAPI_EVENT_DATA(event,1,u);
+      ti->fmtid= KAAPI_EVENT_DATA(event,2,u);
       ti->kid  = event->kid;
       ti->start = event->date;
       ti->isexplicit = 1;
-      ti->aff_kind = KAAPI_EVENT_DATA(event,2,i8)[1];
-      ti->aff_strict = KAAPI_EVENT_DATA(event,2,i8)[2];
-      ti->aff_tag = KAAPI_EVENT_DATA(event,3,u);
+      ti->aff_kind = KAAPI_EVENT_DATA(event,3,i8)[1];
+      ti->aff_strict = KAAPI_EVENT_DATA(event,3,i8)[2];
+      ti->aff_tag = KAAPI_EVENT_DATA(event,4,u);
       kp->second.stack.push( ti );
     } break;
 
@@ -2273,6 +2279,8 @@ struct dot_parallel_region_t : public parallel_region_t {
   int openfile(kaapi_eventfile_header_t* header);
   int closefile(int cpucount);
   void dump_task_info( task_info* ti );
+  
+  std::set<task_info*> cluster_per_task_is_set; /* exist if cluster bounding box per task is already displayed */
 };
 
 class dot_parallel_region_factory_t {
@@ -2305,7 +2313,7 @@ int dot_parallel_region_t::openfile(kaapi_eventfile_header_t* header)
 //  fprintf(stdout, "insert fmtid: %" PRIu64 " -> name: %s\n", header->fmtdefs[cnt].fmtid, header->fmtdefs[cnt].name);
     }
 
-  fprintf(file, "digraph G {\n");
+  fprintf(file, "digraph G {\n\tcompound=true;\n");
 
   return 0;
 }
@@ -2339,6 +2347,43 @@ int dot_parallel_region_t::closefile(int cpucount)
 /* */
 void dot_parallel_region_t::dump_task_info( task_info* ti )
 {
+  /* plot relationship between parent and child */
+  task_info* pti = 0;
+  if (ti->pid != -1)
+    pti = get_taskinfo( ti->pid, false );
+
+
+  if ((katracereader_options.dotoption & DOT_OPTIONS_PARENTCHILD_NOCLUSTER) == 0)
+  {
+    /* emit the task inside the subgraph cluster named after its parent */
+    if (pti !=0)
+    {
+      fprintf(fout, "subgraph cluster%" PRIu64 "{\n",
+        pti->addr
+      );
+
+      if (cluster_per_task_is_set.find(pti) == cluster_per_task_is_set.end())
+      {
+        fprintf(fout, " Ref_node%" PRIu64 "[shape=point style=invis]\n",
+          pti->addr
+        );
+        /* Generate dotted edge between the parent task and the cluster */
+        fprintf(fout, "%" PRIu64 " -> Ref_node%" PRIu64 "[style=dotted, lhead=cluster%" PRIu64 "]\n",
+          pti->addr, pti->addr,
+          pti->addr
+        );
+        cluster_per_task_is_set.insert(pti);
+      }
+    }
+    else /* no parent: put itself inside its own cluster */
+    {
+      fprintf(fout, "subgraph cluster_kid%i_%" PRIu64 "{\n",
+        ti->kid, ti->addr
+      );
+    }
+  }
+
+
   /* task */
   if (katracereader_options.dotoption & DOT_OPTIONS_NOLABEL)
   {
@@ -2360,6 +2405,16 @@ void dot_parallel_region_t::dump_task_info( task_info* ti )
     );
   }
 
+  /* plot relationship between parent and its child */
+  if ((katracereader_options.dotoption & DOT_OPTIONS_PARENTCHILD_NO) == 0)
+  {
+    if (pti !=0) {
+     fprintf(fout, "%" PRIu64 " -> %" PRIu64 "[style=dotted]\n",
+        pti->addr, ti->addr
+     );
+    }
+  }
+
   for (int i=0; i<ti->param.size(); ++i)
   {
     kaapi_access_mode_t mode   = ti->param[i].mode;
@@ -2442,6 +2497,11 @@ void dot_parallel_region_t::dump_task_info( task_info* ti )
       container_data[ptr] = data_t(version+1,data->second.numaid);
   }
 
+  if ((katracereader_options.dotoption & DOT_OPTIONS_PARENTCHILD_NOCLUSTER) ==0)
+  {
+    fprintf(fout, "}\n");
+  }
+
 }
 
 
@@ -2585,7 +2645,7 @@ int csv_parallel_region_t::closefile(int cpucount)
 
     /* header */
     fprintf(csv_parallel_region_t::fout,
-      "Resource,Numa,Start,End,Duration,Explicit,Aff,Strict,Tag,Key0,Key1,Name,TaskId"
+      "Resource,Numa,Start,End,Duration,Explicit,Aff,Strict,Tag,Key0,Key1,Name,TaskId,ParentTaskId"
     );
     if (!kaapi_perf_idset_empty(&rastello_header.task_perf_mask))
     {
@@ -2848,8 +2908,9 @@ int csv_parallel_region_t::closefile(int cpucount)
 void csv_parallel_region_t::dump_task_info(  task_info* ti )
 {
   std::map<uint64_t, char*>::iterator iter = rastello_fmtname.find( ti->fmtid );
+  task_info* pti = get_taskinfo( ti->pid, false );
   fprintf(csv_parallel_region_t::fout,
-    "%i,%i,%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%i,%i,%i,%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%s,%" PRIu64,
+    "%i,%i,%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%i,%i,%i,%" PRIu64 ",%" PRIu64 ",%" PRIu64 ",%s,%" PRIu64 ",%" PRIu64,
     ti->kid, ti->numaid, ti->start, ti->stop, ti->delay(),
     (int)ti->isexplicit,
     (int)ti->aff_kind,
@@ -2857,7 +2918,8 @@ void csv_parallel_region_t::dump_task_info(  task_info* ti )
     (ti->aff_kind == 0? (uint64_t)0 : ti->aff_tag),
     ti->keys[0],ti->keys[1],
     (iter == rastello_fmtname.end() ? "" : convert2paje(iter->second)),
-    ti->addr
+    ti->addr,
+     (pti !=0 ? pti->addr : 0)
   );
   if (!kaapi_perf_idset_empty(&rastello_header.task_perf_mask))
   {
@@ -3281,6 +3343,14 @@ static kaapi_fnc_event parse_option( const int argc, const char** argv, int* cou
     {
       katracereader_options.dotoption |= DOT_OPTIONS_NOLABEL;
     }
+    else if (strcmp(argv[i], "--dot-noparentcluster") ==0)
+    {
+      katracereader_options.dotoption |= DOT_OPTIONS_PARENTCHILD_NOCLUSTER;
+    }
+    else if (strcmp(argv[i], "--dot-noparent") ==0)
+    {
+      katracereader_options.dotoption |= DOT_OPTIONS_PARENTCHILD_NO;
+    }
 #if 0 // NOT YET
     else if (strcmp(argv[i], "--dot-nodata") ==0)
     {
@@ -3349,7 +3419,11 @@ static kaapi_fnc_event parse_option( const int argc, const char** argv, int* cou
     else
       break; /* end of options */
   }
-  
+  if ( ((katracereader_options.dotoption & DOT_OPTIONS_PARENTCHILD_NOCLUSTER) ==0)
+    && ((katracereader_options.dotoption & DOT_OPTIONS_PARENTCHILD_NO) ==0))
+  {
+    katracereader_options.dotoption |= DOT_OPTIONS_PARENTCHILD_NO;
+  }
   *count = i;
   
   switch (option) {