diff --git a/includes/kaapi_trace.h b/includes/kaapi_trace.h
index 256013240807f7a48d44f6146e6ba9b7b627ecad..bfe968a528e2ee1eae9fa3921016ba7a15ff4a3d 100644
--- a/includes/kaapi_trace.h
+++ b/includes/kaapi_trace.h
@@ -538,13 +538,12 @@ typedef enum kaapi_access_mode_t {
   KAAPI_ACCESS_MODE_S   = 16,       /* 0001 0000 : stack data */
   KAAPI_ACCESS_MODE_T   = 32,       /* 0010 0000 : for Quark support: scratch mode or temporary */
   KAAPI_ACCESS_MODE_P   = 64,       /* 0100 0000 : */
-  KAAPI_ACCESS_MODE_IP  = 128,      /* 1000 0000 : in place, for CW only */
+  KAAPI_ACCESS_MODE_C   = 128,      /* 1000 0000 : commute only - for CW only - */
 
   KAAPI_ACCESS_MODE_RW  = KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W,
   KAAPI_ACCESS_MODE_STACK = KAAPI_ACCESS_MODE_S|KAAPI_ACCESS_MODE_RW,
   KAAPI_ACCESS_MODE_SCRATCH = KAAPI_ACCESS_MODE_T|KAAPI_ACCESS_MODE_V,
   KAAPI_ACCESS_MODE_CWP = KAAPI_ACCESS_MODE_P|KAAPI_ACCESS_MODE_CW,
-  KAAPI_ACCESS_MODE_ICW = KAAPI_ACCESS_MODE_IP|KAAPI_ACCESS_MODE_CW
 } kaapi_access_mode_t;
 
 #define KAAPI_ACCESS_MASK_RIGHT_MODE   0x7F   /* 5 bits, ie bit 0, 1, 2, 3, 4, including P mode */
diff --git a/tool/ompt_callbacks.cpp b/tool/ompt_callbacks.cpp
index de3c5079da48fa1e8b591369e2c2a43145406e99..6fbcdc28965a81dbb14b68c0db9cf45c01cc2043 100644
--- a/tool/ompt_callbacks.cpp
+++ b/tool/ompt_callbacks.cpp
@@ -182,14 +182,38 @@ static void ompt_decoder( ompt_dependence_t* dep, int i, void** addr, size_t* le
   //*len  = dep[i].variable_len;
   *len = 1;
   *mode = KAAPI_ACCESS_MODE_VOID;
-  if (dep[i].dependence_type & ompt_dependence_type_in)
-    *mode |=KAAPI_ACCESS_MODE_R;
-  if (dep[i].dependence_type & ompt_dependence_type_out)
-    *mode |=KAAPI_ACCESS_MODE_W;
-  if (dep[i].dependence_type & ompt_dependence_type_inout)
-    *mode =KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W;
-  //if (dep[i].dependence_flags.commute)
-    //*mode |=KAAPI_ACCESS_MODE_CW;
+  /* Map the OpenMP dependence type to a Kaapi access mode; unhandled types keep the VOID mode set above. */
+  switch (dep[i].dependence_type)
+  {
+    case ompt_dependence_type_in:
+      *mode =KAAPI_ACCESS_MODE_R;
+      break;
+
+    case ompt_dependence_type_out:
+      *mode =KAAPI_ACCESS_MODE_W;
+      break;
+
+    case ompt_dependence_type_inout:
+      *mode =KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W;
+      break;
+
+    case ompt_dependence_type_mutexinoutset:
+      /* C (commute only) flag combined with CW, see kaapi_access_mode_t */
+      *mode =KAAPI_ACCESS_MODE_C|KAAPI_ACCESS_MODE_CW;
+      break;
+
+    default:
+    {
+      /* report an unsupported dependence type only once */
+      static int alreadydisplay = 0;
+      if (alreadydisplay ==0)
+      {
+        printf("*** OpenMP dependence type: %i not implemented\n", (int)dep[i].dependence_type);
+        alreadydisplay = 1;
+      }
+      break;
+    }
+  }
 }
 
 
@@ -383,7 +407,7 @@ ompt_callback_task_create_action(
   if (parent_task_data) {
     tikki_task_id_t *parent_task = (tikki_task_id_t*)parent_task_data->ptr;
     printf("%" PRIu64 ": ompt_task_create: parent_id=%" PRIu64 ", task_id=%" PRIu64\
-           ", type=%i, has_dep=%i, ptr=%" PRIu64 "\n", thread_id,
+           ", type=%i, has_dep=%i, ptr=%p\n", thread_id,
            parent_task->id, task->id, type,
            has_dependences, task->task_ptr);
   } else {
@@ -432,7 +456,8 @@ ompt_callback_task_schedule_action(
     kaapi_tracelib_thread_switchstate(koti->kproc);
     //free(prior_task);
   }
-  if (next_task_data->ptr) {
+  if (next_task_data->ptr) 
+{
     tikki_task_id_t *new_task = (tikki_task_id_t *)next_task_data->ptr;
     // We are starting a task
 #if LOG
@@ -585,14 +610,38 @@ ompt_callback_dependences_action (
     ompt_data_t *task_data,
     const ompt_dependence_t *deps,
     int ndeps
-    )
+)
 {
   tikki_task_id_t *task = (tikki_task_id_t *)task_data->ptr;
   uint64_t thread_id = ompt_get_thread_data()->value;
   task->deps = (ompt_dependence_t *)malloc(ndeps * sizeof(ompt_dependence_t));
 #if LOG
-  printf("%" PRIu64 ": ompt_event_task_dependences: tsak_id=%" PRIu64 ", #deps=%i, deps: %p\n", thread_id, task->id, ndeps, task->deps);
+  printf("%" PRIu64 ": ompt_event_dependences: tsak_id=%" PRIu64 ", #deps=%i, deps: %p\n", thread_id, task->id, ndeps, task->deps);
 #endif
   task->ndeps = ndeps;
   memcpy(task->deps, deps, ndeps*sizeof(ompt_dependence_t));
 }
+
+/* OMPT task_dependence callback: forwards the (source task -> sink task)
+   dependence to the trace library. Pairs without a task descriptor are skipped. */
+void
+ompt_callback_task_dependence_action (
+  ompt_data_t *src_task_data,
+  ompt_data_t *sink_task_data
+)
+{
+  tikki_task_id_t *src_task = (tikki_task_id_t *)src_task_data->ptr;
+  tikki_task_id_t *dest_task = (tikki_task_id_t *)sink_task_data->ptr;
+  /* no descriptor attached to one of the tasks: nothing can be recorded */
+  if ((src_task ==0) || (dest_task ==0)) return;
+
+  uint64_t thread_id = ompt_get_thread_data()->value;
+  kaapi_ompt_thread_info_t* koti = &__kaapi_oth_info[thread_id];
+  kaapi_tracelib_task_depend( koti->kproc,
+    (kaapi_task_id_t)src_task->id, (kaapi_task_id_t)dest_task->id );
+
+#if LOG
+  printf("%" PRIu64 ": ompt_event_task_dependence: task_id=%" PRIu64 ", sink: %" PRIu64 "\n", thread_id, src_task->id, dest_task->id);
+#endif
+}
+
diff --git a/tool/ompt_callbacks.def b/tool/ompt_callbacks.def
index 99a78d7a494d157f3b74a858a8d438dda3255457..e4b2b7113131204f354e3567a50ee060491680eb 100644
--- a/tool/ompt_callbacks.def
+++ b/tool/ompt_callbacks.def
@@ -13,6 +13,7 @@ CALLBACK(ompt_callback_task_create, ompt_data_t*, const ompt_frame_t*, ompt_data
 CALLBACK(ompt_callback_task_schedule, ompt_data_t *, ompt_task_status_t, ompt_data_t *)
 CALLBACK(ompt_callback_implicit_task, ompt_scope_endpoint_t, ompt_data_t *, ompt_data_t*, unsigned int, unsigned int, int)
 CALLBACK(ompt_callback_dependences, ompt_data_t *, const ompt_dependence_t *, int)
+CALLBACK(ompt_callback_task_dependence, ompt_data_t *, ompt_data_t * )
 UNUSED(ompt_callback_target)
 UNUSED(ompt_callback_target_data_op)
 UNUSED(ompt_callback_target_submit)
@@ -23,8 +24,6 @@ UNUSED(ompt_callback_device_load)
 UNUSED(ompt_callback_device_unload)
 UNUSED(ompt_callback_sync_region_wait)
 UNUSED(ompt_callback_mutex_released)
-UNUSED(ompt_callback_dependences)
-UNUSED(ompt_callback_task_dependence)
 UNUSED(ompt_callback_work)
 UNUSED(ompt_callback_master)
 UNUSED(ompt_callback_target_map)
diff --git a/tracelib/kaapi_recorder.c b/tracelib/kaapi_recorder.c
index dd91fd3dda955394be7bb98ddd2039d272431c75..15beccbbf584698522ae698345a907fbde4b9241 100644
--- a/tracelib/kaapi_recorder.c
+++ b/tracelib/kaapi_recorder.c
@@ -203,6 +203,8 @@ static int _kaapi_write_header( int kid )
   for (i=0; i<kaapi_tracelib_param.fmt_listsize; ++i)
   {
     const kaapi_descrformat_t* fmt = kaapi_tracelib_param.fmt_list[i];
+    // TODO ?
+    if (fmt ==0) continue;
     kaapi_fmttrace_def* fmtdef = &header.fmtdefs[header.taskfmt_count];
     fmtdef->fmtid = fmt->fmtid;
     if (fmt->name !=0)
diff --git a/tracelib/kaapi_trace_lib.c b/tracelib/kaapi_trace_lib.c
index e552489bd528424f6ca642938b58b644dc6454b2..fb71672079749c5781f5f7d758821fcee50b0583 100644
--- a/tracelib/kaapi_trace_lib.c
+++ b/tracelib/kaapi_trace_lib.c
@@ -783,6 +783,7 @@ void kaapi_tracelib_fini(void)
 #endif
 
   /* Display stat per task */
+#if 0 // TG: 20-05-05 disable this feature
   if (kaapi_tracelib_param.display_perfcounter != KAAPI_NO_DISPLAY_PERF)
   {
     sprintf(filename, "stat.%i", getpid());
@@ -794,6 +795,7 @@ void kaapi_tracelib_fini(void)
         1e-9*((double)kaapi_get_elapsedns_since_start())
     );
   }
+#endif
 
   if (file !=0)
   {
@@ -1832,6 +1834,7 @@ kaapi_descrformat_t* kaapi_tracelib_reserve_perfcounter(void)
   retval->perfctr   = perf;
   kaapi_tracelib_param.fmt_list[kaapi_tracelib_param.fmt_listsize] = retval;
   ++kaapi_tracelib_param.fmt_listsize;
+//printf("%s\n", __func__ );
 
   kaapi_assert(retval != 0);
   return retval;
diff --git a/ukilli/ukilli.cpp b/ukilli/ukilli.cpp
index dfbb08a89beb1a3597e0965a33e422116d38ce29..88e5a1f5c459164700500d4cda8be58ba1d359c1 100644
--- a/ukilli/ukilli.cpp
+++ b/ukilli/ukilli.cpp
@@ -1776,7 +1776,7 @@ static void callback_display_rastello(
         break;
       }
 
-//printf("%" PRIu64 " Task: %p fmtid: %i\n", event->date, KAAPI_EVENT_DATA(event,0,p), (int)KAAPI_EVENT_DATA(event,1,u));
+//printf("%" PRIu64 " Implicit Task: %p fmtid: %i\n", event->date, KAAPI_EVENT_DATA(event,0,p), (int)KAAPI_EVENT_DATA(event,1,u));
       std::map<uint64_t,kproc_t>::iterator kp = parallel_region_t::container_kproc.find( event->kid );
       if (kp == parallel_region_t::container_kproc.end())
       {
@@ -1877,16 +1877,13 @@ printf("%i:: task[id:%i/%p] = {mode:%c}/%p\n", kid, task->id, (void*)task->addr,
 
     case KAAPI_EVT_TASK_SUCC:
     {
-      task_info* task = rpr->get_taskinfo( KAAPI_EVENT_DATA(event,0,u), false );
-      if (task != 0)/* should be top stack task */
-      {
-        task_info* succ = rpr->get_taskinfo( KAAPI_EVENT_DATA(event,1,u), true );
+      task_info* task = rpr->get_taskinfo( KAAPI_EVENT_DATA(event,0,u), true );
+      task_info* succ = rpr->get_taskinfo( KAAPI_EVENT_DATA(event,1,u), true );
 #if DEBUG_RAST
 printf("pred[id:%i/%p] = {id:%i}/%p\n", succ->id, (void*)succ->addr, task->id, (void*)task->addr);
 #endif
-        succ->pred.push_back( task );
-        task->succ.push_back( succ );
-      }
+      succ->pred.push_back( task );
+      task->succ.push_back( succ );
     } break;
 
 
@@ -1917,7 +1914,11 @@ printf("pred[id:%i/%p] = {id:%i}/%p\n", succ->id, (void*)succ->addr, task->id, (
       {
         task->stop = event->date;
         task->numaid = (uint32_t)KAAPI_EVENT_DATA(event,1,i);
+#if DEBUG_RAST
+        printf("%i:: task end:id:%i/%p\n", kid, task->id, (void*)task->addr);
+#endif
       }
+      /* should be top ? */
       if (kp == parallel_region_t::container_kproc.end())
         printf("***[%d] Unkown thread kid: %i\n", __LINE__, event->kid);
 /*
@@ -2277,6 +2278,8 @@ int dot_parallel_region_t::openfile(kaapi_eventfile_header_t* header)
   return 0;
 }
 
+
+/* */
 int dot_parallel_region_t::closefile(int cpucount)
 {
   if (fout == 0) return 1;
@@ -2840,22 +2843,6 @@ void csv_parallel_region_t::dump_task_info(  task_info* ti )
             ti->perfctr[i].value );
     }
   }
-#if 0 /* not yet in for csv */
-  for (size_t i=0; i< ti->param.size(); ++i)
-    switch (ti->param[i].mode)
-    {
-      case KAAPI_ACCESS_MODE_R:
-        fprintf(fout,"\taction_add_read(T, &blocks[id(\"A\",{%" PRIu64 "})]);\n", ti->param[i].idx);
-        break;
-      case KAAPI_ACCESS_MODE_W:
-        fprintf(fout,"\taction_add_write(T, &blocks[id(\"A\",{%" PRIu64 "})]);\n", ti->param[i].idx);
-        break;
-      case KAAPI_ACCESS_MODE_RW:
-        fprintf(fout,"\taction_add_readwrite(T, &blocks[id(\"A\",{%" PRIu64 "})]);\n", ti->param[i].idx);
-        break;
-      default: break;
-    }
-#endif
   fprintf(fout, "\n" );
 }
 
@@ -2943,6 +2930,194 @@ static void fnc_csv( int count, const char** filenames )
 }
 
 
+/* ============================= SOMP OUTPUT */
+/* SOMP: trace format
+*/
+/* Parallel region that writes its tasks as SOMP records into trace_<id>.rec. */
+struct somp_parallel_region_t : public parallel_region_t {
+  somp_parallel_region_t(uintptr_t pid) : parallel_region_t(pid) {}
+  /* open the output file and register the task format names of the header */
+  int openfile(kaapi_eventfile_header_t* header);
+  /* dump every recorded task, then close the output file */
+  int closefile(int cpucount);
+  /* write the record of one task */
+  void dump_task_info( task_info* ti );
+};
+
+/* Factory for SOMP regions. NOTE(review): no base class is named here - confirm ReadFiles accepts this type. */
+class somp_parallel_region_factory_t {
+public:
+  virtual parallel_region_t* create(uintptr_t prid);
+};
+somp_parallel_region_factory_t somp_pr_factory;
+
+/* Allocate a new SOMP region for the given parallel region identifier. */
+parallel_region_t* somp_parallel_region_factory_t::create(uintptr_t region_id)
+{ return new somp_parallel_region_t(region_id); }
+
+
+/* Create the output file "trace_<parallel_id>.rec" and register the name of
+   each task format of the header (fmtid != 0) into rastello_fmtname.
+   Returns 0; the process exits if the file cannot be created. */
+int somp_parallel_region_t::openfile(kaapi_eventfile_header_t* header)
+{
+  sprintf(filename, "trace_%i.rec", (int)parallel_id);
+  fout = fopen(filename,"w");
+  if (fout ==0)
+  {
+    fprintf(stderr,"*** Cannot open file '%s'\n",filename);
+    exit(-1);
+  }
+
+  for (int idx=0; idx<header->taskfmt_count; ++idx)
+  {
+    if (header->fmtdefs[idx].fmtid ==0) continue;
+    rastello_fmtname.insert( std::make_pair(header->fmtdefs[idx].fmtid, header->fmtdefs[idx].name) );
+  }
+  return 0;
+}
+
+/* Write every task of container_orderedlist with dump_task_info, then close the
+   output file. Returns 1 if no file is open, 0 otherwise; cpucount is unused. */
+int somp_parallel_region_t::closefile(int cpucount)
+{
+  if (fout == 0)
+    return 1;
+
+  /* container_data is emptied before and after the dump */
+  container_data.clear();
+  for (std::list<task_info*>::iterator it = container_orderedlist.begin();
+       it != container_orderedlist.end();
+       ++it)
+  {
+    dump_task_info(*it);
+  }
+
+  fprintf(stdout,"*** File '%s' generated\n", filename);
+  fclose(fout);
+  fout = 0;
+  container_data.clear();
+  return 0;
+}
+
+
+/* Write one SOMP record for ti to fout (SubmitOrder/SubmitTime are not emitted, Sizes is empty). Format:
+  Name*: dgemm
+  JobId: 11
+  SubmitOrder: 3
+  SubmitTime: 0.1
+  StartTime*: 0.2
+  EndTime*: 0.3
+  MemoryNode*: 0
+  Handles*: 2df4ce5 3dve4a4 1dvgf5d
+  Modes*: R R RW
+  Sizes: 2000x2000x8 2000x2000x8 2000x2000x8
+  DependsOn*: 5
+*/
+void somp_parallel_region_t::dump_task_info( task_info* ti )
+{
+  /* task name: looked up from the format id, "<unknown>" when not registered */
+  std::map<uint64_t, char*>::iterator fmt =rastello_fmtname.find(ti->fmtid);
+
+  if (fmt == rastello_fmtname.end())
+    fprintf(stdout, "cannot find fmtid: %" PRIu64 "\n", ti->fmtid);
+
+  fprintf(fout,"Name: %s\n"
+               "JobId: %li\n"
+               "StartTime: %f\n"
+               "EndTime: %f\n"
+               "MemoryNode: %li\n",
+    (fmt == rastello_fmtname.end() ? "<unknown>" : fmt->second ),
+    (long)ti->id /* %li expects a long */,
+    (double)ti->start*1e-6 /* ns -> millis */,
+    (double)ti->stop*1e-6 /* ns -> millis */,
+    (long)ti->numaid /* %li expects a long */
+  );
+
+  /* handles */
+  fprintf(fout,"Handles:");
+  for (size_t i=0; i<ti->param.size(); ++i)
+  {
+    /* %p requires a void*: convert the 64-bit handle explicitly */
+    fprintf(fout," %p", (void*)(uintptr_t)ti->param[i].ptr);
+  }
+  fprintf(fout,"\n");
+
+  /* Modes, one token per parameter. NOTE(review): a mode matching no branch prints nothing and shifts Modes against Handles. */
+  fprintf(fout,"Modes:");
+  for (size_t i=0; i<ti->param.size(); ++i)
+  {
+    kaapi_access_mode_t mode   = ti->param[i].mode;
+    if (KAAPI_ACCESS_IS_READWRITE(mode))
+      fprintf(fout," RW");
+    else if (KAAPI_ACCESS_IS_READ(mode))
+      fprintf(fout," R");
+    else if (KAAPI_ACCESS_IS_WRITE(mode))
+      fprintf(fout," W");
+    else if (mode == (KAAPI_ACCESS_MODE_C|KAAPI_ACCESS_MODE_CW))
+      fprintf(fout," MX");
+    else if (mode & KAAPI_ACCESS_MODE_SCRATCH)
+      fprintf(fout," S");
+  }
+  fprintf(fout,"\n");
+
+  /* Sizes: no size is written, the field is emitted empty */
+  fprintf(fout,"Sizes:");
+  fprintf(fout,"\n");
+
+
+  /* DependsOn: ids of the predecessor tasks */
+  fprintf(fout,"DependsOn:");
+  for (size_t i=0; i<ti->pred.size(); ++i)
+  {
+    fprintf(fout," %li", (long)ti->pred[i]->id);
+  }
+  fprintf(fout,"\n\n");
+}
+
+
+
+/* Entry point of the SOMP output: reads the event files named in filenames
+   through callback_display_rastello (the rastello data structures are reused),
+   then writes one trace file per parallel region.
+*/
+static void fnc_somp( int count, const char** filenames )
+{
+  rastello_parallel_region.reserve(128);
+  rastello_front_parallel_region.reserve(256);
+
+  rastello_fs = OpenFiles( count, filenames );
+  if (rastello_fs ==0)
+    return;
+
+  if (GetHeader(rastello_fs, &rastello_header) !=0)
+    return;
+
+  /* read all events; presumably ReadFiles uses somp_pr_factory to create the
+     regions filled by callback_display_rastello - TODO confirm
+  */
+  ReadFiles(rastello_fs, &somp_pr_factory, callback_display_rastello );
+
+  /* one output file per non-null region */
+  for (size_t r = 0; r<rastello_parallel_region.size(); ++r)
+  {
+    somp_parallel_region_t* region = (somp_parallel_region_t*)rastello_parallel_region[r];
+    if (region !=0)
+    {
+      /* closefile is only evaluated when openfile returned 0 */
+      if ((region->openfile( &rastello_header) !=0)
+       || (region->closefile(rastello_parallel_region[r]->nproc) !=0))
+      {
+        exit(1);
+      }
+    }
+  }
+
+  /* close & umap */
+  CloseFiles(rastello_fs);
+}
+
+
 
 /*
 */
@@ -2967,8 +3142,9 @@ static void print_usage(const char* msg = 0)
   fprintf(stderr, "    --dot-nolabel      : do not output label.\n");
   fprintf(stderr, "    --dot-cregion      : output graph accross parallel regions.\n");
 //  fprintf(stderr, "     --dot-nodata : do not output data node.\n");
-  fprintf(stderr, "  -r | --rastello      : output Rastello format compatible with CORSE team simulator.\n");
-  fprintf(stderr, "                         Output filename is rastello_<n>.c, one per parallel region.\n");
+  fprintf(stderr, "  -s | --somp          : output file with SOMP trace format .\n");
+//  fprintf(stderr, "  -r | --rastello      : output Rastello format compatible with CORSE team simulator.\n");
+//  fprintf(stderr, "                         Output filename is rastello_<n>.c, one per parallel region.\n");
 //  fprintf(stderr, "  --steal-event   : include steal events in trace.\n");
 //  fprintf(stderr, "  --gpu-trace     : include GPU trace information.\n");
 //  fprintf(stderr, "  --gpu-transfer  : include GPU transfers.\n");
@@ -3005,6 +3181,8 @@ static kaapi_fnc_event parse_option( const int argc, const char** argv, int* cou
       option = 'a';
     else if ((strcmp(argv[i], "--display-header") ==0)||(strcmp(argv[i], "-e") ==0))
       option = 'h';
+    else if ((strcmp(argv[i], "--somp") ==0) || (strcmp(argv[i], "-s") ==0))
+      option = 'o';
     else if ((strcmp(argv[i], "--rastello") ==0) || (strcmp(argv[i], "-r") ==0))
       option = 'r';
     else if ((strcmp(argv[i], "--csv") ==0) || (strcmp(argv[i], "-c") ==0))
@@ -3123,6 +3301,9 @@ static kaapi_fnc_event parse_option( const int argc, const char** argv, int* cou
   case 'd':
     return fnc_dot;
 
+  case 'o':
+    return fnc_somp;
+
   case 'H':
   default:
     print_usage();