diff --git a/runtime/src/include/40/ompt.h.var b/runtime/src/include/40/ompt.h.var
index ec55236aec700067462103e203ea46c0fa427ab0..72403aeb29cecea3f1e30a46fc4ac7e38ce0b394 100644
--- a/runtime/src/include/40/ompt.h.var
+++ b/runtime/src/include/40/ompt.h.var
@@ -238,6 +238,8 @@ typedef enum ompt_task_dependence_flag_e {
     ompt_task_dependence_type_out   = 1,
     ompt_task_dependence_type_in    = 2,
     ompt_task_dependence_type_inout = 3,
+    ompt_task_dependence_type_mask  = 3,  /* mask for the base dependence type */
+    ompt_task_dependence_type_cw    = 4,  /* flag: concurrent write (commute/associative) */
 } ompt_task_dependence_flag_t;
 
 typedef struct ompt_task_dependence_s {
diff --git a/runtime/src/include/45/ompt.h.var b/runtime/src/include/45/ompt.h.var
index 85e0949998e3fd2eec122f8c7d299278bb1c502f..708a753ef4b6954cd16eeabe39b9ca352de8e640 100644
--- a/runtime/src/include/45/ompt.h.var
+++ b/runtime/src/include/45/ompt.h.var
@@ -239,6 +239,8 @@ typedef enum ompt_task_dependence_flag_e {
     ompt_task_dependence_type_out   = 1,
     ompt_task_dependence_type_in    = 2,
     ompt_task_dependence_type_inout = 3,
+    ompt_task_dependence_type_mask  = 3,  /* mask for the base dependence type */
+    ompt_task_dependence_type_cw    = 4,  /* flag: concurrent write (commute/associative) */
 } ompt_task_dependence_flag_t;
 
 typedef struct ompt_task_dependence_s {
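
Both headers add the same two enumerators: `ompt_task_dependence_type_mask` isolates the base dependence type carried in the low two bits, and `ompt_task_dependence_type_cw` is an orthogonal flag OR'ed on top of it. A minimal tool-side sketch of how an OMPT consumer could decode the combined value (`decode_dependence` is a hypothetical helper, not part of this patch):

```c
/* Hypothetical OMPT tool helper: split the combined dependence_flags value
   into its base type (masked) and the orthogonal cw bit. */
static const char *decode_dependence(unsigned dependence_flags)
{
    int cw = (dependence_flags & ompt_task_dependence_type_cw) != 0;
    switch (dependence_flags & ompt_task_dependence_type_mask) {
    case ompt_task_dependence_type_out:   return cw ? "cw(out)"   : "out";
    case ompt_task_dependence_type_in:    return "in";
    case ompt_task_dependence_type_inout: return cw ? "cw(inout)" : "inout";
    default:                              return "unknown";
    }
}
```
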
diff --git a/runtime/src/kaapi_ompt.c b/runtime/src/kaapi_ompt.c
index 7a8daf2ef6d3bc035fab2ba79b4b463380a091df..b20beb696225f5b33d00a2385987cfbbbe00a428 100644
--- a/runtime/src/kaapi_ompt.c
+++ b/runtime/src/kaapi_ompt.c
@@ -729,16 +729,22 @@ on_ompt_event_task_switch(
 static void ompt_decoder( ompt_task_dependence_t* dep, int i, void** addr, int* mode /*, size_t* len */ )
 {
   *addr = dep[i].variable_addr;
-  switch (dep[i].dependence_flags)
+  switch (dep[i].dependence_flags & ompt_task_dependence_type_mask)
   {
     case ompt_task_dependence_type_out:
-      *mode = KAAPI_ACCESS_MODE_W;
+      if (dep[i].dependence_flags & ompt_task_dependence_type_cw)
+        *mode = KAAPI_ACCESS_MODE_CW;
+      else
+        *mode = KAAPI_ACCESS_MODE_W;
       break;
     case ompt_task_dependence_type_in:
       *mode = KAAPI_ACCESS_MODE_R;
       break;
     case ompt_task_dependence_type_inout:
-      *mode = KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W;
+      if (dep[i].dependence_flags & ompt_task_dependence_type_cw)
+        *mode = KAAPI_ACCESS_MODE_ICW;
+      else
+        *mode = KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W;
       break;
     default:
       *mode = KAAPI_ACCESS_MODE_VOID;
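
With the mask in place, `ompt_decoder` maps `out|cw` to `KAAPI_ACCESS_MODE_CW` and `inout|cw` to `KAAPI_ACCESS_MODE_ICW`, while plain `in`/`out`/`inout` keep their former meaning. An illustrative driver, assuming only the declarations visible above (`x` and `decode_example` are placeholders; `ompt_decoder` is file-static in kaapi_ompt.c):

```c
/* Illustrative only: decode a cw(out) dependence. */
static void decode_example(void)
{
    static int x;
    ompt_task_dependence_t dep;
    dep.variable_addr    = &x;
    dep.dependence_flags = ompt_task_dependence_type_out
                         | ompt_task_dependence_type_cw;
    void *addr; int mode;
    ompt_decoder(&dep, 0, &addr, &mode);  /* mode == KAAPI_ACCESS_MODE_CW */
}
```
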
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index c4e5d511830f245e31adec44b0e4f1596b60d798..7a5f29fba1f7b48f4c6fb95eb0b152cac88c2dc9 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -19,6 +19,7 @@
 
 #include "kmp_config.h"
 
+#define LIBOMP_HAS_TASK_CONCURRENT_ACCESS 1  /* enable concurrent-write (cw) / commute task dependences */
 
 /* #define BUILD_PARALLEL_ORDERED 1 */
 
@@ -2107,6 +2108,13 @@ typedef union kmp_cmplrdata {
 } kmp_cmplrdata_t;
 #endif
 
+// forward declarations
+typedef union kmp_depnode        kmp_depnode_t;
+typedef struct kmp_depnode_list  kmp_depnode_list_t;
+typedef struct kmp_dephash_entry kmp_dephash_entry_t;
+typedef struct kmp_cw_depnode    kmp_cw_depnode_t;
+
+
 /*  sizeof_kmp_task_t passed as arg to kmpc_omp_task call  */
 /*!
  */
@@ -2134,26 +2142,22 @@ typedef struct kmp_taskgroup {
 } kmp_taskgroup_t;
 
 
-// forward declarations
-typedef union kmp_depnode       kmp_depnode_t;
-typedef struct kmp_depnode_list  kmp_depnode_list_t;
-typedef struct kmp_dephash_entry kmp_dephash_entry_t;
+typedef struct {
+         bool                   in:1;
+         bool                   out:1;
+         bool                   cw:1;       /* concurrent write: assumes commutativity and associativity */
+         bool                   commute:1;  /* commutative, but not concurrent */
+} kmp_depend_info_flags_t;
 
 typedef struct kmp_depend_info {
      kmp_intptr_t               base_addr;
      size_t                     len;
-     struct {
-         bool                   in:1;
-         bool                   out:1;
-     } flags;
+     kmp_depend_info_flags_t    flags;
 } kmp_depend_info_t;
 
 struct kmp_depnode_list {
    kmp_depnode_t *              node;
    kmp_depnode_list_t *         next;
-#if LIBOMP_USE_AFFINITY
-   kmp_intptr_t                 base_addr;
-#endif
 };
 
 typedef struct kmp_base_depnode {
@@ -2165,27 +2169,50 @@ typedef struct kmp_base_depnode {
 #if KMP_SUPPORT_GRAPH_OUTPUT
     kmp_uint32                  id;
 #endif
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+    kmp_cw_depnode_t*           cw;      /* should be a list of commute nodes; just one cw node in this version */
+#endif
 
     volatile kmp_int32          npredecessors;
     volatile kmp_int32          nrefs;
 } kmp_base_depnode_t;
 
+
+
 union KMP_ALIGN_CACHE kmp_depnode {
     double          dn_align;        /* use worst case alignment */
     char            dn_pad[ KMP_PAD(kmp_base_depnode_t, CACHE_LINE) ];
     kmp_base_depnode_t dn;
 };
 
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+typedef struct kmp_cw_depnode {
+    kmp_depnode_t              in_node;     /* attached in node - also used as mutex */
+    kmp_depnode_t              out_node;    /* attached out node */
+    kmp_depnode_list_t        *head_waiter; /* FIFO queue of tasks waiting for the concurrent access */
+    kmp_depnode_list_t        *tail_waiter;
+    kmp_depend_info_flags_t    flags;       /* whether concurrent write or commute */
+    volatile kmp_int32         nwriters;    /* number of in-flight concurrent accesses */
+    volatile int               state;       /* 0: unlocked, 1: locked */
+} kmp_cw_depnode_t;
+#endif
+
+
+
 struct kmp_dephash_entry {
     kmp_intptr_t               addr;
     kmp_depnode_t            * last_out;
     kmp_depnode_list_t       * last_ins;
     kmp_dephash_entry_t      * next_in_bucket;
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+    kmp_depend_info_flags_t    flags;
+    kmp_cw_depnode_t         * cw_sync;
+#endif
 };
 
 typedef struct kmp_dephash {
    kmp_dephash_entry_t     ** buckets;
-   size_t		      size;
+   size_t                     size;
 #ifdef KMP_DEBUG
    kmp_uint32                 nelements;
    kmp_uint32                 nconflicts;
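
The widened `kmp_depend_info_flags_t` describes each dependence with four orthogonal bits; `kmp_cw_depnode_t` then brackets a run of concurrent accesses between a dummy in node (also used as a mutex for commute) and a dummy out node. A hedged sketch of how a caller of `__kmpc_omp_task_with_deps` might fill one entry for a concurrent-write access (`fill_cw_dep` and `y` are illustrative only; note that a cw access is both a read and a write):

```c
/* Illustrative only: one dependence entry describing a concurrent write. */
static void fill_cw_dep(kmp_depend_info_t *d, void *y, size_t len)
{
    d->base_addr     = (kmp_intptr_t)y;
    d->len           = len;
    d->flags.in      = 1;  /* reads the accumulated value ... */
    d->flags.out     = 1;  /* ... and writes it back */
    d->flags.cw      = 1;  /* updates commute and may run concurrently */
    d->flags.commute = 0;  /* commute-only would serialize the updates instead */
}
```
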
diff --git a/runtime/src/kmp_taskdeps.cpp b/runtime/src/kmp_taskdeps.cpp
index 024f4223f1c0dcbcb77b89b309ef0bdbd91f31e2..1e132521eef1d52b6fcff10841e12411d5f00836 100644
--- a/runtime/src/kmp_taskdeps.cpp
+++ b/runtime/src/kmp_taskdeps.cpp
@@ -44,6 +44,10 @@ __kmp_init_node ( kmp_depnode_t *node )
 #ifdef KMP_SUPPORT_GRAPH_OUTPUT
     node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed);
 #endif
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+    /* no concurrent-write synchronization node attached yet */
+    node->dn.cw = 0;
+#endif
 }
 
 static inline kmp_depnode_t *
@@ -237,6 +241,88 @@ __kmp_track_dependence ( kmp_int32 gtid, kmp_depnode_t *source, kmp_depnode_t *s
 
 #include <stdio.h>
 
+
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+
+static kmp_task_t dummy_cw_node;  /* sentinel task marking the dummy cw in/out nodes */
+
+/* Precondition: the cw->in_node lock is held by the caller.
+*/
+static inline void
+__kmp_enqueue ( kmp_int32 gtid, kmp_depnode_t *node, kmp_cw_depnode_t *cw )
+{
+  kmp_info_t *thread = __kmp_threads[ gtid ];
+  kmp_depnode_list_t *dnode;
+#if USE_FAST_MEMORY
+  dnode = (kmp_depnode_list_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_list_t));
+#else
+  dnode = (kmp_depnode_list_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_list_t));
+#endif
+  /* FIFO: enqueue at the tail */
+  dnode->node = __kmp_node_ref(node);
+  dnode->next = 0;
+  if (cw->tail_waiter == 0)
+    cw->head_waiter = dnode;
+  else
+    cw->tail_waiter->next = dnode;
+  cw->tail_waiter = dnode;
+}
+
+
+/* Try to acquire the commute access, or fail.
+   Returns 0 on success (access acquired, the task may run),
+   1 on failure (the task has been enqueued as a waiter).
+*/
+static int __kmp_acquired_or_failed( kmp_int32 gtid, kmp_depnode_t *node, kmp_cw_depnode_t* c_sync )
+{
+  if (c_sync == 0) return 0;
+  if (!c_sync->flags.commute) return 0;
+  /* commute: make the task ready only if the shared resource is not already locked.
+     Otherwise the task (== graph node) is enqueued on the waiter FIFO and will be
+     woken up when the current owner releases the resource.
+  */
+  KMP_ACQUIRE_DEPNODE(gtid, &c_sync->in_node);
+  if (c_sync->state == 1)
+  {
+    __kmp_enqueue( gtid, node, c_sync );
+    KMP_RELEASE_DEPNODE(gtid, &c_sync->in_node);
+    return 1;
+  }
+  c_sync->state = 1;  /* lock the resource */
+  KMP_RELEASE_DEPNODE(gtid, &c_sync->in_node);
+  return 0;
+}
+#endif
+
+#define IS_WRITE(x) (x.out || x.cw || x.commute)
+#define IS_CONCURRENTWRITE(x) (x.cw || x.commute)
+/* for plain read accesses the compiler always sets the in bit */
+static inline int IS_CONCURRENT(kmp_depend_info_flags_t a, kmp_depend_info_flags_t b)
+{
+  /* two accesses may proceed concurrently if both are plain reads,
+     both are concurrent writes, or both are commute accesses */
+  return ((a.in && !a.out && b.in && !b.out) || (a.cw && b.cw) || (a.commute && b.commute));
+}
+
+
+/* To handle CW:
+   - the first CW access creates a pair of dummy nodes (in/out) used to bracket
+     the sequence of concurrent accesses;
+   - subsequent r, rw accesses are attached after the dummy out node.
+   The dummy nodes are stored in info->cw_sync. A dummy node has its task set to
+   &dummy_cw_node, so __kmp_release_deps_node must also release the successors
+   of the dummy node itself.
+   TODO: a better algorithm to avoid the O(k) iteration over previous accesses.
+*/
 template< bool filter >
 static inline kmp_int32
 __kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
@@ -247,68 +333,128 @@ __kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
 
     kmp_info_t *thread = __kmp_threads[ gtid ];
     kmp_int32 npredecessors=0;
+
     for ( kmp_int32 i = 0; i < ndeps ; i++ ) {
         const kmp_depend_info_t * dep = &dep_list[i];
 
-        KMP_DEBUG_ASSERT(dep->flags.in);
+        KMP_DEBUG_ASSERT(dep->flags.in || dep->flags.commute || dep->flags.cw);
 
         if ( filter && dep->base_addr == 0 ) continue; // skip filtered entries
 
         kmp_dephash_entry_t *info = __kmp_dephash_find(thread,hash,dep->base_addr);
         kmp_depnode_t *last_out = info->last_out;
 
-        if ( dep->flags.out && info->last_ins ) {
-            for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next ) {
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+        if (IS_CONCURRENTWRITE(dep->flags))
+        {
+            if (info->cw_sync ==0)
+            {
+#if USE_FAST_MEMORY
+                info->cw_sync = (kmp_cw_depnode_t *) __kmp_fast_allocate(thread,sizeof(kmp_cw_depnode_t));
+#else
+                info->cw_sync = (kmp_cw_depnode_t *) __kmp_thread_malloc(thread,sizeof(kmp_cw_depnode_t));
+#endif
+                __kmp_init_node( &info->cw_sync->in_node );
+                info->cw_sync->in_node.dn.npredecessors = 0;
+                info->cw_sync->in_node.dn.task = &dummy_cw_node;
+                __kmp_init_node( &info->cw_sync->out_node );
+                info->cw_sync->out_node.dn.npredecessors = 0;
+                info->cw_sync->out_node.dn.task = &dummy_cw_node;
+                info->cw_sync->head_waiter = info->cw_sync->tail_waiter = 0;
+                info->cw_sync->state = 0;
+                info->cw_sync->flags = dep->flags;
+                info->flags = {0, 0, 0, 0};
+            }
+            ++info->cw_sync->nwriters;  /* one more concurrent access to this resource */
+
+            /* TODO: detect if the same cw is set multiple times in the task */
+            KMP_ASSERT(node->dn.cw == 0);
+            node->dn.cw = info->cw_sync;
+
+            /* the dummy in node precedes this task ... */
+            info->cw_sync->in_node.dn.successors = __kmp_add_node(thread, info->cw_sync->in_node.dn.successors, node);
+            /* ... and this task precedes the dummy out node */
+            node->dn.successors = __kmp_add_node(thread, node->dn.successors, &info->cw_sync->out_node);
+        }
+#endif /* #if LIBOMP_HAS_TASK_CONCURRENT_ACCESS */
+
+        if ( IS_WRITE(dep->flags) && info->last_ins ) { /* WAR: write after the last reads */
+            kmp_depnode_t* outdep;
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+            if (IS_CONCURRENTWRITE(dep->flags))
+              outdep = &info->cw_sync->in_node;
+            else
+#endif
+              outdep = node;
+            for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next )
+            {
                 kmp_depnode_t * indep = p->node;
                 if ( indep->dn.task ) {
                     KMP_ACQUIRE_DEPNODE(gtid,indep);
                     if ( indep->dn.task ) {
-                        __kmp_track_dependence(gtid, indep, node, task);
-                        indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node
-                        );
+                        __kmp_track_dependence(gtid, indep, outdep, task);
+                        indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, outdep);
                         KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
-                                 filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task)));
+                                 filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(task)));
                         npredecessors++;
                     }
                     KMP_RELEASE_DEPNODE(gtid,indep);
                 }
             }
-
+            /* the previous readers are now covered by this writer */
             __kmp_depnode_list_free(thread,info->last_ins);
             info->last_ins = NULL;
-
-        } else if ( last_out && last_out->dn.task ) {
-            KMP_ACQUIRE_DEPNODE(gtid,last_out);
-            if ( last_out->dn.task ) {
-                __kmp_track_dependence(gtid, last_out,node,task);
-                last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node
-                );
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+        } else if (last_out && IS_CONCURRENT(info->flags, dep->flags)) { /* same kind of concurrent access (cw or commute) as the previous one */
+            /* no new dependence: do not increment npredecessors */
+#endif
+        } else if ( last_out && last_out->dn.task ) { /* {w|cw} after {w|r} dependence */
+            kmp_depnode_t* last_writer_node;
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+            if (info->cw_sync)
+              last_writer_node = &info->cw_sync->out_node;
+            else
+#endif
+              last_writer_node = last_out;
+            KMP_ACQUIRE_DEPNODE(gtid, last_writer_node);
+            if (last_writer_node->dn.task
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+              || (last_writer_node->dn.task == &dummy_cw_node) /* the dummy node carries a sentinel task and counts as live */
+#endif
+            )
+            {
+                __kmp_track_dependence(gtid, last_writer_node, node, task);
+                last_writer_node->dn.successors = __kmp_add_node(thread, last_writer_node->dn.successors, node );
                 KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
-                             filter,gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task)));
+                             filter,gtid, KMP_TASK_TO_TASKDATA(last_writer_node->dn.task), KMP_TASK_TO_TASKDATA(task)));
 
                 npredecessors++;
             }
-            KMP_RELEASE_DEPNODE(gtid,last_out);
+            KMP_RELEASE_DEPNODE(gtid, last_writer_node);
         }
 
+        /* update the last writer / last readers recorded for this address
+        */
         if ( dep_barrier ) {
             // if this is a sync point in the serial sequence, then the previous outputs are guaranteed to be completed after
             // the execution of this task so the previous output nodes can be cleared.
             __kmp_node_deref(thread,last_out);
             info->last_out = NULL;
         } else {
-            if ( dep->flags.out ) {
+            if ( IS_WRITE(dep->flags) ) {
                 __kmp_node_deref(thread,last_out);
                 info->last_out = __kmp_node_ref(node);
+                info->flags    = dep->flags;
             } else
-                info->last_ins = __kmp_add_node(thread, info->last_ins, node
-                );
+                info->last_ins = __kmp_add_node(thread, info->last_ins, node );
         }
-
     }
 
     KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter, gtid, npredecessors ) );
-
     return npredecessors;
 }
 
@@ -336,12 +482,15 @@ __kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_de
                 if ( dep_list[i].base_addr == dep_list[j].base_addr ) {
                     dep_list[i].flags.in |= dep_list[j].flags.in;
                     dep_list[i].flags.out |= dep_list[j].flags.out;
+                    dep_list[i].flags.cw |= dep_list[j].flags.cw;
+                    dep_list[i].flags.commute |= dep_list[j].flags.commute;
                     dep_list[j].base_addr = 0; // Mark j element as void
                 }
     }
 
     // doesn't need to be atomic as no other thread is going to be accessing this node just yet
-    // npredecessors is set -1 to ensure that none of the releasing tasks queues this task before we have finished processing all the dependencies
+    // npredecessors is set -1 to ensure that none of the releasing tasks queues this task before
+    // we have finished processing all the dependencies
     node->dn.npredecessors = -1;
 
     // used to pack all npredecessors additions into a single atomic operation at the end
@@ -367,24 +516,17 @@ __kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_de
     return npredecessors > 0 ? true : false;
 }
 
-void
-__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
-{
-    kmp_info_t *thread = __kmp_threads[ gtid ];
-    kmp_depnode_t *node = task->td_depnode;
 
-    if ( task->td_dephash ) {
-        KA_TRACE(40, ("__kmp_realease_deps: T#%d freeing dependencies hash of task %p.\n", gtid, task ) );
-        __kmp_dephash_free(thread,task->td_dephash);
-    }
 
+/* Notify the successors of a completed dependence node and release them.
+*/
+static void
+__kmp_release_deps_node ( kmp_int32 gtid, kmp_depnode_t *node )
+{
+    kmp_info_t *thread = __kmp_threads[ gtid ];
     if ( !node ) return;
 
-    KA_TRACE(20, ("__kmp_realease_deps: T#%d notifying succesors of task %p.\n", gtid, task ) );
-
-    KMP_ACQUIRE_DEPNODE(gtid,node);
-    node->dn.task = NULL; // mark this task as finished, so no new dependencies are generated
-    KMP_RELEASE_DEPNODE(gtid,node);
+    KA_TRACE(20, ("__kmp_release_deps_node: T#%d notifying successors of node %p.\n", gtid, node ) );
 
     kmp_depnode_list_t *next;
     for ( kmp_depnode_list_t *p = node->dn.successors; p; p = next ) {
@@ -394,9 +536,36 @@ __kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
         // successor task can be NULL for wait_depends or because deps are still being processed
         if ( npredecessors == 0 ) {
             KMP_MB();
-            if ( successor->dn.task ) {
-                KA_TRACE(20, ("__kmp_realease_deps: T#%d successor %p of %p scheduled for execution.\n", gtid, successor->dn.task, task ) );
-                __kmp_omp_task(gtid,successor->dn.task,false);
+            if ( successor->dn.task
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+              || (successor->dn.task == &dummy_cw_node)
+#endif
+            ) {
+                KA_TRACE(20, ("__kmp_release_deps_node: T#%d successor %p of %p scheduled for execution.\n", gtid, successor->dn.task, node ) );
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+                if (successor->dn.task == &dummy_cw_node)
+                {
+                  /* activate the tasks among the successors of the dummy cw node */
+                  kmp_depnode_list_t *nnext;
+                  for ( kmp_depnode_list_t *pp = successor->dn.successors; pp; pp = nnext ) {
+                      kmp_depnode_t *ssuccessor = pp->node;
+                      kmp_int32 nppredecessors = KMP_TEST_THEN_DEC32(&ssuccessor->dn.npredecessors) - 1;
+                      if ( nppredecessors == 0 ) {
+                          KMP_MB();
+                          if ( ssuccessor->dn.task && (__kmp_acquired_or_failed( gtid, ssuccessor, ssuccessor->dn.cw ) == 0))
+                            __kmp_omp_task(gtid,ssuccessor->dn.task,false);
+                      }
+                  }
+                }
+                else
+                {
+                  /* try to acquire the commute resource; on failure the task has been enqueued as a waiter */
+                  if (__kmp_acquired_or_failed( gtid, successor, successor->dn.cw ) ==0)
+                    __kmp_omp_task(gtid,successor->dn.task,false);
+                }
+#else
+                __kmp_omp_task(gtid,successor->dn.task,false);
+#endif
             }
         }
 
@@ -409,6 +578,65 @@ __kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
 #endif
     }
 
+    KA_TRACE(20, ("__kmp_release_deps_node: T#%d all successors of %p notified of completion\n", gtid, node ) );
+}
+
+/* Release the dependences of a completed task.
+*/
+void
+__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
+{
+    kmp_info_t *thread = __kmp_threads[ gtid ];
+    kmp_depnode_t *node = task->td_depnode;
+
+    if ( task->td_dephash ) {
+        KA_TRACE(40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n", gtid, task ) );
+        __kmp_dephash_free(thread,task->td_dephash);
+    }
+
+    if ( !node ) return;
+
+    KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n", gtid, task ) );
+
+    KMP_ACQUIRE_DEPNODE(gtid,node);
+    node->dn.task = NULL; // mark this task as finished, so no new dependencies are generated
+    KMP_RELEASE_DEPNODE(gtid,node);
+
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+    /* release the commute access lock, if any */
+    if (node->dn.cw)
+    {
+      /* commute: the resource was locked while this task ran; on release,
+         either unlock it or hand the lock over to the first waiter enqueued
+         on the FIFO by __kmp_acquired_or_failed. */
+      kmp_cw_depnode_t* c_sync = node->dn.cw;
+      kmp_depnode_t* waiter = 0;
+      KMP_ACQUIRE_DEPNODE(gtid, &c_sync->in_node);
+      KMP_ASSERT( c_sync->flags.cw || (c_sync->state == 1));
+      if (c_sync->flags.commute)
+      {
+        if (!c_sync->head_waiter)
+          c_sync->state = 0;  /* unlock: no task is waiting */
+        else {
+          /* dequeue the first waiter; the lock (state == 1) is handed over to it */
+          kmp_depnode_list_t *w = c_sync->head_waiter;
+          c_sync->head_waiter = w->next;
+          if (c_sync->head_waiter == 0)
+            c_sync->tail_waiter = 0;
+          waiter = w->node;
+#if USE_FAST_MEMORY
+          __kmp_fast_free(thread, w);
+#else
+          __kmp_thread_free(thread, w);
+#endif
+        }
+      }
+      kmp_int32 nwriters = --c_sync->nwriters;
+      KMP_RELEASE_DEPNODE(gtid, &c_sync->in_node);
+      if (waiter)
+      { /* schedule the woken-up task outside of the lock */
+        if (waiter->dn.task)
+          __kmp_omp_task(gtid, waiter->dn.task, false);
+        __kmp_node_deref(thread, waiter);
+      }
+      if (nwriters == 0)
+      { /* last concurrent access: release the successors of the dummy out node */
+        __kmp_release_deps_node( gtid, &c_sync->out_node );
+        __kmp_node_deref(thread, &c_sync->in_node);
+      }
+      /* c_sync is freed when its refcount drops; it is ok to free it as if it were a kmp_depnode_t */
+    }
+#endif
+
+    __kmp_release_deps_node( gtid, node );
     __kmp_node_deref(thread,node);
 
+    KA_TRACE(20, ("__kmp_release_deps: T#%d all successors of %p notified of completion\n", gtid, task ) );
@@ -468,6 +696,10 @@ __kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_ta
             else if (dep_list[i].flags.in)
                 new_taskdata->ompt_task_info.deps[i].dependence_flags =
                   ompt_task_dependence_type_in;
+            /* concurrent access: set the cw bit, whatever the base access type is */
+            if (dep_list[i].flags.cw)
+                new_taskdata->ompt_task_info.deps[i].dependence_flags |=
+                  ompt_task_dependence_type_cw;
         }
         for (i = 0; i < ndeps_noalias; i++)
         {
@@ -482,6 +714,10 @@ __kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_ta
             else if (noalias_dep_list[i].flags.in)
                 new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags =
                   ompt_task_dependence_type_in;
+            /* concurrent access: set the cw bit, whatever the base access type is */
+            if (noalias_dep_list[i].flags.cw)
+                new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags |=
+                  ompt_task_dependence_type_cw;
         }
     }
 #endif /* OMPT_SUPPORT && OMPT_TRACE */
@@ -512,6 +748,12 @@ __kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_ta
                   new_taskdata ) );
             return TASK_CURRENT_NOT_QUEUED;
         }
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+        /* task is ready: try to acquire the commute resource */
+        if (__kmp_acquired_or_failed( gtid, node, node->dn.cw ) )
+            return TASK_CURRENT_NOT_QUEUED;
+        /* task ready and commute resource acquired: fall through and push it */
+#endif
     } else {
 #if OMP_45_ENABLED
         kmp_task_team_t * task_team = thread->th.th_task_team;
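
Taken together, the runtime side works as follows: the first cw access on an address allocates the `cw_sync` pair, every cw task hangs between the dummy in and out nodes without counting its peers as predecessors, and a later reader or writer depends on the dummy out node, which is released once `nwriters` drops to zero. An end-to-end sketch of what this enables, assuming a compiler extension (the `depend(cw: ...)` syntax is hypothetical) that lowers to a dependence with `flags.in = flags.out = flags.cw = 1`:

```c
/* Hypothetical usage: N commutative accumulations that may run concurrently,
   followed by a reader ordered after the whole group via the dummy out node. */
#include <stdio.h>

int main(void)
{
    double sum = 0.0;
    #pragma omp parallel
    #pragma omp single
    {
        for (int i = 0; i < 100; i++) {
            #pragma omp task depend(cw: sum)  /* hypothetical depend type */
            {
                #pragma omp atomic
                sum += i;  /* commutative and associative update */
            }
        }
        #pragma omp task depend(in: sum)      /* waits for all cw tasks */
        printf("sum = %g\n", sum);
    }
    return 0;
}
```
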