diff --git a/runtime/src/include/40/ompt.h.var b/runtime/src/include/40/ompt.h.var
index ec55236aec700067462103e203ea46c0fa427ab0..72403aeb29cecea3f1e30a46fc4ac7e38ce0b394 100644
--- a/runtime/src/include/40/ompt.h.var
+++ b/runtime/src/include/40/ompt.h.var
@@ -238,6 +238,8 @@ typedef enum ompt_task_dependence_flag_e {
     ompt_task_dependence_type_out   = 1,
     ompt_task_dependence_type_in    = 2,
     ompt_task_dependence_type_inout = 3,
+    ompt_task_dependence_type_mask  = 3,
+    ompt_task_dependence_type_cw    = 4,
 } ompt_task_dependence_flag_t;
 
 typedef struct ompt_task_dependence_s {
diff --git a/runtime/src/include/45/ompt.h.var b/runtime/src/include/45/ompt.h.var
index 85e0949998e3fd2eec122f8c7d299278bb1c502f..708a753ef4b6954cd16eeabe39b9ca352de8e640 100644
--- a/runtime/src/include/45/ompt.h.var
+++ b/runtime/src/include/45/ompt.h.var
@@ -239,6 +239,8 @@ typedef enum ompt_task_dependence_flag_e {
     ompt_task_dependence_type_out   = 1,
     ompt_task_dependence_type_in    = 2,
     ompt_task_dependence_type_inout = 3,
+    ompt_task_dependence_type_mask  = 3,
+    ompt_task_dependence_type_cw    = 4,
 } ompt_task_dependence_flag_t;
 
 typedef struct ompt_task_dependence_s {
diff --git a/runtime/src/kaapi_ompt.c b/runtime/src/kaapi_ompt.c
index 7a8daf2ef6d3bc035fab2ba79b4b463380a091df..b20beb696225f5b33d00a2385987cfbbbe00a428 100644
--- a/runtime/src/kaapi_ompt.c
+++ b/runtime/src/kaapi_ompt.c
@@ -729,16 +729,22 @@ on_ompt_event_task_switch(
 static void ompt_decoder( ompt_task_dependence_t* dep, int i, void** addr, int* mode /*, size_t* len */ )
 {
   *addr = dep[i].variable_addr;
-  switch (dep[i].dependence_flags)
+  switch (dep[i].dependence_flags & ompt_task_dependence_type_mask)
   {
     case ompt_task_dependence_type_out:
-      *mode = KAAPI_ACCESS_MODE_W;
+      if (dep[i].dependence_flags & ompt_task_dependence_type_cw)
+        *mode = KAAPI_ACCESS_MODE_CW;
+      else
+        *mode = KAAPI_ACCESS_MODE_W;
       break;
     case ompt_task_dependence_type_in:
       *mode = KAAPI_ACCESS_MODE_R;
       break;
     case ompt_task_dependence_type_inout:
-      *mode = KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W;
+      if (dep[i].dependence_flags & ompt_task_dependence_type_cw)
+        *mode = KAAPI_ACCESS_MODE_ICW;
+      else
+        *mode = KAAPI_ACCESS_MODE_R|KAAPI_ACCESS_MODE_W;
       break;
     default:
       *mode = KAAPI_ACCESS_MODE_VOID;
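The two ompt.h.var hunks turn dependence_flags into a small bit layout: the low two bits (ompt_task_dependence_type_mask) still carry the usual in/out/inout type, and ompt_task_dependence_type_cw is an extra bit marking a concurrent-write access, which the kaapi_ompt.c decoder above maps to KAAPI_ACCESS_MODE_CW (out) or KAAPI_ACCESS_MODE_ICW (inout). The following standalone sketch is not part of the patch (decode() and main() are invented here purely for illustration); it only shows how such flags are composed and taken apart:

/* Sketch: composing and decoding the extended dependence_flags layout. */
#include <stdio.h>

typedef enum {
  ompt_task_dependence_type_out   = 1,
  ompt_task_dependence_type_in    = 2,
  ompt_task_dependence_type_inout = 3,
  ompt_task_dependence_type_mask  = 3,
  ompt_task_dependence_type_cw    = 4
} ompt_task_dependence_flag_t;

static void decode(unsigned flags) {
  unsigned base = flags & ompt_task_dependence_type_mask;   /* in / out / inout */
  int cw = (flags & ompt_task_dependence_type_cw) != 0;     /* concurrent-write bit */
  printf("base=%u cw=%d\n", base, cw);
}

int main(void) {
  decode(ompt_task_dependence_type_out);                                   /* plain write      */
  decode(ompt_task_dependence_type_out   | ompt_task_dependence_type_cw);  /* concurrent write */
  decode(ompt_task_dependence_type_inout | ompt_task_dependence_type_cw);  /* concurrent r/w   */
  return 0;
}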
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index c4e5d511830f245e31adec44b0e4f1596b60d798..7a5f29fba1f7b48f4c6fb95eb0b152cac88c2dc9 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -19,6 +19,7 @@
 
 #include "kmp_config.h"
 
+#define LIBOMP_HAS_TASK_CONCURRENT_ACCESS 1
 
 /* #define BUILD_PARALLEL_ORDERED 1 */
 
@@ -2107,6 +2108,13 @@ typedef union kmp_cmplrdata {
 } kmp_cmplrdata_t;
 #endif
 
+// forward declarations
+typedef union kmp_depnode kmp_depnode_t;
+typedef struct kmp_depnode_list kmp_depnode_list_t;
+typedef struct kmp_dephash_entry kmp_dephash_entry_t;
+typedef struct kmp_cw_depnode kmp_cw_depnode_t;
+
+
 /* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */
 /*!
 */
@@ -2134,26 +2142,22 @@ typedef struct kmp_taskgroup {
 } kmp_taskgroup_t;
 
-// forward declarations
-typedef union kmp_depnode kmp_depnode_t;
-typedef struct kmp_depnode_list kmp_depnode_list_t;
-typedef struct kmp_dephash_entry kmp_dephash_entry_t;
+typedef struct {
+    bool in:1;
+    bool out:1;
+    bool cw:1;      /* write concurrency - assumes commutativity and associativity */
+    bool commute:1; /* commute but no concurrency */
+} kmp_depend_info_flags_t;
 
 typedef struct kmp_depend_info {
     kmp_intptr_t base_addr;
     size_t len;
-    struct {
-        bool in:1;
-        bool out:1;
-    } flags;
+    kmp_depend_info_flags_t flags;
 } kmp_depend_info_t;
 
 struct kmp_depnode_list {
     kmp_depnode_t * node;
     kmp_depnode_list_t * next;
-#if LIBOMP_USE_AFFINITY
-    kmp_intptr_t base_addr;
-#endif
 };
 
 typedef struct kmp_base_depnode {
@@ -2165,27 +2169,50 @@ typedef struct kmp_base_depnode {
 #if KMP_SUPPORT_GRAPH_OUTPUT
     kmp_uint32 id;
 #endif
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+    kmp_cw_depnode_t* cw; /* Should be a list_commute. Just one cw in this version */
+#endif
     volatile kmp_int32 npredecessors;
     volatile kmp_int32 nrefs;
 } kmp_base_depnode_t;
+
+
 
 union KMP_ALIGN_CACHE kmp_depnode {
     double dn_align;     /* use worst case alignment */
     char dn_pad[ KMP_PAD(kmp_base_depnode_t, CACHE_LINE) ];
     kmp_base_depnode_t dn;
 };
 
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+typedef struct kmp_cw_depnode {
+    kmp_depnode_t in_node;            /* attached in node - used as mutex */
+    kmp_depnode_t out_node;           /* attached out node */
+    kmp_depnode_list_t *head_waiter;  /* FIFO queue of tasks waiting for the concurrent access */
+    kmp_depnode_list_t *tail_waiter;
+    kmp_depend_info_flags_t flags;    /* concurrent write or commute */
+    volatile kmp_int32 nwriters;
+    volatile int state;               /* 0: free, 1: acquired */
+} kmp_cw_depnode_t;
+#endif
+
+
+
 struct kmp_dephash_entry {
     kmp_intptr_t addr;
     kmp_depnode_t * last_out;
     kmp_depnode_list_t * last_ins;
     kmp_dephash_entry_t * next_in_bucket;
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+    kmp_depend_info_flags_t flags;
+    kmp_cw_depnode_t * cw_sync;
+#endif
 };
 
 typedef struct kmp_dephash {
     kmp_dephash_entry_t ** buckets;
-    size_t size;
+    size_t size;
 #ifdef KMP_DEBUG
     kmp_uint32 nelements;
     kmp_uint32 nconflicts;
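kmp_cw_depnode_t is the per-address synchronizer for concurrent-write/commute accesses: in_node and out_node are dummy graph nodes that bracket the group of concurrent tasks, head_waiter/tail_waiter form a FIFO of tasks that found the resource busy, and state records whether some commute task currently holds it. A standalone model of that protocol, stripped of the libomp allocators and lock macros (task_t, cw_sync_t, acquire_or_enqueue and release are names invented for this sketch only):

/* Simplified, single-threaded model of the commute acquire/release protocol. */
#include <stdio.h>
#include <stddef.h>

typedef struct task { const char *name; struct task *next; } task_t;

typedef struct cw_sync {
  int state;            /* 0: free, 1: acquired (mirrors kmp_cw_depnode_t::state) */
  task_t *head_waiter;  /* FIFO of tasks blocked on the commute resource */
  task_t *tail_waiter;
} cw_sync_t;

/* Try to acquire; on failure enqueue the task (cf. __kmp_acquired_or_failed / __kmp_enqueue). */
static int acquire_or_enqueue(cw_sync_t *cw, task_t *t) {
  if (cw->state == 0) { cw->state = 1; return 0; }          /* acquired: caller may run t */
  t->next = NULL;                                            /* busy: append to the FIFO */
  if (cw->tail_waiter) cw->tail_waiter->next = t; else cw->head_waiter = t;
  cw->tail_waiter = t;
  return 1;                                                  /* caller must not schedule t */
}

/* Release; return the next waiter to schedule (if any), keeping state acquired for it. */
static task_t *release(cw_sync_t *cw) {
  task_t *next = cw->head_waiter;
  if (!next) { cw->state = 0; return NULL; }                 /* nobody waiting: free the resource */
  cw->head_waiter = next->next;
  if (!cw->head_waiter) cw->tail_waiter = NULL;
  return next;                                               /* state stays 1 on behalf of 'next' */
}

int main(void) {
  cw_sync_t cw = {0, NULL, NULL};
  task_t a = {"A", NULL}, b = {"B", NULL};
  printf("A acquires? %s\n", acquire_or_enqueue(&cw, &a) == 0 ? "yes" : "queued");
  printf("B acquires? %s\n", acquire_or_enqueue(&cw, &b) == 0 ? "yes" : "queued");
  task_t *n = release(&cw);                                  /* A done: B should be handed the resource */
  printf("woken after A: %s\n", n ? n->name : "(none)");
  return 0;
}

In the kmp_taskdeps.cpp hunks below, __kmp_acquired_or_failed and __kmp_enqueue play the role of acquire_or_enqueue (and return 0 immediately for pure cw accesses, which never need the lock); on the release side, __kmp_release_deps only clears state when the waiter list is empty, and the branch that would hand the resource to the head waiter is still a stub in this version of the patch.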
diff --git a/runtime/src/kmp_taskdeps.cpp b/runtime/src/kmp_taskdeps.cpp
index 024f4223f1c0dcbcb77b89b309ef0bdbd91f31e2..1e132521eef1d52b6fcff10841e12411d5f00836 100644
--- a/runtime/src/kmp_taskdeps.cpp
+++ b/runtime/src/kmp_taskdeps.cpp
@@ -44,6 +44,10 @@ __kmp_init_node ( kmp_depnode_t *node )
 #ifdef KMP_SUPPORT_GRAPH_OUTPUT
     node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed);
 #endif
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+    //node->dn.list_commute = 0;
+    node->dn.cw = 0;
+#endif
 }
 
 static inline kmp_depnode_t *
@@ -237,6 +241,88 @@ __kmp_track_dependence ( kmp_int32 gtid, kmp_depnode_t *source, kmp_depnode_t *s
 
 #include <stdio.h>
 
+
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+
+static kmp_task_t dummy_cw_node;
+
+/* Called with the in_node lock acquired.
+*/
+static inline void
+__kmp_enqueue ( kmp_int32 gtid, kmp_depnode_t *node, kmp_cw_depnode_t *cw )
+{
+    kmp_info_t *thread = __kmp_threads[ gtid ];
+    kmp_depnode_list_t *dnode;
+#if USE_FAST_MEMORY
+    dnode = (kmp_depnode_list_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_list_t));
+#else
+    dnode = (kmp_depnode_list_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_list_t));
+#endif
+    /* fifo */
+    dnode->node = __kmp_node_ref(node);
+    dnode->next = 0;
+    if (cw->tail_waiter ==0)
+        cw->head_waiter = dnode;
+    else
+        cw->tail_waiter->next = dnode;
+    cw->tail_waiter = dnode;
+}
+
+
+/* Acquire all commute accesses or fail.
+   Return 0 if the commute accesses were acquired,
+   1 on failure.
+*/
+static int __kmp_acquired_or_failed( kmp_int32 gtid, kmp_depnode_t *node, kmp_cw_depnode_t* c_sync ) //(kmp_depnode_list_t *list_commute)
+{
+//  if( list_commute ==0) return 0;
+    if (c_sync ==0) return 0;
+    if (!c_sync->flags.commute) return 0;
+    /* lock the n commute arguments; if that is not possible, release all of them */
+    int fail = 0;
+//  kmp_depnode_list_t* curr;
+//  for ( curr = list_commute; curr; curr = curr->next )
+    {
+        /* commute: make the task ready only if the shared resource is not already locked.
+           Tasks (== graph nodes) are enqueued in info->last_waiter and npred is incremented;
+           on release every cw will decrement it.
+        */
+        //kmp_cw_depnode_t* c_sync = (kmp_cw_depnode_t*)curr->node;
+        KMP_ASSERT (c_sync->flags.commute);
+        KMP_ACQUIRE_DEPNODE(gtid, &c_sync->in_node);
+        if (c_sync->state ==1)
+        {
+            fail = 1;
+            __kmp_enqueue( gtid, node, c_sync );
+            KMP_RELEASE_DEPNODE(gtid, &c_sync->in_node);
+            return fail; /* was 'break;' in the loop version: the lock is already released */
+        }
+        c_sync->state = 1;
+        KMP_RELEASE_DEPNODE(gtid, &c_sync->in_node);
+    }
+    return fail;
+}
+#endif
+
+#define IS_WRITE(x) (x.out || x.cw || x.commute)
+#define IS_CONCURRENTWRITE(x) (x.cw || x.commute)
+/* in is always set ! */
+//#define IS_CONCURRENT(last,new) ((!last.out && !new.out) || (last.cw && new.cw) || (last.commute && new.commute))
+static inline int IS_CONCURRENT(kmp_depend_info_flags_t a, kmp_depend_info_flags_t b)
+{
+    int retval = ((a.in && !a.out && b.in && !b.out) || (a.cw && b.cw) || (a.commute && b.commute));
+    return retval;
+}
+
+
+/* To handle CW:
+   - the first CW creates a dummy node used to detect the end of the concurrent accesses;
+   - new r, rw accesses are branched after this dummy node.
+   The dummy node is stored in info->cw_sync.
+   The dummy node may have its task set to 0, in which case __kmp_release_deps may also
+   release the successors of this dummy node.
+   Todo: better algorithm to avoid the O(k) iteration over previous accesses.
+*/
 template< bool filter >
 static inline kmp_int32
 __kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
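A hand-worked illustration (not code from the patch) of the dependence-graph shape that the __kmp_process_deps hunk below builds for four tasks touching the same address in program order:

/* T1: in(x)   T2: cw(x)   T3: cw(x)   T4: inout(x)
 *
 *   T1 --> in_node --+--> T2 --+--> out_node --> T4
 *                    +--> T3 --+
 *
 * Prior readers point at the dummy in_node instead of at each concurrent task;
 * every concurrent task points at the dummy out_node; a later non-concurrent
 * access is linked from out_node (last_writer_node), and out_node is only
 * released once nwriters drops back to 0.  T2 and T3 carry no edge between
 * each other, which is what lets them run concurrently (cw) or in either
 * order (commute).
 */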
@@ -247,68 +333,128 @@ __kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash,
     kmp_info_t *thread = __kmp_threads[ gtid ];
     kmp_int32 npredecessors=0;
+
     for ( kmp_int32 i = 0; i < ndeps ; i++ ) {
         const kmp_depend_info_t * dep = &dep_list[i];
 
-        KMP_DEBUG_ASSERT(dep->flags.in);
+        KMP_DEBUG_ASSERT(dep->flags.in || dep->flags.commute || dep->flags.cw);
 
         if ( filter && dep->base_addr == 0 ) continue;  // skip filtered entries
 
         kmp_dephash_entry_t *info = __kmp_dephash_find(thread,hash,dep->base_addr);
         kmp_depnode_t *last_out = info->last_out;
 
-        if ( dep->flags.out && info->last_ins ) {
-            for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next ) {
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+        if (IS_CONCURRENTWRITE(dep->flags))
+        {
+            if (info->cw_sync ==0)
+            {
+#if USE_FAST_MEMORY
+                info->cw_sync = (kmp_cw_depnode_t *) __kmp_fast_allocate(thread,sizeof(kmp_cw_depnode_t));
+#else
+                info->cw_sync = (kmp_cw_depnode_t *) __kmp_thread_malloc(thread,sizeof(kmp_cw_depnode_t));
+#endif
+                __kmp_init_node( &info->cw_sync->in_node );
+                info->cw_sync->in_node.dn.npredecessors = 0;
+                info->cw_sync->in_node.dn.task = &dummy_cw_node;
+                __kmp_init_node( &info->cw_sync->out_node );
+                info->cw_sync->out_node.dn.npredecessors = 0;
+                info->cw_sync->out_node.dn.task = &dummy_cw_node;
+                info->cw_sync->head_waiter = info->cw_sync->tail_waiter = 0;
+                info->cw_sync->state = 0;
+                info->cw_sync->flags = dep->flags;
+                info->flags = {0, 0, 0, 0};
+            }
+            ++info->cw_sync->nwriters; /* number of concurrent accesses to the concurrent resource */
+
+            /* TODO: detect if the same cw is set multiple times in the task */
+            //task->list_commute = __kmp_add_node( thread, task->list_commute, &info->cw_sync->in_node );
+            KMP_ASSERT(node->dn.cw ==0);
+            node->dn.cw = info->cw_sync;
+
+            /* the dummy in node becomes a predecessor of this task */
+            info->cw_sync->in_node.dn.successors = __kmp_add_node(thread, info->cw_sync->in_node.dn.successors, node);
+            /* and this task becomes a predecessor of the dummy out node */
+            node->dn.successors = __kmp_add_node(thread, node->dn.successors, &info->cw_sync->out_node);
+        }
+#endif /* #if LIBOMP_HAS_TASK_CONCURRENT_ACCESS */
+
+        if ( IS_WRITE(dep->flags) && info->last_ins ) {  /* wa{r}* dependence */
+            kmp_depnode_t* outdep;
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+            if (IS_CONCURRENTWRITE(dep->flags))
+                outdep = &info->cw_sync->in_node;
+            else
+#endif
+                outdep = node;
+            for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next )
+            {
                 kmp_depnode_t * indep = p->node;
                 if ( indep->dn.task ) {
                     KMP_ACQUIRE_DEPNODE(gtid,indep);
                     if ( indep->dn.task ) {
-                        __kmp_track_dependence(gtid, indep, node, task);
-                        indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node
-                        );
+                        __kmp_track_dependence(gtid, indep, outdep, task);
+                        indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, outdep);
                         KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
-                                 filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task)));
+                                 filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(task)));
                         npredecessors++;
                     }
                     KMP_RELEASE_DEPNODE(gtid,indep);
                 }
             }
-
+            /* */
             __kmp_depnode_list_free(thread,info->last_ins);
             info->last_ins = NULL;
-
-        } else if ( last_out && last_out->dn.task ) {
-            KMP_ACQUIRE_DEPNODE(gtid,last_out);
-            if ( last_out->dn.task ) {
-                __kmp_track_dependence(gtid, last_out,node,task);
-                last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node
-                );
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+        } else if (last_out && IS_CONCURRENT(info->flags, dep->flags)) {  /* concurrent cw : commute or concurrent */
+            /* do not increment npred */
+//          printf("Here !!!!\n");
+#endif
+        } else if ( last_out && last_out->dn.task ) {  /* {w|cw}a{wr} dependence */
+            kmp_depnode_t* last_writer_node;
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+            if (info->cw_sync)
+                last_writer_node = &info->cw_sync->out_node;
+            else
+#endif
+                last_writer_node = last_out;
+            KMP_ACQUIRE_DEPNODE(gtid, last_writer_node);
+            if (last_writer_node->dn.task
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+                || (last_writer_node->dn.task == &dummy_cw_node) /* dependency managed differently */
+#endif
+               )
+            {
+                __kmp_track_dependence(gtid, last_writer_node, node, task);
+                last_writer_node->dn.successors = __kmp_add_node(thread, last_writer_node->dn.successors, node );
                 KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n",
-                             filter,gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), KMP_TASK_TO_TASKDATA(node->dn.task)));
+                             filter,gtid, KMP_TASK_TO_TASKDATA(last_writer_node->dn.task), KMP_TASK_TO_TASKDATA(task)));
                 npredecessors++;
             }
-            KMP_RELEASE_DEPNODE(gtid,last_out);
+            KMP_RELEASE_DEPNODE(gtid, last_writer_node);
         }
 
+        /* free the previous output node if required
+        */
         if ( dep_barrier ) {
             // if this is a sync point in the serial sequence, then the previous outputs are guaranteed to be completed after
             // the execution of this task so the previous output nodes can be cleared.
             __kmp_node_deref(thread,last_out);
             info->last_out = NULL;
         } else {
-            if ( dep->flags.out ) {
+            if ( IS_WRITE(dep->flags) ) {
                 __kmp_node_deref(thread,last_out);
                 info->last_out = __kmp_node_ref(node);
+                info->flags = dep->flags;
             } else
-                info->last_ins = __kmp_add_node(thread, info->last_ins, node
-                );
+                info->last_ins = __kmp_add_node(thread, info->last_ins, node );
         }
-
     }
 
     KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter, gtid, npredecessors ) );
-
+//if (ndeps)
+//    printf("Task: %p, commute?:%p, node: %p npred=%i\n", task, (void*)node->dn.cw, (void*)node, npredecessors);
     return npredecessors;
 }
@@ -336,12 +482,15 @@ __kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_de
             if ( dep_list[i].base_addr == dep_list[j].base_addr ) {
                 dep_list[i].flags.in |= dep_list[j].flags.in;
                 dep_list[i].flags.out |= dep_list[j].flags.out;
+                dep_list[i].flags.cw |= dep_list[j].flags.cw;
+                dep_list[i].flags.commute |= dep_list[j].flags.commute;
                 dep_list[j].base_addr = 0; // Mark j element as void
             }
     }
 
     // doesn't need to be atomic as no other thread is going to be accessing this node just yet
-    // npredecessors is set -1 to ensure that none of the releasing tasks queues this task before we have finished processing all the dependencies
+    // npredecessors is set -1 to ensure that none of the releasing tasks queues this task before
+    // we have finished processing all the dependencies
     node->dn.npredecessors = -1;
 
     // used to pack all npredecessors additions into a single atomic operation at the end
@@ -367,24 +516,17 @@ __kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_de
     return npredecessors > 0 ? true : false;
 }
 
-void
-__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
-{
-    kmp_info_t *thread = __kmp_threads[ gtid ];
-    kmp_depnode_t *node = task->td_depnode;
-    if ( task->td_dephash ) {
-        KA_TRACE(40, ("__kmp_realease_deps: T#%d freeing dependencies hash of task %p.\n", gtid, task ) );
-        __kmp_dephash_free(thread,task->td_dephash);
-    }
+/* Decrement the predecessor count of the successors of *node and schedule
+   those that become ready.
+*/
+static void
+__kmp_release_deps_node ( kmp_int32 gtid, kmp_depnode_t *node )
+{
+    kmp_info_t *thread = __kmp_threads[ gtid ];
 
     if ( !node ) return;
 
-    KA_TRACE(20, ("__kmp_realease_deps: T#%d notifying succesors of task %p.\n", gtid, task ) );
-
-    KMP_ACQUIRE_DEPNODE(gtid,node);
-    node->dn.task = NULL; // mark this task as finished, so no new dependencies are generated
-    KMP_RELEASE_DEPNODE(gtid,node);
+    KA_TRACE(20, ("__kmp_release_deps_node: T#%d notifying successors of node %p.\n", gtid, node ) );
 
     kmp_depnode_list_t *next;
     for ( kmp_depnode_list_t *p = node->dn.successors; p; p = next ) {
@@ -394,9 +536,36 @@ __kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
         // successor task can be NULL for wait_depends or because deps are still being processed
         if ( npredecessors == 0 ) {
             KMP_MB();
-            if ( successor->dn.task ) {
-                KA_TRACE(20, ("__kmp_realease_deps: T#%d successor %p of %p scheduled for execution.\n", gtid, successor->dn.task, task ) );
-                __kmp_omp_task(gtid,successor->dn.task,false);
+            if ( successor->dn.task
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+                 || (successor->dn.task == &dummy_cw_node)
+#endif
+               ) {
+                KA_TRACE(20, ("__kmp_release_deps_node: T#%d successor %p of %p scheduled for execution.\n", gtid, successor->dn.task, node ) );
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+                if (successor->dn.task == &dummy_cw_node)
+                {
+                    /* activate all tasks in the successors of the dummy cw node */
+                    kmp_depnode_list_t *nnext;
+                    for ( kmp_depnode_list_t *pp = successor->dn.successors; pp; pp = nnext ) {
+                        nnext = pp->next;
+                        kmp_depnode_t *ssuccessor = pp->node;
+                        kmp_int32 nppredecessors = KMP_TEST_THEN_DEC32(&ssuccessor->dn.npredecessors) - 1;
+                        if ( nppredecessors == 0 ) {
+                            KMP_MB();
+                            if ( ssuccessor->dn.task && (__kmp_acquired_or_failed( gtid, ssuccessor, ssuccessor->dn.cw ) ==0))
+                                __kmp_omp_task(gtid,ssuccessor->dn.task,false);
+                        }
+                    }
+                }
+                else
+                {
+                    /* try to acquire all n commute arguments; if that is not possible, release all of them */
+                    if (__kmp_acquired_or_failed( gtid, successor, successor->dn.cw ) ==0)
+                        __kmp_omp_task(gtid,successor->dn.task,false);
+                }
+#else
+                __kmp_omp_task(gtid,successor->dn.task,false);
+#endif
             }
         }
 
@@ -409,6 +578,65 @@
 #endif
     }
 
+    KA_TRACE(20, ("__kmp_release_deps_node: T#%d all successors of %p notified of completion\n", gtid, node ) );
+}
+
+/* Release the dependencies of the completed task and notify its successors.
+*/
+void
+__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task )
+{
+    kmp_info_t *thread = __kmp_threads[ gtid ];
+    kmp_depnode_t *node = task->td_depnode;
+
+    if ( task->td_dephash ) {
+        KA_TRACE(40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n", gtid, task ) );
+        __kmp_dephash_free(thread,task->td_dephash);
+    }
+
+    if ( !node ) return;
+
+    KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n", gtid, task ) );
+
+    KMP_ACQUIRE_DEPNODE(gtid,node);
+    node->dn.task = NULL; // mark this task as finished, so no new dependencies are generated
+    KMP_RELEASE_DEPNODE(gtid,node);
+
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+    /* release the commute access lock, if any */
+    //for ( kmp_depnode_list_t *list_commute = ttask->list_commute; list_commute; list_commute = list_commute->next )
+    if (node->dn.cw)
+    {
+        /* commute: the task became ready only because the shared resource was not already locked.
+           Waiting tasks (== graph nodes) are enqueued in the waiter list and their npred is incremented;
+           on release every cw will decrement it.
+        */
+        kmp_cw_depnode_t* c_sync = (kmp_cw_depnode_t*)node->dn.cw; //list_commute->node;
+        KMP_ACQUIRE_DEPNODE(gtid, &c_sync->in_node);
+        KMP_ASSERT( c_sync->flags.cw || (c_sync->state ==1));
+        if (c_sync->flags.commute)
+        {
+            if (!c_sync->head_waiter)
+                c_sync->state = 0;
+            else {
+                /* wake-up: c_sync->state remains acquired on behalf of the next waiter */
+            }
+        }
+        --c_sync->nwriters;
+        KMP_RELEASE_DEPNODE(gtid, &c_sync->in_node);
+        if (c_sync->nwriters ==0)
+        {   /* release the successors of out_node, if any */
+            __kmp_release_deps_node( gtid, &c_sync->out_node );
+            __kmp_node_deref(thread, &c_sync->in_node);
+        }
+        /* free if no more refcount. It is ok to free c_sync as if it were a kmp_depnode_t */
+        //__kmp_node_deref(thread,&c_sync->in_node);
+        //list_commute = list_commute->next;
+    }
+    //__kmp_depnode_list_free(thread, ttask->list_commute);
+#endif
+
+    __kmp_release_deps_node( gtid, node );
     __kmp_node_deref(thread,node);
 
     KA_TRACE(20, ("__kmp_realease_deps: T#%d all successors of %p notified of completation\n", gtid, task ) );
@@ -468,6 +696,10 @@ __kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_ta
             else if (dep_list[i].flags.in)
                 new_taskdata->ompt_task_info.deps[i].dependence_flags =
                     ompt_task_dependence_type_in;
+            /* concurrent access: add the cw bit whatever the base access is */
+            if (dep_list[i].flags.cw)
+                new_taskdata->ompt_task_info.deps[i].dependence_flags |=
+                    ompt_task_dependence_type_cw;
         }
 
         for (i = 0; i < ndeps_noalias; i++) {
@@ -482,6 +714,10 @@ __kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_ta
             else if (noalias_dep_list[i].flags.in)
                 new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags =
                     ompt_task_dependence_type_in;
+            /* concurrent access: add the cw bit whatever the base access is */
+            if (noalias_dep_list[i].flags.cw)
+                new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags |=
+                    ompt_task_dependence_type_cw;
         }
     }
 #endif /* OMPT_SUPPORT && OMPT_TRACE */
@@ -512,6 +748,12 @@ __kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_ta
                       new_taskdata ) );
             return TASK_CURRENT_NOT_QUEUED;
         }
+#if LIBOMP_HAS_TASK_CONCURRENT_ACCESS
+        /* task ready: try to acquire the commute resources */
+        if (__kmp_acquired_or_failed( gtid, node, node->dn./*list_*/cw) )
+            return TASK_CURRENT_NOT_QUEUED;
+        /* task ready + commute resources acquired: push it */
+#endif
     } else {
 #if OMP_45_ENABLED
         kmp_task_team_t * task_team = thread->th.th_task_team;
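What the runtime-side flags are for, seen from user code: with a plain depend(inout: sum) OpenMP must run the updates below in program order, one after another, whereas flags.commute only requires mutual exclusion (any order) and flags.cw allows them to run concurrently. The source-level spelling of such a dependence type is not part of this patch (it has to come from the compiler front end); the depend(commute: sum) spelling in the comment below is only a hypothetical illustration.

/* User-level illustration; compiles as standard OpenMP with depend(inout: sum). */
#include <stdio.h>

int main(void) {
  long sum = 0;
  #pragma omp parallel
  #pragma omp single
  {
    for (int i = 0; i < 8; ++i) {
      /* With commute flags the eight updates may execute in any order, one at a
         time; with cw flags they may even run concurrently (the update would
         then need to be atomic).  With plain inout they run in program order. */
      #pragma omp task depend(inout: sum) firstprivate(i)  /* hypothetically: depend(commute: sum) */
      sum += i;
    }
    #pragma omp task depend(in: sum)
    printf("sum = %ld\n", sum);
  }
  return 0;
}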