diff --git a/runtime/src/kmp_gsupport.cpp b/runtime/src/kmp_gsupport.cpp index dc52957018b282d8e9e3e0dcca3d2b5a9dd308eb..218957f8823b4521769135adf244503f4f298175 100644 --- a/runtime/src/kmp_gsupport.cpp +++ b/runtime/src/kmp_gsupport.cpp @@ -891,15 +891,12 @@ xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_fu ndeps = 0; /* force allocation of all dependencies in task */ + /* extra deps are allocated in taskdata struct if depsinalloc is set and VARDEP configured */ input_flags->depsinalloc = 1; kmp_task_t *task = __kmp_task_alloc(&loc, gtid, input_flags, sizeof(kmp_task_t), arg_size ? arg_size + arg_align - 1 : 0, (kmp_routine_entry_t)func, -#if LIBOMP_USE_VARDEP - ndeps+thread->th.th_edps_size[0], thread->th.th_edps_size[1] -#else ndeps, 0 -#endif ); kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); ndeps = taskdata->td_ndeps + taskdata->td_ndeps_noalias; @@ -966,21 +963,17 @@ xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_fu dep_list[i].flags.alias = 0; } } - /* consider that gcc does not fill ALL dependencies fields */ -/* BUG HERE: gcc move extra dep to taskdata->deps and omp_task_with_deps recompute the total - number of deps using also extra deps stored in threaddata. -*/ -#error "STOP" - taskdata->td_flags.depsfill= 0; + /* consider that gcc has filled ALL dependencies fields (excluding extra deps) */ + taskdata->td_flags.depsfill= 1; if (if_cond) __kmpc_omp_task_with_deps( &loc, gtid, task, - taskdata->td_ndeps, taskdata->td_deps, - taskdata->td_ndeps_noalias, taskdata->td_deps_noalias + depend == 0 ? 0 : (kmp_int32)(uintptr_t)depend[0], taskdata->td_deps, + 0, taskdata->td_deps_noalias ); else { __kmpc_omp_wait_deps( &loc, gtid, - taskdata->td_ndeps, taskdata->td_deps, - taskdata->td_ndeps_noalias, taskdata->td_deps_noalias + depend == 0 ? 0 : (kmp_int32)(uintptr_t)depend[0], taskdata->td_deps, + 0, taskdata->td_deps_noalias ); #if OMPT_SUPPORT ompt_thread_info_t oldInfo; diff --git a/runtime/src/kmp_taskdeps.cpp b/runtime/src/kmp_taskdeps.cpp index a6ad799cfa4c7ee66ae636ac940e802906f8e60b..95c0c38c309dd10ccb4766053653ee2d9476fa49 100644 --- a/runtime/src/kmp_taskdeps.cpp +++ b/runtime/src/kmp_taskdeps.cpp @@ -19,6 +19,7 @@ #include "kmp_io.h" #include "kmp_wait_release.h" + #if LIBOMP_USE_PARALLEL_SPAWN #include "kmp_atomic.h" #endif @@ -489,7 +490,7 @@ __kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash, KMP_DEBUG_ASSERT(dep->flags.in); if (filter && -#if LIBOMP_USE_VARDEP +#if LIBOMP_USE_VARDEP && OMP_40_ENABLED dep->flags.alias == 1 #else dep->base_addr == 0 @@ -668,7 +669,7 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node, // Filter deps in dep_list // TODO: Different algorithm for large dep_list ( > 10 ? ) for ( i = 0; i < ndeps; i ++ ) { -#if LIBOMP_USE_VARDEP +#if LIBOMP_USE_VARDEP && OMP_40_ENABLED if (dep_list[i].flags.alias == 0) { #if LIBOMP_USE_CONCURRENT_WRITE @@ -705,7 +706,7 @@ static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node, dep_list[j].base_addr = 0; // Mark j element as void } } -#endif // #if LIBOMP_USE_VARDEP +#endif // #if LIBOMP_USE_VARDEP && OMP_40_ENABLED } #if LIBOMP_USE_CONCURRENT_WRITE thread->th.th_commute_addr = 0; @@ -1016,6 +1017,9 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, // ); #if LIBOMP_USE_VARDEP + /* - all deps including extra deps are allocated into task data struct if depsinalloc is set */ + /* - ndeps and ndeps_noalias does not take into account extra deps */ + /* - if depsfill == 1 (i.e. gcc) then deps are already copied into taskdata->td_deps array */ kmp_int32 ndeps_extra = thread->th.th_edps_size[0]; kmp_int32 ndeps_extra_noalias = thread->th.th_edps_size[1]; kmp_int32 total_ndeps = ndeps + ndeps_extra; @@ -1024,24 +1028,27 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 total_ndeps = ndeps; kmp_int32 total_ndeps_noalias = ndeps_noalias; #endif - if (new_taskdata->td_flags.depsfill ==0) /* gcc already have stored dependencies */ + /* allocate here if not done or not enough (+extra deps) in task_alloc */ + if (new_taskdata->td_ndeps+new_taskdata->td_ndeps_noalias < total_ndeps+total_ndeps_noalias) { - /* allocate here if not done or not enough (+extra deps) in task_alloc */ - if (new_taskdata->td_ndeps+new_taskdata->td_ndeps_noalias < total_ndeps+total_ndeps_noalias) - { - kmp_depend_info_t* td_deps = (kmp_depend_info_t*)__kmp_fast_allocate( thread, - (total_ndeps+total_ndeps_noalias)*sizeof(kmp_depend_info_t) - ); - new_taskdata->td_flags.depsinalloc = 0; - if (new_taskdata->td_deps) - __kmp_fast_free(thread, new_taskdata->td_deps); - // do not free td_deps_noalias - new_taskdata->td_deps = td_deps; - } - new_taskdata->td_deps_noalias = new_taskdata->td_deps+total_ndeps; + kmp_depend_info_t* td_deps = (kmp_depend_info_t*)__kmp_fast_allocate( thread, + (total_ndeps + total_ndeps_noalias)*sizeof(kmp_depend_info_t) + ); + new_taskdata->td_flags.depsinalloc = 0; + if (new_taskdata->td_flags.depsfill) + KMP_MEMCPY( td_deps, new_taskdata->td_deps, ndeps * sizeof(kmp_depend_info_t) ); + if (new_taskdata->td_deps) + __kmp_fast_free(thread, new_taskdata->td_deps); + // do not free td_deps_noalias, always in same allocated bloc + new_taskdata->td_deps = td_deps; + new_taskdata->td_deps_noalias = td_deps+total_ndeps; + } - kmp_int32 i; - kmp_depend_info_t* deps = new_taskdata->td_deps; + kmp_int32 i; + kmp_depend_info_t* deps = new_taskdata->td_deps; + if (new_taskdata->td_flags.depsfill ==0) /* gcc already have stored dependencies */ + { + /* if not allocated */ if (deps != dep_list) for (i=0; i<ndeps; ++i) { dep_list[i].flags.alias = 0; @@ -1049,62 +1056,66 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, } else deps += ndeps; + } #if LIBOMP_USE_VARDEP - kmp_extra_depinfo_th_t* bloc = &thread->th.th_edeps[0]; - do { - if (bloc->ed_size) { - kmp_depend_info_flags_t flags; - flags.in = 1; //(bloc->ed_mode & OMPEXT_MODE_READ ? 1: 0); - flags.out= (bloc->ed_mode & OMPEXT_MODE_WRITE ? 1: 0); - flags.cw = (bloc->ed_mode & OMPEXT_MODE_CONCURRENT ? 1: 0); - flags.commute = 0; - flags.alias = 0; - for (i=0; i<bloc->ed_size; ++i, ++deps) - { - deps->base_addr = bloc->ed_deps[i]; - deps->len = 1; - deps->flags = flags; - } + /* deps with possibly alias */ + kmp_extra_depinfo_th_t* bloc = &thread->th.th_edeps[0]; + do { + if (bloc->ed_size) { + kmp_depend_info_flags_t flags; + flags.in = 1; //(bloc->ed_mode & OMPEXT_MODE_READ ? 1: 0); + flags.out= (bloc->ed_mode & OMPEXT_MODE_WRITE ? 1: 0); + flags.cw = (bloc->ed_mode & OMPEXT_MODE_CONCURRENT ? 1: 0); + flags.commute = 0; + flags.alias = 0; + for (i=0; i<bloc->ed_size; ++i, ++deps) + { + deps->base_addr = bloc->ed_deps[i]; + deps->len = 1; + deps->flags = flags; } - kmp_extra_depinfo_th_t* blocn = bloc->ed_next; - if (bloc != &thread->th.th_edeps[0]) - __kmp_fast_free(thread, bloc ); - bloc = blocn; - } while (bloc != 0); -#endif - KMP_ASSERT( deps == new_taskdata->td_deps_noalias ); - for (i=0; i<ndeps_noalias; ++i) { - noalias_dep_list[i].flags.alias = 0; - *deps++ = noalias_dep_list[i]; } + kmp_extra_depinfo_th_t* blocn = bloc->ed_next; + if (bloc != &thread->th.th_edeps[0]) + __kmp_fast_free(thread, bloc ); + bloc = blocn; + } while (bloc != 0); +#endif + KMP_ASSERT( deps == new_taskdata->td_deps_noalias || (total_ndeps_noalias==0) ); + for (i=0; i<ndeps_noalias; ++i) { + noalias_dep_list[i].flags.alias = 0; + *deps++ = noalias_dep_list[i]; + } #if LIBOMP_USE_VARDEP - bloc = &thread->th.th_edeps[1]; - do { - if (bloc->ed_size) { - kmp_depend_info_flags_t flags; - flags.in = 1; //(bloc->ed_mode & OMPEXT_MODE_READ ? 1: 0); - flags.out= (bloc->ed_mode & OMPEXT_MODE_WRITE ? 1: 0); - flags.cw = (bloc->ed_mode & OMPEXT_MODE_CONCURRENT ? 1: 0); - flags.commute = 0; - flags.alias = 0; - for (i=0; i<bloc->ed_size; ++i, ++deps) - { - deps->base_addr = bloc->ed_deps[i]; - deps->len = 1; - deps->flags = flags; - } + bloc = &thread->th.th_edeps[1]; + do { + if (bloc->ed_size) { + kmp_depend_info_flags_t flags; + flags.in = 1; //(bloc->ed_mode & OMPEXT_MODE_READ ? 1: 0); + flags.out= (bloc->ed_mode & OMPEXT_MODE_WRITE ? 1: 0); + flags.cw = (bloc->ed_mode & OMPEXT_MODE_CONCURRENT ? 1: 0); + flags.commute = 0; + flags.alias = 0; + for (i=0; i<bloc->ed_size; ++i, ++deps) + { + deps->base_addr = bloc->ed_deps[i]; + deps->len = 1; + deps->flags = flags; } - kmp_extra_depinfo_th_t* blocn = bloc->ed_next; - if (bloc != &thread->th.th_edeps[1]) - __kmp_fast_free(thread, bloc ); - bloc = blocn; - } while (bloc != 0); - thread->th.th_edps_size[0] =0; - thread->th.th_edps_size[1] =0; - thread->th.th_edeps_tail[0] =0; - thread->th.th_edeps_tail[1] =0; -#endif - } + } + kmp_extra_depinfo_th_t* blocn = bloc->ed_next; + if (bloc != &thread->th.th_edeps[1]) + __kmp_fast_free(thread, bloc ); + bloc = blocn; + } while (bloc != 0); + /* reset thread state about extra deps */ + thread->th.th_edps_size[0] =0; + thread->th.th_edps_size[1] =0; + thread->th.th_edeps_tail[0] =0; + thread->th.th_edeps_tail[1] =0; +#endif + +//kmp_depend_info_t *save_dep_list = dep_list; dep_list = new_taskdata->td_deps; ndeps = new_taskdata->td_ndeps = total_ndeps; noalias_dep_list = new_taskdata->td_deps_noalias; @@ -1170,8 +1181,22 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, "dependencies: " "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref, new_taskdata)); +#if 0 + printf("%i:: Task %p #deps=%i @dep=%p/%p @=%p/%p has dependencies.\n", gtid, new_task, ndeps, dep_list, save_dep_list, + (ndeps >0 ? dep_list[0].base_addr : 0), + (ndeps >0 ? save_dep_list[0].base_addr : 0) + ); +#endif return TASK_CURRENT_NOT_QUEUED; } + else { +#if 0 + printf("%i:: Task %p #deps=%i @dep=%p/%p @=%p/%p is independent.\n", gtid, new_task, ndeps, dep_list, save_dep_list, + (ndeps >0 ? dep_list[0].base_addr : 0), + (ndeps >0 ? save_dep_list[0].base_addr : 0) + ); +#endif + } #if LIBOMP_USE_CONCURRENT_WRITE /* task ready: check to acquired commute ressources */ if ((node->dn./*list_*/cw) && __kmp_acquired_or_failed( gtid, node, node->dn./*list_*/cw) ) diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp index 804c19d6dc8ecaeb9a3bdf483d2fcdce758f0116..8d9a588d0cf9b8fc347549288170e8ccbdf27f8d 100644 --- a/runtime/src/kmp_tasking.cpp +++ b/runtime/src/kmp_tasking.cpp @@ -1115,10 +1115,13 @@ static size_t __kmp_round_up_to_val(size_t size, size_t val) { // sizeof_shareds: Size in bytes of array of pointers to shared vars accessed // in task. // task_entry: Pointer to task code entry point generated by compiler. +// ndeps,ndeps_noalias: size of deps + deps without alias to be stored in taskdata struct. +// only to be considered if flag depsinalloc is set. // returns: a pointer to the allocated kmp_task_t structure (task). +// Not that variable length deps are allocated by task_alloc if flag depsinalloc is set kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, - kmp_routine_entry_t task_entry, kmp_int32 deps, kmp_int32 deps_noalias ) + kmp_routine_entry_t task_entry, kmp_int32 ndeps, kmp_int32 ndeps_noalias ) { kmp_task_t *task; kmp_taskdata_t *taskdata; @@ -1128,9 +1131,9 @@ kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flag size_t shareds_offset; KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) " - "sizeof_task=%ld sizeof_shared=%ld entry=%p, deps=%d deps_noalias=%d\n", + "sizeof_task=%ld sizeof_shared=%ld entry=%p, ndeps=%d ndeps_noalias=%d\n", gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t, - sizeof_shareds, task_entry, deps, deps_noalias) ); + sizeof_shareds, task_entry, ndeps, ndeps_noalias) ); if ( parent_task->td_flags.final ) { if (flags->merged_if0) { @@ -1179,9 +1182,15 @@ kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flag #endif #if OMP_40_ENABLED - int ndeps; if (flags->depsinalloc) - ndeps = deps + deps_noalias; + { + ndeps = ndeps + ndeps_noalias; +#if LIBOMP_USE_VARDEP + kmp_int32 ndeps_extra = thread->th.th_edps_size[0]; + kmp_int32 ndeps_extra_noalias = thread->th.th_edps_size[1]; + ndeps += ndeps_extra + ndeps_extra_noalias; +#endif + } else #endif ndeps = 0; @@ -1230,11 +1239,11 @@ kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flag { //taskdata->td_deps = (kmp_depend_info_t*)& ((char *) taskdata)[ shareds_offset - ndeps*sizeof(kmp_depend_info_t) ]; taskdata->td_deps = (kmp_depend_info_t*)(task+1); - taskdata->td_ndeps = deps; - taskdata->td_deps_noalias = taskdata->td_deps+deps; - taskdata->td_ndeps_noalias = deps_noalias; + taskdata->td_ndeps = ndeps; + taskdata->td_deps_noalias = taskdata->td_deps+ndeps; + taskdata->td_ndeps_noalias = ndeps_noalias; taskdata->td_flags.depsinalloc = 1; - KMP_DEBUG_ASSERT( (void*)(taskdata->td_deps_noalias+deps_noalias) <= (void*)task->shareds || task->shareds == NULL); + KMP_DEBUG_ASSERT( (void*)(taskdata->td_deps_noalias+ndeps_noalias) <= (void*)task->shareds || task->shareds == NULL); } else { taskdata->td_deps = 0;