diff --git a/runtime/src/exports_so.txt b/runtime/src/exports_so.txt index b7d00cd46baf5dbf8bb471df08061c0ebcc56e5a..47ab96619b59b0cdc87c362f83666b9e206888de 100644 --- a/runtime/src/exports_so.txt +++ b/runtime/src/exports_so.txt @@ -129,6 +129,8 @@ OMP_3.1 { } OMP_3.0; OMP_4.0 { } OMP_3.1; +OMP_4.5 { +} OMP_4.0; # sets up GCC GOMP_ version dependency chain GOMP_1.0 { @@ -139,5 +141,7 @@ GOMP_3.0 { } GOMP_2.0; GOMP_4.0 { } GOMP_3.0; +GOMP_4.5 { +} GOMP_4.0; # end of file # diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h index 628cad49230a60f860ff370a1089e27d002eaf72..82ccda31ae0b0b63704414327808837862991514 100644 --- a/runtime/src/kmp.h +++ b/runtime/src/kmp.h @@ -2394,6 +2394,7 @@ struct kmp_taskdata { /* aligned during dynamic allocation */ td_ndeps_noalias; #endif #if OMP_45_ENABLED + kmp_int32 priority; kmp_task_team_t *td_task_team; kmp_int32 td_size_alloc; // The size of task structure, including shareds etc. #endif @@ -2501,19 +2502,20 @@ typedef struct kmp_base_task_team { #if OMP_45_ENABLED kmp_int32 tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */ + kmp_queue_data_t **tt_task_queues_priority; /* array of size 0..__kmp_max_task_priority-1*/ #endif - kmp_queue_data_t *tt_task_queues[KMP_LEVEL_QUEUE_MAX]; + kmp_queue_data_t *tt_task_queues[KMP_LEVEL_QUEUE_MAX]; #if LIBOMP_USE_AFFINITY - // Mask of NUMA nodes active for this team - kmp_queue_data_t *tt_task_private_queues[KMP_LEVEL_QUEUE_MAX]; - kmp_int32 tt_num_nodes; - kmp_affin_mask_t *tt_nodes_mask; + // Mask of NUMA nodes active for this team + kmp_queue_data_t *tt_task_private_queues[KMP_LEVEL_QUEUE_MAX]; + kmp_int32 tt_num_nodes; + kmp_affin_mask_t *tt_nodes_mask; #endif #if LIBOMP_USE_EXTSCHED_MEM /* only MACHINE and NUMA level is defined */ - kmp_queue_data_t *tt_task_queues_free[KMP_LEVEL_MAX]; - kmp_queue_data_t *tt_task_private_queues_free[KMP_LEVEL_MAX]; - kmp_queue_data_t *tt_task_queues_mem[KMP_LEVEL_MAX]; - kmp_queue_data_t *tt_task_private_queues_mem[KMP_LEVEL_MAX]; + kmp_queue_data_t *tt_task_queues_free[KMP_LEVEL_MAX]; + kmp_queue_data_t *tt_task_private_queues_free[KMP_LEVEL_MAX]; + kmp_queue_data_t *tt_task_queues_mem[KMP_LEVEL_MAX]; + kmp_queue_data_t *tt_task_private_queues_mem[KMP_LEVEL_MAX]; #endif KMP_ALIGN_CACHE diff --git a/runtime/src/kmp_ftn_entry.h b/runtime/src/kmp_ftn_entry.h index 47f65c69902b3569219b8c4457d5a2e922a404e2..e22dfa1620cb2477c3b4ddf84c58affb7b6081d4 100644 --- a/runtime/src/kmp_ftn_entry.h +++ b/runtime/src/kmp_ftn_entry.h @@ -1270,7 +1270,7 @@ int FTN_STDCALL FTN_GET_CANCELLATION_STATUS(int cancel_kind) { #if OMP_45_ENABLED /* returns the maximum allowed task priority */ -int FTN_STDCALL FTN_GET_MAX_TASK_PRIORITY(void) { +int FTN_STDCALL xexpand(FTN_GET_MAX_TASK_PRIORITY)(void) { #ifdef KMP_STUB return 0; #else @@ -1366,6 +1366,7 @@ xaliasify(FTN_IS_INITIAL_DEVICE, 40); #if OMP_45_ENABLED // OMP_4.5 aliases +xaliasify(FTN_GET_MAX_TASK_PRIORITY, 45); #endif #if OMP_50_ENABLED @@ -1437,6 +1438,7 @@ xversionify(FTN_IS_INITIAL_DEVICE, 40, "OMP_4.0"); #if OMP_45_ENABLED // OMP_4.5 versioned symbols +xversionify(FTN_GET_MAX_TASK_PRIORITY, 45, "OMP_4.5"); #endif #if OMP_50_ENABLED diff --git a/runtime/src/kmp_ftn_os.h b/runtime/src/kmp_ftn_os.h index 0457f3209656e368e64fb242a81ef6c25c2f3503..68de62145a9bb9520b429e728894dfe309145ce0 100644 --- a/runtime/src/kmp_ftn_os.h +++ b/runtime/src/kmp_ftn_os.h @@ -548,6 +548,7 @@ #define FTN_GET_PLACE_NUM OMP_GET_PLACE_NUM_ #define FTN_GET_PARTITION_NUM_PLACES OMP_GET_PARTITION_NUM_PLACES_ #define FTN_GET_PARTITION_PLACE_NUMS OMP_GET_PARTITION_PLACE_NUMS_ + #ifdef KMP_STUB #define FTN_GET_INITIAL_DEVICE OMP_GET_INITIAL_DEVICE_ #define FTN_TARGET_ALLOC OMP_TARGET_ALLOC_ diff --git a/runtime/src/kmp_gsupport.cpp b/runtime/src/kmp_gsupport.cpp index 218957f8823b4521769135adf244503f4f298175..81711d1e73169c1f47169f66561320d953a436ba 100644 --- a/runtime/src/kmp_gsupport.cpp +++ b/runtime/src/kmp_gsupport.cpp @@ -877,6 +877,10 @@ xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_fu input_flags->native = 1; // __kmp_task_alloc() sets up all other flags + if (priority >0) + { + input_flags->priority_specified = 1; + } if (! if_cond) { arg_size = 0; } @@ -901,6 +905,9 @@ xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_fu kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); ndeps = taskdata->td_ndeps + taskdata->td_ndeps_noalias; + if (priority >0) + task->data2.priority = priority; + if (arg_size > 0) { if (arg_align > 0) { task->shareds = (void *)((((size_t)task->shareds) diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp index ae6acc770ea81c320fb2b84e33d4de2b884303e8..60e19efd2d405297bfc9d73a051ab95193f1431a 100644 --- a/runtime/src/kmp_tasking.cpp +++ b/runtime/src/kmp_tasking.cpp @@ -353,6 +353,17 @@ __kmp_push_task(kmp_int32 gtid, kmp_task_t * task ) int isremote = 1; #endif +#if OMP_45_ENABLED + if (taskdata->td_flags.priority_specified) + { + kmp_int32 priority = task->data2.priority; + if (priority < 0) priority = 0; + else if (priority > __kmp_max_task_priority) + priority = __kmp_max_task_priority; + taskdata->priority = priority; + } +#endif + #if LIBOMP_USE_AFFINITY int selected_coreid = thread->th.th_team_nproc; int aff_kind = taskdata->td_aff_kind; @@ -365,7 +376,17 @@ __kmp_push_task(kmp_int32 gtid, kmp_task_t * task ) /*Default case, just push to our current thread queue!*/ //int numa_victim = __kmp_get_random(thread) % task_team->tt.tt_num_nodes; //selected_queue = &task_team->tt.tt_task_queues[KMP_LEVEL_NUMA][numa_victim].qd; - //isremote = 1; + isremote = 0; +#if OMP_45_ENABLED + if (taskdata->priority >0) + { + KMP_ASSERT( taskdata->priority <= __kmp_max_task_priority); + selected_queue = &task_team->tt.tt_task_queues_priority[taskdata->priority-1]->qd; + //printf("Push on priority=%i queue\n", taskdata->priority); + isremote = 1; /* always lock push on this priority queue */ + } + else +#endif #if LIBOMP_USE_EXTSCHED_MEM if (taskdata->td_free_rsrc) selected_queue = &task_team->tt.tt_task_queues_free[KMP_LEVEL_MACHINE][0].qd; @@ -373,11 +394,10 @@ __kmp_push_task(kmp_int32 gtid, kmp_task_t * task ) selected_queue = &task_team->tt.tt_task_queues_mem[KMP_LEVEL_MACHINE][0].qd; else #endif - if ( __kmp_queue_empty(&task_team->tt.tt_task_private_queues[KMP_LEVEL_THREAD][tid])) - selected_queue = &task_team->tt.tt_task_private_queues[KMP_LEVEL_THREAD][tid].qd; - else - selected_queue = &task_team->tt.tt_task_queues[KMP_LEVEL_THREAD][tid].qd; - isremote = 0; + if ( __kmp_queue_empty(&task_team->tt.tt_task_private_queues[KMP_LEVEL_THREAD][tid])) + selected_queue = &task_team->tt.tt_task_private_queues[KMP_LEVEL_THREAD][tid].qd; + else + selected_queue = &task_team->tt.tt_task_queues[KMP_LEVEL_THREAD][tid].qd; KA_TRACE(10, ( "__kmp_push_task: pushing to own private queue, no affinity\n" ) ); } break; @@ -504,7 +524,17 @@ __kmp_push_task(kmp_int32 gtid, kmp_task_t * task ) KMP_DEBUG_ASSERT(0); break; } -#else // affinity +#else // USE_AFFINITY +#if OMP_45_ENABLED + if (taskdata->priority >0) + { + KMP_ASSERT( taskdata->priority <= __kmp_max_task_priority); + selected_queue = &task_team->tt.tt_task_queues_priority[taskdata->priority-1]->qd; + //printf("Push on priority=%i queue\n", taskdata->priority); + isremote = 1; /* always lock push on this priority queue */ + } + else +#endif #if LIBOMP_USE_EXTSCHED_MEM if (taskdata->td_free_rsrc) { @@ -529,7 +559,7 @@ __kmp_push_task(kmp_int32 gtid, kmp_task_t * task ) else #endif { - /*If no affinity, we just use cores, that are always available!*/ + /*If no affinity, no priority, no memory schedule we just use cores, that are always available!*/ selected_queue = &task_team->tt.tt_task_queues[KMP_LEVEL_THREAD][tid].qd; } #endif/* USE_AFFINITY */ @@ -1465,6 +1495,8 @@ kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flag KMP_DEBUG_ASSERT(taskdata->td_parent != NULL); #if OMP_45_ENABLED + taskdata->td_flags.priority_specified = flags->priority_specified; + // avoid copying icvs for proxy tasks if (flags->proxy == TASK_FULL) #endif @@ -2663,6 +2695,23 @@ __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 cpu, kmp_int32 node, kmp_in thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)]; kmp_queue_data_t *selected_struct_queue = 0; +#if OMP_45_ENABLED + /* look at task priority set before my local queue */ + for (kmp_int32 i = __kmp_max_task_priority-1; i>=0; --i) + { + if (task_team->tt.tt_task_queues_priority && + !kaapi_wsqueue_empty( &(task_team->tt.tt_task_queues_priority[i]->qd.td_wsdeque))) + { + taskdata = kaapi_wsqueue_locked_pop_task( &(task_team->tt.tt_task_queues_priority[i]->qd.td_wsdeque) ); + if (taskdata !=0) + { + task = KMP_TASKDATA_TO_TASK( taskdata ); + return task; + } + } + } +#endif + int islocal; selected_struct_queue = __kmp_select_queue(thread_data, cpu, node, thread->th.th_info.ds.ds_tid , 1, &islocal); if (!selected_struct_queue) { @@ -2671,6 +2720,10 @@ __kmp_remove_my_task( kmp_info_t * thread, kmp_int32 cpu, kmp_int32 node, kmp_in #if LIBOMP_USE_THEQUEUE||LIBOMP_USE_LINKED_DEQUEUE KMP_DEBUG_ASSERT( gtid == __kmp_get_gtid() ); +#endif + + +#if LIBOMP_USE_THEQUEUE||LIBOMP_USE_LINKED_DEQUEUE #if LIBOMP_USE_EXTSCHED_MEM //printf("%i::Islocal:%i\n", gtid, islocal); if (islocal ==2) /* == own queue or locked pop on constraints */ @@ -3722,6 +3775,13 @@ static int __kmp_realloc_task_threads_data(kmp_info_t *thread, KMP_CPU_FREE(new_tt_nodes_mask); KMP_CPU_FREE(all_cpus); #endif + /* allocate __kmp_max_task_priority priority for priority from MIN+1 (1) to __kmp_max_task_priority */ + task_team->tt.tt_task_queues_priority = (kmp_queue_data_t**)__kmp_allocate( sizeof(kmp_queue_data_t*)*(__kmp_max_task_priority)); + for (i=0; i<__kmp_max_task_priority; ++i) /* prio= 0 is no priority, default */ + { + task_team->tt.tt_task_queues_priority[i] = (kmp_queue_data_t *) __kmp_allocate( sizeof(kmp_queue_data_t)); + __kmp_init_task_deque(task_team->tt.tt_task_queues_priority[i], KMP_LEVEL_MACHINE, i, i); + } #if LIBOMP_USE_EXTSCHED_MEM if (task_team->tt.tt_task_queues_free[KMP_LEVEL_MACHINE] ==0) {