Make task throttling a runtime option

parent 402c7439
......@@ -2119,6 +2119,7 @@ typedef enum kmp_tasking_mode {
extern kmp_tasking_mode_t
__kmp_tasking_mode; /* determines how/when to execute tasks */
extern int __kmp_task_stealing_constraint;
extern int __kmp_enable_task_throttling;
#if OMP_40_ENABLED
extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if
// specified, defaults to 0 otherwise
......
......@@ -341,6 +341,7 @@ omp_memspace_handle_t const omp_low_lat_mem_space =
KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4);
int __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */
int __kmp_enable_task_throttling = 1;
#ifdef DEBUG_SUSPEND
int __kmp_suspend_count = 0;
......
......@@ -4682,6 +4682,20 @@ static void __kmp_stg_print_forkjoin_frames_mode(kmp_str_buf_t *buffer,
} // __kmp_stg_print_forkjoin_frames
#endif /* USE_ITT_BUILD */
// -----------------------------------------------------------------------------
// KMP_ENABLE_TASK_THROTTLING
static void __kmp_stg_parse_task_throttling(char const *name,
char const *value, void *data) {
__kmp_stg_parse_bool(name, value, &__kmp_enable_task_throttling);
} // __kmp_stg_parse_task_throttling
static void __kmp_stg_print_task_throttling(kmp_str_buf_t *buffer,
char const *name, void *data) {
__kmp_stg_print_bool(buffer, name, __kmp_enable_task_throttling);
} // __kmp_stg_print_task_throttling
// -----------------------------------------------------------------------------
// OMP_DISPLAY_ENV
......@@ -5003,6 +5017,8 @@ static kmp_setting_t __kmp_stg_table[] = {
{"KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode,
__kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0},
#endif
{"KMP_ENABLE_TASK_THROTTLING", __kmp_stg_parse_task_throttling,
__kmp_stg_print_task_throttling, NULL, 0, 0},
#if OMP_40_ENABLED
{"OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env,
......
......@@ -374,7 +374,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
// Check if deque is full
if (TCR_4(thread_data->td.td_deque_ntasks) >=
TASK_DEQUE_SIZE(thread_data->td)) {
if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
if (__kmp_enable_task_throttling &&
__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
thread->th.th_current_task)) {
KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning "
"TASK_NOT_PUSHED for task %p\n",
......@@ -394,7 +395,8 @@ static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) {
// Need to recheck as we can get a proxy task from thread outside of OpenMP
if (TCR_4(thread_data->td.td_deque_ntasks) >=
TASK_DEQUE_SIZE(thread_data->td)) {
if (__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
if (__kmp_enable_task_throttling &&
__kmp_task_is_allowed(gtid, __kmp_task_stealing_constraint, taskdata,
thread->th.th_current_task)) {
__kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; "
......
// RUN: %libomp-compile && env KMP_ENABLE_TASK_THROTTLING=0 %libomp-run
// RUN: %libomp-compile && env KMP_ENABLE_TASK_THROTTLING=1 %libomp-run
#include<omp.h>
#include<stdlib.h>
#include<string.h>
/**
* Test the task throttling behavior of the runtime.
* Unless OMP_NUM_THREADS is 1, the master thread pushes tasks to its own tasks
* queue until either of the following happens:
* - the task queue is full, and it starts serializing tasks
* - all tasks have been pushed, and it can begin execution
* The idea is to create a huge number of tasks which execution are blocked
* until the master thread comes to execute tasks (they need to be blocking,
* otherwise the second thread will start emptying the queue).
* At this point we can check the number of enqueued tasks: iff all tasks have
* been enqueued, then there was no task throttling.
* Otherwise there has been some sort of task throttling.
* If what we detect doesn't match the value of the environment variable, the
* test is failed.
*/
#define NUM_TASKS 2000
int main()
{
int i;
int block = 1;
int tid;
int throttling = strcmp(getenv("KMP_ENABLE_TASK_THROTTLING"), "1") == 0;
int enqueued = 0;
int failed = -1;
#pragma omp parallel num_threads(2)
#pragma omp master
{
for (i = 0; i < NUM_TASKS; i++) {
enqueued++;
#pragma omp task
{
tid = omp_get_thread_num();
if (tid == 0) {
// As soon as the master thread starts executing task we should unlock
// all tasks, and detect the test failure if it has not been done yet.
if (failed < 0)
failed = throttling ? enqueued == NUM_TASKS : enqueued < NUM_TASKS;
block = 0;
}
while (block)
;
}
}
block = 0;
}
return failed;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment