diff --git a/compute/pzgetrf_nopiv.c b/compute/pzgetrf_nopiv.c index 9d78bd9bd8b1c14ab3fb9af5f9a35e9184fd8216..ac33e9985ef32ede6aac4013098e312779206fe3 100644 --- a/compute/pzgetrf_nopiv.c +++ b/compute/pzgetrf_nopiv.c @@ -19,6 +19,7 @@ * @author Florent Pruvost * @author Samuel Thibault * @author Terry Cojean + * @author Matthieu Kuhn * @date 2022-02-22 * @precisions normal z -> s d c * @@ -30,9 +31,9 @@ /** * Parallel tile LU factorization with no pivoting - dynamic scheduling */ -void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, - RUNTIME_sequence_t *sequence, - RUNTIME_request_t *request) +void chameleon_pzgetrf_nopiv( CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) { CHAM_context_t *chamctxt; RUNTIME_option_t options; @@ -51,6 +52,19 @@ void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, ib = CHAMELEON_IB; + if ( chamctxt->autominmax_enabled && (chamctxt->scheduler == RUNTIME_SCHED_STARPU) ) { + int lookahead = chamctxt->lookahead; + int nbtasks_per_step = (A->mt * A->nt) / (A->p * A->q); /* NOTE(review): integer division, yields 0 when mt*nt < p*q */ + int mintasks = nbtasks_per_step * lookahead; + int maxtasks = nbtasks_per_step * (lookahead+1); + + if ( CHAMELEON_Comm_rank() == 0 ) { + chameleon_warning( "chameleon_pzgetrf_nopiv", + "Setting limit for the number of submitted tasks\n" ); + } + RUNTIME_set_minmax_submitted_tasks( mintasks, maxtasks ); + } + for (k = 0; k < chameleon_min(A->mt, A->nt); k++) { RUNTIME_iteration_push(chamctxt, k); diff --git a/control/auxiliary.h b/control/auxiliary.h index 45df77d6ad4c25da5c724f465d7c44643b21a760..f335bb3620d77f685c6e80e8da35d502fa64c655 100644 --- a/control/auxiliary.h +++ b/control/auxiliary.h @@ -31,6 +31,88 @@ extern "C" { #endif +/* + * Get environment variable + */ +#if defined(CHAMELEON_OS_WINDOWS) + +static inline int +chameleon_setenv( const char *var, const char *value, int overwrite ) { + return !(SetEnvironmentVariable( var, value )); +} + +static inline char * +chameleon_getenv( const char *var ) { + char *str; + int len = 512; + int rc; + 
str = (char*)malloc(len * sizeof(char)); /* heap buffer, released by chameleon_cleanenv(); NOTE(review): result is not NULL-checked */ + rc = GetEnvironmentVariable(var, str, len); + if (rc == 0) { + free(str); + str = NULL; + } + return str; +} + +static inline void +chameleon_cleanenv( char *str ) { + if (str != NULL) free(str); +} + +#else /* Other OS systems */ + +static inline int +chameleon_setenv( const char *var, const char *value, int overwrite ) { + return setenv( var, value, overwrite ); +} + +static inline char * +chameleon_getenv( const char *var ) { + return getenv( var ); +} + +static inline void +chameleon_cleanenv( char *str ) { + (void)str; +} + +#endif + + +static inline int +chameleon_env_is_set_to(char * str, char * value) { /* 1 when variable str exists and equals value exactly, else 0 */ + char * val; /* NOTE(review): never passed to chameleon_cleanenv(), so the Windows buffer leaks */ + if ( (val = chameleon_getenv(str)) && + !strcmp(val, value)) + return 1; + return 0; +} + +static inline int +chameleon_env_is_on(char * str) { + return chameleon_env_is_set_to(str, "1"); +} + +static inline int +chameleon_env_is_off(char * str) { + return chameleon_env_is_set_to(str, "0"); +} + +static inline int +chameleon_getenv_get_value_int(char * string, int default_value) { /* integer value of the variable, or default_value when unset or unparsable */ + long int ret; + char *str = chameleon_getenv(string); + if (str == NULL) return default_value; + + if ( sscanf( str, "%ld", &ret ) != 1 ) { + perror("sscanf"); /* NOTE(review): a failed match does not set errno, so this text can be misleading */ + return default_value; + } + + return (int)ret; +} + /** * Internal routines */ diff --git a/control/context.c b/control/context.c index a48c80ea72d59c26b283bb3f2d0e48a2e00c16ff..827ebc811db3a4b0989d093b752c1a89b5b03493 100644 --- a/control/context.c +++ b/control/context.c @@ -17,6 +17,8 @@ * @author Cedric Castagnede * @author Florent Pruvost * @author Guillaume Sylvand + * @author Alycia Lisito + * @author Matthieu Kuhn * @date 2022-02-22 * *** @@ -41,6 +43,65 @@ /* master threads context lookup table */ static CHAM_context_t *chameleon_ctxt = NULL; +static inline cham_householder_t +chameleon_getenv_householder(char * string, cham_householder_t default_value) { + long int ret; + char *str = chameleon_getenv(string); + if (str == NULL) return default_value; + 
if ( sscanf( str, "%ld", &ret ) == 1 ) { /* numeric form is tried first, the case-insensitive names below otherwise */ + switch (ret) { + case ChamFlatHouseholder: + return ChamFlatHouseholder; + case ChamTreeHouseholder: + return ChamTreeHouseholder; + default: + chameleon_error( "chameleon_getenv_householder", "Incorrect householder value" ); + return default_value; + } + } + + if(0 == strcasecmp("chamflathouseholder", str)) { return ChamFlatHouseholder; } + if(0 == strcasecmp("flat", str)) { return ChamFlatHouseholder; } + if(0 == strcasecmp("chamtreehouseholder", str)) { return ChamTreeHouseholder; } + if(0 == strcasecmp("tree", str)) { return ChamTreeHouseholder; } + + chameleon_error( "chameleon_getenv_householder", "Incorrect householder type" ); + + return default_value; +} + +static inline cham_translation_t +chameleon_getenv_translation(char * string, cham_translation_t default_value) { + long int ret; + char *str = chameleon_getenv(string); + if (str == NULL) return default_value; + + if ( sscanf( str, "%ld", &ret ) == 1 ) { /* numeric form is tried first, the case-insensitive names below otherwise */ + switch (ret) { + case ChamInPlace: + return ChamInPlace; + case ChamOutOfPlace: + return ChamOutOfPlace; + default: + chameleon_error( "chameleon_getenv_translation", "Incorrect translation value" ); + return default_value; + } + } + + if(0 == strcasecmp("chaminplace", str)) { return ChamInPlace; } + if(0 == strcasecmp("inplace", str)) { return ChamInPlace; } + if(0 == strcasecmp("in", str)) { return ChamInPlace; } + + if(0 == strcasecmp("chamoutofplace", str)) { return ChamOutOfPlace; } + if(0 == strcasecmp("outofplace", str)) { return ChamOutOfPlace; } + if(0 == strcasecmp("out", str)) { return ChamOutOfPlace; } + + chameleon_error( "chameleon_getenv_translation", "Incorrect translation type" ); + + return default_value; +} + /** * Create new context */ @@ -61,26 +122,27 @@ CHAM_context_t *chameleon_context_create() /* These initializations are just in case the user disables autotuning and does not set nb and ib */ - chamctxt->nb = 320; - chamctxt->ib = 48; - chamctxt->rhblock = 4; - chamctxt->lookahead = 
3; + chamctxt->nb = chameleon_getenv_get_value_int( "CHAMELEON_TILE_SIZE", 384 ); + chamctxt->ib = chameleon_getenv_get_value_int( "CHAMELEON_INNER_BLOCK_SIZE", 48 ); + chamctxt->rhblock = chameleon_getenv_get_value_int( "CHAMELEON_HOUSEHOLDER_SIZE", 4 ); + chamctxt->lookahead = chameleon_getenv_get_value_int( "CHAMELEON_LOOKAHEAD", 1 ); chamctxt->nworkers = 1; chamctxt->ncudas = 0; chamctxt->nthreads_per_worker= 1; - chamctxt->warnings_enabled = CHAMELEON_TRUE; - chamctxt->autotuning_enabled = CHAMELEON_FALSE; - chamctxt->parallel_enabled = CHAMELEON_FALSE; - chamctxt->profiling_enabled = CHAMELEON_FALSE; - chamctxt->progress_enabled = CHAMELEON_FALSE; - chamctxt->generic_enabled = CHAMELEON_FALSE; + chamctxt->warnings_enabled = !chameleon_env_is_off( "CHAMELEON_WARNINGS" ); /* warnings stay on (the previous default) unless the variable is exactly "0" */ + chamctxt->autotuning_enabled = chameleon_env_is_on( "CHAMELEON_AUTOTUNING" ); + chamctxt->parallel_enabled = chameleon_env_is_on( "CHAMELEON_PARALLEL_KERNEL" ); + chamctxt->profiling_enabled = chameleon_env_is_on( "CHAMELEON_PROFILING_MODE" ); + chamctxt->progress_enabled = chameleon_env_is_on( "CHAMELEON_PROGRESS" ); + chamctxt->generic_enabled = chameleon_env_is_on( "CHAMELEON_GENERIC" ); + chamctxt->autominmax_enabled = chameleon_env_is_on( "CHAMELEON_AUTOMINMAX" ); chamctxt->runtime_paused = CHAMELEON_FALSE; - chamctxt->householder = ChamFlatHouseholder; - chamctxt->translation = ChamInPlace; + chamctxt->householder = chameleon_getenv_householder( "CHAMELEON_HOUSEHOLDER_MODE", ChamFlatHouseholder ); + chamctxt->translation = chameleon_getenv_translation( "CHAMELEON_TRANSLATION_MODE", ChamInPlace ); /* Initialize scheduler */ RUNTIME_context_create(chamctxt); diff --git a/doc/user/chapters/using.org b/doc/user/chapters/using.org index a8a5ade105e5b583448c3175256a39770bb87c66..91533bfedb639ab00e229b01ebbf20c6f846f80d 100644 --- a/doc/user/chapters/using.org +++ b/doc/user/chapters/using.org @@ -63,6 +63,51 @@ * lange: General matrix norm * lacpy: General matrix copy +**** Configuration 
through environment variables + <<sec:env_variables>> + + Some parameters of the Chameleon library can be given + default values through the environment variables listed + below. Note that the code itself can modify these values through + calls to `CHAMELEON_Enable()`, `CHAMELEON_Disable()`, or + `CHAMELEON_Set()` (see [[sec:options_routines][Options]]). + + * *CHAMELEON_TILE_SIZE* defines the default tile size value for + all algorithms. The default value is *384*. + * *CHAMELEON_INNER_BLOCK_SIZE* defines the default inner blocking + size value for algorithms that require it (mainly QR/LQ + algorithms). The default value is *48*. + * *CHAMELEON_HOUSEHOLDER_MODE* changes the basic QR algorithm + from a flat tree (*1*, *ChamFlatHouseholder*, or *Flat*) to a + Householder tree (*2*, *ChamTreeHouseholder*, or *Tree*). The + default value is *ChamFlatHouseholder*. + * *CHAMELEON_HOUSEHOLDER_SIZE* defines the size of the local + Householder trees if the Householder tree mode is set. The + default value is *4*. + * *CHAMELEON_TRANSLATION_MODE* defines the translation used in + the LAPACK API routines. *1*, *In*, or *ChamInPlace* sets the + in-place translation to avoid copies. *2*, *Out*, or + *ChamOutOfPlace* sets the out-of-place translation that uses a + copy of the matrix. The default is *ChamInPlace*. + * *CHAMELEON_GENERIC*: if set to *1*, all specialized algorithms + specific to data distributions are disabled. + * *CHAMELEON_AUTOMINMAX*: if set to *1*, the minimal/maximal limits of + tasks that can be submitted to the runtime system are + set. These limits are computed per algorithm using the + _lookahead_ parameter (StarPU specific, and currently + only available for getrf). + * *CHAMELEON_LOOKAHEAD* defines the number of steps that will be + submitted in advance in algorithms using lookahead + techniques. The default is *1*. 
+ + * *CHAMELEON_WARNINGS* enables/disables the warning output. + * *CHAMELEON_PARALLEL_KERNEL* enables/disables the use of + multi-threaded kernels. Available only for the StarPU runtime system. + * *CHAMELEON_PROFILING_MODE* enables the profiling information of + the kernels (StarPU specific). + * *CHAMELEON_PROGRESS* enables the progress function to show the + percentage of tasks completed. + **** Execution trace using EZTrace <<sec:trace_ezt>> @@ -795,13 +840,14 @@ * map: apply a user operator on each tile of the matrix **** Options routines + <<sec:options_routines>> Enable CHAMELEON feature. #+begin_src int CHAMELEON_Enable (CHAMELEON_enum option); #+end_src - Feature to be enabled: + Features that can be enabled/disabled: * *CHAMELEON_WARNINGS*: printing of warning messages, - * *CHAMELEON_AUTOTUNING*: autotuning for tile size and inner block size, + * *CHAMELEON_AUTOTUNING*: autotuning for tile size and inner block size (inactive), * *CHAMELEON_PROFILING_MODE*: activate kernels profiling, * *CHAMELEON_PROGRESS*: to print a progress status, * *CHAMELEON_GEMM3M*: to enable the use of the /gemm3m/ blas bunction. diff --git a/include/chameleon/runtime.h b/include/chameleon/runtime.h index e1aa0f96c0de8aa8e6b18d849aee1a22f8ee27d6..8794b5618391d33fd7da9987cd7d399fe92f6073 100644 --- a/include/chameleon/runtime.h +++ b/include/chameleon/runtime.h @@ -17,6 +17,7 @@ * @author Florent Pruvost * @author Samuel Thibault * @author Philippe Swartvagher + * @author Matthieu Kuhn * @date 2022-02-22 * */ @@ -647,6 +648,24 @@ RUNTIME_options_ws_alloc( RUNTIME_option_t *options, int RUNTIME_options_ws_free( RUNTIME_option_t *options ); +/** + * @brief Set the minimum and maximum limits of tasks submitted to the runtime + * + * @warning Only StarPU for now + * + * @param[in] min + * Minimum number of tasks under which the task submission + * restarts. + * + * @param[in] max + * Maximum number of tasks submitted to the runtime. 
+ * When reached, we stop submitting and switch to execution + * until the minimum number of tasks is reached. + * + */ +void +RUNTIME_set_minmax_submitted_tasks( int min, int max ); + /** * @} * diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h index 7ab9711e1df9dfc9a9df131c501f37da4d66f0f1..bd49119df09a400e09e21a6d9096b0ec5a19c7ea 100644 --- a/include/chameleon/struct.h +++ b/include/chameleon/struct.h @@ -17,6 +17,7 @@ * @author Cedric Castagnede * @author Florent Pruvost * @author Samuel Thibault + * @author Matthieu Kuhn * @date 2022-02-22 * */ @@ -137,6 +138,7 @@ typedef struct chameleon_context_s { cham_bool_t profiling_enabled; cham_bool_t progress_enabled; cham_bool_t generic_enabled; + cham_bool_t autominmax_enabled; /* initialized from CHAMELEON_AUTOMINMAX; gates the submitted-task limits set in chameleon_pzgetrf_nopiv */ cham_bool_t runtime_paused; cham_householder_t householder; // "domino" (flat) or tree-based (reduction) Householder diff --git a/runtime/openmp/control/runtime_control.c b/runtime/openmp/control/runtime_control.c index e50193c0fd4ab8eccd8860d5f243a16edd897496..02a44c477cdb80cc118174b9af82b328d7668bc2 100644 --- a/runtime/openmp/control/runtime_control.c +++ b/runtime/openmp/control/runtime_control.c @@ -18,6 +18,7 @@ * @author Florent Pruvost * @author Philippe Virouleau * @author Philippe Swartvagher + * @author Matthieu Kuhn * @date 2022-02-22 * */ @@ -127,3 +128,8 @@ int RUNTIME_comm_size( CHAM_context_t *chamctxt ) (void)chamctxt; return 1; } + +void RUNTIME_set_minmax_submitted_tasks( int min, int max ) { /* no-op in this runtime: both limits are ignored */ + (void)min; + (void)max; +} diff --git a/runtime/parsec/control/runtime_control.c b/runtime/parsec/control/runtime_control.c index 5a5347866f596eba12444fe8ed66085a4adafe8c..5aa0484a49458c1313d28acad2c6ef3f9731100d 100644 --- a/runtime/parsec/control/runtime_control.c +++ b/runtime/parsec/control/runtime_control.c @@ -16,6 +16,7 @@ * @author Mathieu Faverge * @author Samuel Thibault * @author Philippe Swartvagher + * @author Matthieu Kuhn * @date 2022-02-22 * */ @@ -153,3 +154,8 @@ int RUNTIME_comm_size( 
CHAM_context_t *chamctxt ) (void)chamctxt; return size; } + +void RUNTIME_set_minmax_submitted_tasks( int min, int max ) { /* no-op in this runtime: both limits are ignored */ + (void)min; + (void)max; +} diff --git a/runtime/quark/control/runtime_control.c b/runtime/quark/control/runtime_control.c index bdb4a78d33b6eeee66d297b95e28e4a2cd20d36f..a457839f7c48fcc1642868e9a4b779ab4bdaa37b 100644 --- a/runtime/quark/control/runtime_control.c +++ b/runtime/quark/control/runtime_control.c @@ -18,6 +18,7 @@ * @author Mathieu Faverge * @author Samuel Thibault * @author Philippe Swartvagher + * @author Matthieu Kuhn * @date 2022-02-22 * */ @@ -134,3 +135,8 @@ int RUNTIME_comm_size( CHAM_context_t *chamctxt ) (void)chamctxt; return 1; } + +void RUNTIME_set_minmax_submitted_tasks( int min, int max ) { /* no-op in this runtime: both limits are ignored */ + (void)min; + (void)max; +} diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt index 09a02526c81e484f8b914b7100ed6768ec940457..3fb2514a9a98c7b08f37d700e87dde37170d73e2 100644 --- a/runtime/starpu/CMakeLists.txt +++ b/runtime/starpu/CMakeLists.txt @@ -23,6 +23,7 @@ # @author Mathieu Faverge # @author Florent Pruvost # @author Samuel Thibault +# @author Matthieu Kuhn # @date 2022-02-22 # ### @@ -73,6 +74,10 @@ if ( STARPU_FOUND ) if ( HAVE_STARPU_DATA_PEEK ) message("-- ${Blue}Add definition HAVE_STARPU_DATA_PEEK${ColourReset}") endif() + check_function_exists(starpu_set_limit_min_submitted_tasks HAVE_STARPU_SET_LIMIT_SUBMITTED_TASKS ) + if ( HAVE_STARPU_SET_LIMIT_SUBMITTED_TASKS ) + message("-- ${Blue}Add definition HAVE_STARPU_SET_LIMIT_SUBMITTED_TASKS${ColourReset}") + endif() check_struct_has_member( "struct starpu_data_interface_ops" reuse_data_on_node "starpu_data_interfaces.h" HAVE_STARPU_REUSE_DATA_ON_NODE LANGUAGE "C" ) if ( HAVE_STARPU_REUSE_DATA_ON_NODE ) message("-- ${Blue}Add definition HAVE_STARPU_REUSE_DATA_ON_NODE${ColourReset}") diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c index 
37979d86ef77b0a5f691034b82de609424b11ed2..bcc79254f0db5ee2e95aab1f703200e9f644d2ae 100644 --- a/runtime/starpu/control/runtime_control.c +++ b/runtime/starpu/control/runtime_control.c @@ -18,6 +18,7 @@ * @author Florent Pruvost * @author Philippe Swartvagher * @author Samuel Thibault + * @author Matthieu Kuhn * @date 2022-02-22 * */ @@ -320,3 +321,16 @@ int RUNTIME_comm_size( CHAM_context_t *chamctxt ) (void)chamctxt; return size; } + +void RUNTIME_set_minmax_submitted_tasks( int min, int max ){ +#if defined(HAVE_STARPU_SET_LIMIT_SUBMITTED_TASKS) /* macro comes from the CMake probe, which only checks starpu_set_limit_min_submitted_tasks */ + starpu_set_limit_min_submitted_tasks( min ); + starpu_set_limit_max_submitted_tasks( max ); +#else /* setters unavailable: nothing is changed, only a hint is printed on stderr */ + fprintf( stderr, + "RUNTIME_set_minmax_submitted_tasks: StarPU version does not support dynamic limit setting.\n" + "Please use setting through environment variables:\n" + " export STARPU_LIMIT_MIN_SUBMITTED_TASKS=%d\n" + " export STARPU_LIMIT_MAX_SUBMITTED_TASKS=%d\n", min, max ); +#endif +}