From 7ff0faacbeb264b59e189270e3cb8f8e26a6c4dd Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Thu, 4 May 2023 15:25:52 +0200 Subject: [PATCH] getrf: add an alg parameter to dynamically switch from one algorithm to another through environment variable --- compute/pzgetrf.c | 18 +++++++++-------- compute/zgetrf.c | 38 ++++++++++++++++++++++++++--------- control/compute_z.h | 5 +++-- include/chameleon/constants.h | 8 ++++++++ 4 files changed, 49 insertions(+), 20 deletions(-) diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c index 967e83f2f..165801efb 100644 --- a/compute/pzgetrf.c +++ b/compute/pzgetrf.c @@ -113,11 +113,13 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws, int k, RUNTIME_option_t *options ) { -#if defined(GETRF_NOPIV_PER_COLUMN) - chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options ); -#else - chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options ); -#endif + /* TODO: Should be replaced by a function pointer */ + if ( ws->alg == ChamGetrfNoPivPerColumn ) { + chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options ); + } + else { + chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options ); + } } /** @@ -180,9 +182,9 @@ chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws, * Parallel tile LU factorization with no pivoting - dynamic scheduling */ void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, - CHAM_desc_t *A, - RUNTIME_sequence_t *sequence, - RUNTIME_request_t *request ) + CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) { CHAM_context_t *chamctxt; RUNTIME_option_t options; diff --git a/compute/zgetrf.c b/compute/zgetrf.c index 97f010bb9..bcb8ee0c8 100644 --- a/compute/zgetrf.c +++ b/compute/zgetrf.c @@ -61,15 +61,33 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) } ws = calloc( 1, sizeof( struct chameleon_pzgetrf_s ) ); + ws->alg = ChamGetrfNoPiv; ws->ib = CHAMELEON_IB; -#if defined(GETRF_NOPIV_PER_COLUMN) - chameleon_desc_init( 
&(ws->U), CHAMELEON_MAT_ALLOC_TILE, - ChamComplexDouble, 1, A->nb, A->nb, - A->mt, A->nt * A->nb, 0, 0, - A->mt, A->nt * A->nb, A->p, A->q, - NULL, NULL, A->get_rankof_init ); -#endif + { + char *algostr = chameleon_getenv( "CHAMELEON_GETRF_ALGO" ); + + if ( algostr ) { + if ( strcasecmp( algostr, "nopiv" ) == 0 ) { + ws->alg = ChamGetrfNoPiv; + } + else if ( strcasecmp( algostr, "nopivpercolumn" ) == 0 ) { + ws->alg = ChamGetrfNoPivPerColumn; + } + else { + fprintf( stderr, "ERROR: CHAMELEON_GETRF_ALGO is not one of NoPiv, NoPivPerColumn => Switch back to NoPiv\n" ); + } + } + chameleon_cleanenv( algostr ); + } + + if ( ws->alg == ChamGetrfNoPivPerColumn ) { + chameleon_desc_init( &(ws->U), CHAMELEON_MAT_ALLOC_TILE, + ChamComplexDouble, 1, A->nb, A->nb, + A->mt, A->nt * A->nb, 0, 0, + A->mt, A->nt * A->nb, A->p, A->q, + NULL, NULL, A->get_rankof_init ); + } return ws; } @@ -98,9 +116,9 @@ CHAMELEON_zgetrf_WS_Free( void *user_ws ) { struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)user_ws; -#if defined(GETRF_NOPIV_PER_COLUMN) - chameleon_desc_destroy( &(ws->U) ); -#endif + if ( ws->alg == ChamGetrfNoPivPerColumn ) { + chameleon_desc_destroy( &(ws->U) ); + } free( ws ); } diff --git a/control/compute_z.h b/control/compute_z.h index bf9fdc8dc..1dd9b1305 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -41,8 +41,9 @@ struct chameleon_pzgemm_s { * @brief Data structure to handle the GETRF workspaces with partial pivoting */ struct chameleon_pzgetrf_s { - int ib; /* Internal blocking parameter */ - CHAM_desc_t U; + cham_getrf_t alg; + int ib; /* Internal blocking parameter */ + CHAM_desc_t U; }; /** diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h index ba252005b..d3d2b4317 100644 --- a/include/chameleon/constants.h +++ b/include/chameleon/constants.h @@ -194,6 +194,14 @@ typedef enum chameleon_gemm_e { ChamGemmAlgSummaC } cham_gemm_t; +/** + * @brief Chameleon GETRF algorithm variants + */ +typedef enum chameleon_getrf_e 
{ + ChamGetrfNoPiv, + ChamGetrfNoPivPerColumn, +} cham_getrf_t; + #define ChameleonTrd 1001 #define ChameleonBrd 1002 -- GitLab