From 61dbad0ed513d2bd36bdf86e8817e4784b45bb23 Mon Sep 17 00:00:00 2001 From: Matthieu KUHN <bkuhnm@l0.spartan.bench.local> Date: Thu, 31 Mar 2022 17:16:11 +0200 Subject: [PATCH] getrf_nopiv: Add Alloc/Free functions to manage temporary buffers of the pzgetrf_nopiv algorithm to better control the MPI transfers overhead --- compute/zgetrf_nopiv.c | 94 ++++++++++++++++++++++++++++++++- include/chameleon/chameleon_z.h | 2 + 2 files changed, 94 insertions(+), 2 deletions(-) diff --git a/compute/zgetrf_nopiv.c b/compute/zgetrf_nopiv.c index d6e1c27ec..07ebfa2e9 100644 --- a/compute/zgetrf_nopiv.c +++ b/compute/zgetrf_nopiv.c @@ -11,19 +11,109 @@ * * @brief Chameleon zgetrf_nopiv wrappers * - * @version 1.2.0 + * @version 1.3.0 * @author Omar Zenati * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede * @author Florent Pruvost * @author Alycia Lisito - * @date 2022-02-22 + * @author Matthieu Kuhn + * @date 2024-10-17 * * @precisions normal z -> s d c * */ #include "control/common.h" +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * @brief Allocate the required workspaces for asynchronous getrf + * + ******************************************************************************* + * + * @param[in] A + * The descriptor of the matrix A. + * + ******************************************************************************* + * + * @retval An allocated opaque pointer to use in CHAMELEON_zgetrf_nopiv_Tile_Async() + * and to free with CHAMELEON_zgetrf_nopiv_WS_Free(). 
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgetrf_nopiv_Tile_Async
+ * @sa CHAMELEON_zgetrf_nopiv_WS_Free
+ *
+ */
+void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const CHAM_desc_t *A )
+{
+    CHAM_context_t *chamctxt;
+    struct chameleon_pzgetrf_nopiv_s *options;
+
+    /* Without an initialized context there is nothing to allocate: NULL is returned. */
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        return NULL;
+    }
+
+    /* calloc() may fail: report it with NULL instead of dereferencing the result. */
+    options = calloc( 1, sizeof(*options) );
+    if ( options == NULL ) {
+        return NULL;
+    }
+    options->use_workspace = 0;
+
+    /*
+     * The WL/WU descriptors are only created when the matrix is spread over
+     * more than one process (p > 1 or q > 1), uses the 2D rank function, and
+     * the generic algorithm is not enabled.
+     */
+    if ( ( ( A->p > 1 ) || ( A->q > 1 ) ) &&
+         ( A->get_rankof_init == chameleon_getrankof_2d ) &&
+         ( chamctxt->generic_enabled != CHAMELEON_TRUE ) )
+    {
+        int lookahead = chamctxt->lookahead;
+        options->use_workspace = 1;
+
+        /* WL: A->mt tile rows by (A->q * lookahead) tile columns of A->mb x A->nb tiles. */
+        chameleon_desc_init( &(options->WL), CHAMELEON_MAT_ALLOC_TILE,
+                             ChamComplexDouble, A->mb, A->nb, (A->mb * A->nb),
+                             A->mt * A->mb, A->nb * A->q * lookahead, 0, 0,
+                             A->mt * A->mb, A->nb * A->q * lookahead, A->p, A->q,
+                             NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
+
+        /* WU: (A->p * lookahead) tile rows by A->nt tile columns of A->mb x A->nb tiles. */
+        chameleon_desc_init( &(options->WU), CHAMELEON_MAT_ALLOC_TILE,
+                             ChamComplexDouble,
+                             A->mb, A->nb, (A->mb * A->nb),
+                             A->mb * A->p * lookahead, A->nt * A->nb, 0, 0,
+                             A->mb * A->p * lookahead, A->nt * A->nb, A->p, A->q,
+                             NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
+    }
+
+    return (void*)options;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Free the allocated workspaces for asynchronous getrf
+ *
+ *******************************************************************************
+ *
+ * @param[in,out] user_ws
+ *          On entry, the opaque pointer allocated by CHAMELEON_zgetrf_nopiv_WS_Alloc()
+ *          On exit, all data are freed.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgetrf_nopiv_Tile_Async
+ * @sa CHAMELEON_zgetrf_nopiv_WS_Alloc
+ *
+ */
+void CHAMELEON_zgetrf_nopiv_WS_Free( void *user_ws )
+{
+    struct chameleon_pzgetrf_nopiv_s *ws = (struct chameleon_pzgetrf_nopiv_s*)user_ws;
+
+    /*
+     * CHAMELEON_zgetrf_nopiv_WS_Alloc() returns NULL when no context is
+     * available, so a NULL handle is accepted and ignored, like free(NULL).
+     */
+    if ( ws == NULL ) {
+        return;
+    }
+
+    /* WL and WU are only initialized when use_workspace was set by the Alloc call. */
+    if ( ws->use_workspace ) {
+        chameleon_desc_destroy( &(ws->WL) );
+        chameleon_desc_destroy( &(ws->WU) );
+    }
+    free( ws );
+}
 
 /**
  ********************************************************************************
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index 3f33260f4..9bd22083b 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -333,6 +333,8 @@ void *CHAMELEON_zgram_WS_Alloc( const CHAM_desc_t *A );
 void CHAMELEON_zgram_WS_Free( void *ws );
 void *CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A );
 void CHAMELEON_zgetrf_WS_Free( void *ws );
+void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const CHAM_desc_t *A );
+void CHAMELEON_zgetrf_nopiv_WS_Free( void *ws );
 
 int CHAMELEON_Alloc_Workspace_zgesv_incpiv( int N, CHAM_desc_t **descL, int **IPIV, int p, int q);
 int CHAMELEON_Alloc_Workspace_zgetrf_incpiv(int M, int N, CHAM_desc_t **descL, int **IPIV, int p, int q);
-- 
GitLab