From 0d58113d1dc30324b806f489c98be586aa5882c5 Mon Sep 17 00:00:00 2001 From: Matthieu KUHN <bkuhnm@l0.spartan.bench.local> Date: Thu, 31 Mar 2022 14:46:46 +0200 Subject: [PATCH] getrf: Add a version with no pivoting per column --- compute/pzgetrf.c | 88 ++++++++++++++++++++++++++++++++------------- compute/zgetrf.c | 31 ++++++++++------ control/compute_z.h | 12 +++++-- 3 files changed, 94 insertions(+), 37 deletions(-) diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c index 7e6237c7e..967e83f2f 100644 --- a/compute/pzgetrf.c +++ b/compute/pzgetrf.c @@ -4,7 +4,7 @@ * * @copyright 2009-2014 The University of Tennessee and The University of * Tennessee Research Foundation. All rights reserved. - * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, * Univ. Bordeaux. All rights reserved. * *** @@ -16,13 +16,14 @@ * @author Mathieu Faverge * @author Emmanuel Agullo * @author Matthieu Kuhn - * @date 2022-02-22 + * @date 2023-02-21 * @precisions normal z -> s d c * */ #include "control/common.h" #define A(m,n) A, m, n +#define U(m,n) &(ws->U), m, n /* * All the functions below are panel factorization variant. @@ -44,10 +45,10 @@ * The runtime options data structure to pass through all insert_task calls. 
*/
 static inline void
-chameleon_pzgetrf_panel_facto_nopiv( void             *ws,
-                                     CHAM_desc_t      *A,
-                                     int               k,
-                                     RUNTIME_option_t *options )
+chameleon_pzgetrf_panel_facto_nopiv( struct chameleon_pzgetrf_s *ws,
+                                     CHAM_desc_t                *A,
+                                     int                         k,
+                                     RUNTIME_option_t           *options )
 {
     const CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t) 1.0;
     int m, tempkm, tempkn, tempmm;
@@ -60,7 +61,7 @@ chameleon_pzgetrf_panel_facto_nopiv( void *ws,
      */
     INSERT_TASK_zgetrf_nopiv(
         options,
-        tempkm, tempkn, 32, A->mb,
+        tempkm, tempkn, ws->ib, A->mb, /* ib: internal blocking parameter stored in the workspace */
         A(k, k), 0);
 
     for (m = k+1; m < A->mt; m++) {
@@ -73,24 +74,61 @@ chameleon_pzgetrf_panel_facto_nopiv( void *ws,
             A(m, k) );
     }
 }
+
+static inline void
+chameleon_pzgetrf_panel_facto_nopiv_percol( struct chameleon_pzgetrf_s *ws,
+                                            CHAM_desc_t                *A,
+                                            int                         k,
+                                            RUNTIME_option_t           *options )
+{
+    int m, h;
+    int tempkm, tempkn, tempmm, minmn;
+
+    tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; /* rows of tile row k (the last tile row may be partial) */
+    tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; /* columns of tile column k (the last tile column may be partial) */
+    minmn  = chameleon_min( tempkm, tempkn ); /* number of columns handled by the loop below */
+
+    /* Algorithm per column without pivoting: for each column h < minmn,
+     * one "diag" task on A(k,k), then one "trsm" task per tile A(m,k), m > k;
+     * every task also receives the workspace tile U(k,k). */
+    for(h=0; h<minmn; h++){
+        INSERT_TASK_zgetrf_panel_nopiv_percol_diag(
+            options, tempkm, tempkn, h,
+            A( k, k ), U( k, k ), A->mb * k ); /* A->mb * k: index of the first row of tile row k */
+
+        for (m = k+1; m < A->mt; m++) {
+            tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; /* rows of tile row m */
+            INSERT_TASK_zgetrf_panel_nopiv_percol_trsm(
+                options, tempmm, tempkn, h,
+                A( m, k ), U( k, k ) );
+        }
+    }
+
+    RUNTIME_data_flush( options->sequence, U(k, k) ); /* U(k,k) is not used again in this function */
+}
+
 static inline void
-chameleon_pzgetrf_panel_facto( void             *ws,
-                               CHAM_desc_t      *A,
-                               int               k,
-                               RUNTIME_option_t *options )
+chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws,
+                               CHAM_desc_t                *A,
+                               int                         k,
+                               RUNTIME_option_t           *options )
 {
+#if defined(GETRF_NOPIV_PER_COLUMN) /* compile-time choice of the panel factorization variant */
+    chameleon_pzgetrf_panel_facto_nopiv_percol( ws, A, k, options );
+#else /* default: tile-level panel factorization without pivoting */
     chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options );
+#endif /* GETRF_NOPIV_PER_COLUMN */
 }
 
 /**
  *  Permutation of the panel n at step k
  */
 static inline void
-chameleon_pzgetrf_panel_permute( void             *ws,
-                                 CHAM_desc_t      *A,
-                                 int               k,
-                                 int               n,
-                                 RUNTIME_option_t *options )
+chameleon_pzgetrf_panel_permute( struct chameleon_pzgetrf_s *ws,
+                                 CHAM_desc_t                *A,
+                                 int                         k,
+                                 int                         n,
+                                 RUNTIME_option_t           *options )
 {
     (void)ws;
     (void)A;
@@ -100,11 +138,11 @@ chameleon_pzgetrf_panel_permute( void *ws,
 }
 
 static inline void
-chameleon_pzgetrf_panel_update( void             *ws,
-                                CHAM_desc_t      *A,
-                                int               k,
-                                int               n,
-                                RUNTIME_option_t *options )
+chameleon_pzgetrf_panel_update( struct chameleon_pzgetrf_s *ws,
+                                CHAM_desc_t                *A,
+                                int                         k,
+                                int                         n,
+                                RUNTIME_option_t           *options )
 {
     const CHAMELEON_Complex64_t zone  = (CHAMELEON_Complex64_t) 1.0;
     const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0;
@@ -141,10 +179,10 @@ chameleon_pzgetrf_panel_update( void *ws,
 /**
  *  Parallel tile LU factorization with no pivoting - dynamic scheduling
  */
-void chameleon_pzgetrf( void               *ws,
-                        CHAM_desc_t        *A,
-                        RUNTIME_sequence_t *sequence,
-                        RUNTIME_request_t  *request )
+void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws,
+                        CHAM_desc_t                *A,
+                        RUNTIME_sequence_t         *sequence,
+                        RUNTIME_request_t          *request )
 {
     CHAM_context_t  *chamctxt;
     RUNTIME_option_t options;
diff --git a/compute/zgetrf.c b/compute/zgetrf.c
index 52c33a740..c99ae3f80 100644
--- a/compute/zgetrf.c
+++ b/compute/zgetrf.c
@@ -4,7 +4,7 @@
  *
  * 
@copyright 2009-2014 The University of Tennessee and The University of
  *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
@@ -18,7 +18,7 @@
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Matthieu Kuhn
- * @date 2022-09-19
+ * @date 2023-02-21
  *
  * @precisions normal z -> s d c
  *
@@ -52,14 +52,31 @@
 void *
 CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
 {
-    CHAM_context_t *chamctxt;
+    CHAM_context_t             *chamctxt;
+    struct chameleon_pzgetrf_s *options;
 
     chamctxt = chameleon_context_self();
     if ( chamctxt == NULL ) {
         return NULL;
     }
 
-    return NULL;
+    options = calloc( 1, sizeof( struct chameleon_pzgetrf_s ) );
+    if ( options == NULL ) {
+        /* Allocation failed: report it the same way as a missing context */
+        return NULL;
+    }
+    options->ib = CHAMELEON_IB;
+
+#if defined(GETRF_NOPIV_PER_COLUMN)
+    /* Workspace descriptor U, only set up for the per-column variant */
+    chameleon_desc_init( &(options->U), CHAMELEON_MAT_ALLOC_TILE,
+                         ChamComplexDouble, 1, A->nb, A->nb,
+                         A->mt, A->nt * A->nb, 0, 0,
+                         A->mt, A->nt * A->nb, A->p, A->q,
+                         NULL, NULL, A->get_rankof_init );
+#endif
+
+    return options;
 }
 
 /**
@@ -84,14 +101,18 @@ CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A )
 void
 CHAMELEON_zgetrf_WS_Free( const CHAM_desc_t *A, void *user_ws )
 {
-    CHAM_context_t *chamctxt;
+    struct chameleon_pzgetrf_s *ws = (struct chameleon_pzgetrf_s *)user_ws;
 
-    chamctxt = chameleon_context_self();
-    if ( chamctxt == NULL ) {
-        return;
-    }
+    /* WS_Alloc returns NULL on failure: nothing to release in that case */
+    if ( ws == NULL ) {
+        return;
+    }
+
+#if defined(GETRF_NOPIV_PER_COLUMN)
+    chameleon_desc_destroy( &(ws->U) );
+#endif
 
-    (void)user_ws;
+    free( ws );
 }
 
 #if defined(NOT_AVAILABLE_YET)
diff --git a/control/compute_z.h b/control/compute_z.h
index b938b178f..bf9fdc8dc 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -21,7 +21,7 @@
  * @author Florent Pruvost
  * @author Alycia Lisito
  * @author Matthieu Kuhn
- * @date 2022-09-19
+ * @date 2023-02-21
  * @precisions normal z -> c d s
  *
  */
@@ -37,6 +37,14 @@ struct chameleon_pzgemm_s {
     CHAM_desc_t WB;
 };
 
+/**
+ * @brief Data structure 
to handle the GETRF workspaces with partial pivoting
+ */
+struct chameleon_pzgetrf_s {
+    int         ib; /* Internal blocking parameter */
+    CHAM_desc_t U;  /* Workspace descriptor; initialized and destroyed only when GETRF_NOPIV_PER_COLUMN is defined */
+};
+
 /**
  * @brief Data structure to handle the Centering-Scaled workspaces
  */
@@ -78,7 +86,7 @@ void chameleon_pzgepdf_qdwh( cham_mtxtype_t trans, CHAM_desc_t *descU, CHAM_desc
 void chameleon_pzgepdf_qr( int genD, int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *D1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *D2, CHAM_desc_t *Q2, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-void chameleon_pzgetrf( void *ws, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgetrf_reclap(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-- 
GitLab