From fb6ccd8c0fde2b3abf284d518e93fb7511303edb Mon Sep 17 00:00:00 2001 From: Alycia Lisito <alycia.lisito@inria.fr> Date: Wed, 14 Feb 2024 16:17:40 +0100 Subject: [PATCH] zgetrf batched: add batched percol algorithm --- compute/pzgetrf.c | 58 +++++++++++++++++++++++++++++++++++++++- testing/CTestLists.cmake | 6 +++++ 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c index 99152bc04..000209624 100644 --- a/compute/pzgetrf.c +++ b/compute/pzgetrf.c @@ -157,6 +157,57 @@ chameleon_pzgetrf_panel_facto_percol( struct chameleon_pzgetrf_s *ws, RUNTIME_ipiv_flushk( options->sequence, ipiv, k ); } +/* + * Factorization of panel k - dynamic scheduling - batched version / stock. For each column h of the panel this submits the diagonal-tile task on A(k, k), one batched off-diagonal task per tile A(m, k) with m > k, the batched flush, and (while h < minmn) the ipiv reducek task; once all columns are submitted it inserts the ipiv-to-perm task and flushes the ipiv data of panel k. + */ +static inline void +chameleon_pzgetrf_panel_facto_percol_batched( struct chameleon_pzgetrf_s *ws, + CHAM_desc_t *A, + CHAM_ipiv_t *ipiv, + int k, + RUNTIME_option_t *options ) +{ + int m, h; + int tempkm, tempkn, tempmm, minmn; + void **clargs = malloc( sizeof(char *) ); + memset( clargs, 0, sizeof(char *) ); /* One pointer-sized slot, zeroed, handed to every batched off-diagonal insertion and to the flush call below. NOTE(review): the malloc result is not checked before this memset. */ + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + minmn = chameleon_min( tempkm, tempkn ); + + /* Update the number of columns: ipiv->n is set to min(tempkm, tempkn) for this panel */ + ipiv->n = minmn; + + /* + * Algorithm per column with pivoting (no recursion) + */ + /* Iterate on current panel column */ + /* Since index h scales column h-1, we need to iterate up to minmn (included) */ + for ( h = 0; h <= minmn; h++ ) { + + INSERT_TASK_zgetrf_percol_diag( options, tempkm, tempkn, h, k * A->mb, A(k, k), ipiv ); + + for ( m = k+1; m < A->mt; m++ ) { + tempmm = (m == (A->mt - 1)) ? 
A->m - m * A->mb : A->mb; + INSERT_TASK_zgetrf_panel_offdiag_batched( options, tempmm, tempkn, h, m * A->mb, + (void *)ws, A(m, k), clargs, ipiv ); + } + INSERT_TASK_zgetrf_panel_offdiag_batched_flush( options, A, k, clargs, ipiv ); + + if ( h < minmn ) { + /* Reduce globally (between MPI processes) */ + INSERT_TASK_ipiv_reducek( options, ipiv, k, h ); + } + } + + free( clargs ); /* NOTE(review): released right after task submission; presumably the flush call above no longer needs this slot by then - confirm against the INSERT_TASK implementation. */ + + /* Flush temporary data used for the pivoting */ + INSERT_TASK_ipiv_to_perm( options, k * A->mb, tempkm, minmn, ipiv, k ); + RUNTIME_ipiv_flushk( options->sequence, ipiv, k ); +} + static inline void chameleon_pzgetrf_panel_facto_blocked( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, @@ -235,7 +286,12 @@ chameleon_pzgetrf_panel_facto( struct chameleon_pzgetrf_s *ws, break; case ChamGetrfPPivPerColumn: - chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, k, options ); + if ( ws->batch_size > 1 ) { + chameleon_pzgetrf_panel_facto_percol_batched( ws, A, ipiv, k, options ); + } + else { + chameleon_pzgetrf_panel_facto_percol( ws, A, ipiv, k, options ); + } break; case ChamGetrfPPiv: diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index b429b5c02..98bdb1939 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -94,6 +94,12 @@ if (NOT CHAMELEON_SIMULATION) set_tests_properties( test_${cat}_${prec}getrf_ppivpercol PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppivpercolumn;CHAMELEON_GETRF_BATCH_SIZE=1" ) + if ( ${cat} STREQUAL "shm" ) + add_test( test_${cat}_${prec}getrf_ppivpercol_batch ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrf_nopiv.in ) + set_tests_properties( test_${cat}_${prec}getrf_ppivpercol_batch + PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppivpercolumn;CHAMELEON_GETRF_BATCH_SIZE=6" ) + endif() + add_test( test_${cat}_${prec}getrf_ppiv ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrf.in ) set_tests_properties( test_${cat}_${prec}getrf_ppiv PROPERTIES ENVIRONMENT 
"CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=1" ) -- GitLab