diff --git a/ChangeLog b/ChangeLog index 2517d5d027d2a8c248466b97428388ce6cdab625..1e299d40f81f0d0b6ca075464c76cffbc7a3224d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,6 +1,6 @@ chameleon-1.3.0 ------------------------------------------------------------------------ - - mixed-precision: introduce descripto with precision adapted to local norms + - mixed-precision: introduce descriptor with precision adapted to local norms - Add CHAMELEON_[dz]gered... functions to reduce the precision of the tiles based on a requested accuracy - Add CHAMELEON_[dz]gerst... functions to restore the original numerical precision of the tiles in a descriptor - types: add support for half precision arithmetic into the data descriptors @@ -11,6 +11,8 @@ chameleon-1.3.0 CHAMELEON_Desc_Create_User that requires aan additional `, NULL` parameters in the general case. - compute/poinv: Add the possibility to use an intermediate distribution for the TRTRI operation + - compute/getrf_nopiv: Add lookahead through temporary buffers to better regulate the communication allocations + - runtime/starpu: Whenever possible replace the lacpy codelet by a direct memory copy from the input handler to the output one chameleon-1.2.0 ------------------------------------------------------------------------ diff --git a/compute/pzgepdf_qdwh.c b/compute/pzgepdf_qdwh.c index 420ad6f502ceccea3a253363e5b00333c5f49aea..0d1b66cb267f374b5ce493c01a2ecb619603d645 100644 --- a/compute/pzgepdf_qdwh.c +++ b/compute/pzgepdf_qdwh.c @@ -15,7 +15,7 @@ * @author Mathieu Faverge * @author Hatem Ltaief * @author Lionel Eyraud-Dubois - * @date 2023-07-05 + * @date 2024-10-17 * @precisions normal z -> s d c * */ @@ -35,8 +35,8 @@ static int _zgepdf_qdwh_opt_genD = 1; static int _zgepdf_qdwh_opt_genD = 0; #endif -static int _zgepdf_qdwh_opt_qr = 1; -static int _zgepdf_qdwh_opt_id = 1; // There is a numerical issue when combining this optimization and the StarPU lacpy +static int _zgepdf_qdwh_opt_qr = 1; +static int 
_zgepdf_qdwh_opt_id = 1; static int _zgepdf_qdwh_verbose = 0; /** @@ -603,6 +603,7 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t double conv = 100.; double normest, Unorm; int it, itconv, facto = -1; + cham_bool_t optlacpy_backup; double eps = CHAMELEON_dlamch(); double tol1 = 5. * eps; @@ -615,6 +616,10 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t } assert( chamctxt->scheduler != RUNTIME_SCHED_PARSEC ); + /* Force unoptimized lacpy */ + optlacpy_backup = chamctxt->optlacpy_enabled; + chamctxt->optlacpy_enabled = CHAMELEON_FALSE; + if ( info ) { info->itQR = 0; info->itPO = 0; @@ -848,5 +853,8 @@ chameleon_pzgepdf_qdwh( cham_mtxtype_t mtxtype, CHAM_desc_t *descU, CHAM_desc_t &descB2, &descTS2, &descTT2, &descQ2, &descD2 ); CHAMELEON_zgemm_WS_Free( gemm_ws ); + /* Restore optimized lacpy value */ + chamctxt->optlacpy_enabled = optlacpy_backup; + return; } diff --git a/compute/pzgetrf_nopiv.c b/compute/pzgetrf_nopiv.c index daac4bad9652cd1512b023431c5ad1b325cd8c5c..18fb33ed81656b462cdcff0f52fe82e6080b90de 100644 --- a/compute/pzgetrf_nopiv.c +++ b/compute/pzgetrf_nopiv.c @@ -11,7 +11,7 @@ * * @brief Chameleon zgetrf_nopiv parallel algorithm * - * @version 1.2.0 + * @version 1.3.0 * @author Omar Zenati * @author Mathieu Faverge * @author Emmanuel Agullo @@ -20,20 +20,23 @@ * @author Samuel Thibault * @author Terry Cojean * @author Matthieu Kuhn - * @date 2022-02-22 + * @date 2024-10-17 * @precisions normal z -> s d c * */ #include "control/common.h" -#define A(m,n) A, m, n +#define A(m, n) A, m, n +#define WD(m) WL, m, m +#define WL(m, n) WL, m, n +#define WU(m, n) WU, m, n /** * Parallel tile LU factorization with no pivoting - dynamic scheduling */ -void chameleon_pzgetrf_nopiv( CHAM_desc_t *A, - RUNTIME_sequence_t *sequence, - RUNTIME_request_t *request ) +void chameleon_pzgetrf_nopiv_generic( CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) { CHAM_context_t 
*chamctxt; RUNTIME_option_t options; @@ -121,3 +124,196 @@ void chameleon_pzgetrf_nopiv( CHAM_desc_t *A, RUNTIME_options_finalize(&options, chamctxt); } + +void chameleon_pzgetrf_nopiv_ws( CHAM_desc_t *A, + CHAM_desc_t *WL, + CHAM_desc_t *WU, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + RUNTIME_option_t options; + + int k, m, n, ib, p, q, lp, lq; + int tempkm, tempkn, tempmm, tempnn; + int lookahead, myp, myq; + + CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t) 1.0; + CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0; + + chamctxt = chameleon_context_self(); + if (sequence->status != CHAMELEON_SUCCESS) { + return; + } + RUNTIME_options_init(&options, chamctxt, sequence, request); + + ib = CHAMELEON_IB; + lookahead = chamctxt->lookahead; + myp = A->myrank / A->q; + myq = A->myrank % A->q; + + for (k = 0; k < chameleon_min(A->mt, A->nt); k++) { + RUNTIME_iteration_push(chamctxt, k); + lp = (k % lookahead) * A->p; + lq = (k % lookahead) * A->q; + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkn = k == A->nt-1 ? 
A->n-k*A->nb : A->nb; + + options.priority = 2*A->nt - 2*k; + INSERT_TASK_zgetrf_nopiv( + &options, + tempkm, tempkn, ib, A->mb, + A(k, k), A->mb*k); + + /** + * Broadcast of A(k,k) along rings in both directions + */ + { + INSERT_TASK_zlacpy( + &options, + ChamUpperLower, tempkm, tempkn, + A( k, k ), + WL( k, (k % A->q) + lq ) ); + INSERT_TASK_zlacpy( + &options, + ChamUpperLower, tempkm, tempkn, + A( k, k ), + WU( (k % A->p) + lp, k ) ); + + for ( q=1; q < A->q; q++ ) { + INSERT_TASK_zlacpy( + &options, + ChamUpperLower, tempkm, tempkn, + WL( k, ((k+q-1) % A->q) + lq ), + WL( k, ((k+q) % A->q) + lq ) ); + } + + for ( p=1; p < A->p; p++ ) { + INSERT_TASK_zlacpy( + &options, + ChamUpperLower, tempkm, tempkn, + WU( ((k+p-1) % A->p) + lp, k ), + WU( ((k+p) % A->p) + lp, k ) ); + } + } + RUNTIME_data_flush( sequence, A( k, k ) ); + + for (m = k+1; m < A->mt; m++) { + + /* Skip the row if you are not involved with */ + if ( m%A->p != myp ) { + continue; + } + + options.priority = 2*A->nt - 2*k - m; + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + + assert( A->get_rankof( A, m, k ) == WU->get_rankof( WU, myp + lp, k) ); + INSERT_TASK_ztrsm( + &options, + ChamRight, ChamUpper, ChamNoTrans, ChamNonUnit, + tempmm, tempkn, A->mb, + zone, WU( myp + lp, k ), + A( m, k ) ); + + /* Broadcast A(m,k) into temp buffers through a ring */ + { + assert( A->get_rankof( A, m, k ) == WL->get_rankof( WL, m, (k % A->q) + lq) ); + INSERT_TASK_zlacpy( + &options, + ChamUpperLower, tempmm, tempkn, + A( m, k ), + WL( m, (k % A->q) + lq) ); + + for ( q=1; q < A->q; q++ ) { + INSERT_TASK_zlacpy( + &options, + ChamUpperLower, tempmm, tempkn, + WL( m, ((k+q-1) % A->q) + lq ), + WL( m, ((k+q) % A->q) + lq ) ); + } + } + RUNTIME_data_flush( sequence, A( m, k ) ); + } + + for (n = k+1; n < A->nt; n++) { + + /* Skip the column if you are not involved with */ + if ( n%A->q != myq ) { + continue; + } + + tempnn = n == A->nt-1 ? 
A->n-n*A->nb : A->nb; + options.priority = 2*A->nt - 2*k - n; + + assert( A->get_rankof( A, k, n ) == WL->get_rankof( WL, k, myq+lq) ); + INSERT_TASK_ztrsm( + &options, + ChamLeft, ChamLower, ChamNoTrans, ChamUnit, + tempkm, tempnn, A->mb, + zone, WL( k, myq + lq ), + A( k, n )); + + /* Broadcast A(k,n) into temp buffers through a ring */ + { + assert( A->get_rankof( A, k, n ) == WU->get_rankof( WU, (k%A->p) + lp, n) ); + INSERT_TASK_zlacpy( + &options, + ChamUpperLower, tempkm, tempnn, + A( k, n ), + WU( (k % A->p) + lp, n ) ); + + for ( p=1; p < A->p; p++ ) { + INSERT_TASK_zlacpy( + &options, + ChamUpperLower, tempkm, tempnn, + WU( ((k+p-1) % A->p) + lp, n ), + WU( ((k+p) % A->p) + lp, n ) ); + } + } + RUNTIME_data_flush( sequence, A( k, n ) ); + + for (m = k+1; m < A->mt; m++) { + + /* Skip the row if you are not involved with */ + if ( m%A->p != myp ) { + continue; + } + + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + options.priority = 2*A->nt - 2*k - n - m; + + assert( A->get_rankof( A, m, n ) == WL->get_rankof( WL, m, myq + lq) ); + assert( A->get_rankof( A, m, n ) == WU->get_rankof( WU, myp + lp, n) ); + + INSERT_TASK_zgemm( + &options, + ChamNoTrans, ChamNoTrans, + tempmm, tempnn, A->mb, A->mb, + mzone, WL( m, myq + lq ), + WU( myp + lp, n ), + zone, A( m, n )); + } + } + RUNTIME_iteration_pop( chamctxt ); + } + + CHAMELEON_Desc_Flush( WL, sequence ); + CHAMELEON_Desc_Flush( WU, sequence ); + + RUNTIME_options_finalize( &options, chamctxt ); +} + +void chameleon_pzgetrf_nopiv( struct chameleon_pzgetrf_nopiv_s *ws, + CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) +{ + if ( ws && ws->use_workspace ) { + chameleon_pzgetrf_nopiv_ws( A, &(ws->WL), &(ws->WU), sequence, request ); + } + else { + chameleon_pzgetrf_nopiv_generic( A, sequence, request ); + } +} diff --git a/compute/zgesv_nopiv.c b/compute/zgesv_nopiv.c index c7c703a45dcf31b7129d31b5baefd549fb78d5f9..cf5c756eb00259ded3da6bc1c8dcfb5965768089 100644 --- 
a/compute/zgesv_nopiv.c +++ b/compute/zgesv_nopiv.c @@ -11,7 +11,7 @@ * * @brief Chameleon zgesv_nopiv wrappers * - * @version 1.2.0 + * @version 1.3.0 * @comment This file has been automatically generated * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @author Jakub Kurzak @@ -19,7 +19,8 @@ * @author Emmanuel Agullo * @author Cedric Castagnede * @author Florent Pruvost - * @date 2022-02-22 + * @author Matthieu Kuhn + * @date 2024-10-17 * @precisions normal z -> s d c * */ @@ -82,8 +83,8 @@ * */ int CHAMELEON_zgesv_nopiv( int N, int NRHS, - CHAMELEON_Complex64_t *A, int LDA, - CHAMELEON_Complex64_t *B, int LDB ) + CHAMELEON_Complex64_t *A, int LDA, + CHAMELEON_Complex64_t *B, int LDB ) { int NB; int status; @@ -92,6 +93,7 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS, RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; CHAM_desc_t descAl, descAt; CHAM_desc_t descBl, descBt; + void *ws = NULL; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -138,7 +140,8 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS, B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request ); /* Call the tile interface */ - CHAMELEON_zgesv_nopiv_Tile_Async( &descAt, &descBt, sequence, &request ); + ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( &descAt ); + CHAMELEON_zgesv_nopiv_Tile_Async( &descAt, &descBt, ws, sequence, &request ); /* Submit the matrix conversion back */ chameleon_ztile2lap( chamctxt, &descAl, &descAt, @@ -149,6 +152,7 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS, chameleon_sequence_wait( chamctxt, sequence ); /* Cleanup the temporary data */ + CHAMELEON_zgetrf_nopiv_WS_Free( ws ); chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt ); @@ -195,10 +199,11 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS, */ int CHAMELEON_zgesv_nopiv_Tile( CHAM_desc_t *A, CHAM_desc_t *B ) { - CHAM_context_t *chamctxt; + CHAM_context_t *chamctxt; RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; 
- int status; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; + void *ws; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -207,12 +212,15 @@ int CHAMELEON_zgesv_nopiv_Tile( CHAM_desc_t *A, CHAM_desc_t *B ) } chameleon_sequence_create( chamctxt, &sequence ); - CHAMELEON_zgesv_nopiv_Tile_Async( A, B, sequence, &request ); + ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A ); + CHAMELEON_zgesv_nopiv_Tile_Async( A, B, ws, sequence, &request ); CHAMELEON_Desc_Flush( A, sequence ); CHAMELEON_Desc_Flush( B, sequence ); chameleon_sequence_wait( chamctxt, sequence ); + CHAMELEON_zgetrf_nopiv_WS_Free( ws ); + status = sequence->status; chameleon_sequence_destroy( chamctxt, sequence ); return status; @@ -248,10 +256,14 @@ int CHAMELEON_zgesv_nopiv_Tile( CHAM_desc_t *A, CHAM_desc_t *B ) * @sa CHAMELEON_zcgesv_Tile_Async * */ -int CHAMELEON_zgesv_nopiv_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *B, - RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +int CHAMELEON_zgesv_nopiv_Tile_Async( CHAM_desc_t *A, + CHAM_desc_t *B, + void *user_ws, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) { - CHAM_context_t *chamctxt; + CHAM_context_t *chamctxt; + struct chameleon_pzgetrf_nopiv_s *ws; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -294,11 +306,23 @@ int CHAMELEON_zgesv_nopiv_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *B, return CHAMELEON_SUCCESS; */ - chameleon_pzgetrf_nopiv( A, sequence, request ); + if ( user_ws == NULL ) { + ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A ); + } + else { + ws = user_ws; + } + chameleon_pzgetrf_nopiv( ws, A, sequence, request ); chameleon_pztrsm( ChamLeft, ChamLower, ChamNoTrans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request ); chameleon_pztrsm( ChamLeft, ChamUpper, ChamNoTrans, ChamNonUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request ); + if ( user_ws == NULL ) { + CHAMELEON_Desc_Flush( A, sequence ); + CHAMELEON_Desc_Flush( B, sequence ); + 
chameleon_sequence_wait( chamctxt, sequence ); + CHAMELEON_zgetrf_nopiv_WS_Free( ws ); + } return CHAMELEON_SUCCESS; } diff --git a/compute/zgetrf_nopiv.c b/compute/zgetrf_nopiv.c index d6e1c27ec6423bb5c0db35a0f96615003aa4fbe8..3409e97f7a80cfb0844b4eefa66d6646be6239f1 100644 --- a/compute/zgetrf_nopiv.c +++ b/compute/zgetrf_nopiv.c @@ -11,19 +11,109 @@ * * @brief Chameleon zgetrf_nopiv wrappers * - * @version 1.2.0 + * @version 1.3.0 * @author Omar Zenati * @author Mathieu Faverge * @author Emmanuel Agullo * @author Cedric Castagnede * @author Florent Pruvost * @author Alycia Lisito - * @date 2022-02-22 + * @author Matthieu Kuhn + * @date 2024-10-17 * * @precisions normal z -> s d c * */ #include "control/common.h" +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * @brief Allocate the required workspaces for asynchronous getrf + * + ******************************************************************************* + * + * @param[in] A + * The descriptor of the matrix A. + * + ******************************************************************************* + * + * @retval An allocated opaque pointer to use in CHAMELEON_zgetrf_nopiv_Tile_Async() + * and to free with CHAMELEON_zgetrf_nopiv_WS_Free(). 
+ * + ******************************************************************************* + * + * @sa CHAMELEON_zgetrf_nopiv_Tile_Async + * @sa CHAMELEON_zgetrf_nopiv_WS_Free + * + */ +void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const CHAM_desc_t *A ) +{ + CHAM_context_t *chamctxt; + struct chameleon_pzgetrf_nopiv_s *options; + + chamctxt = chameleon_context_self(); + if ( chamctxt == NULL ) { + return NULL; + } + + options = calloc( 1, sizeof(struct chameleon_pzgetrf_nopiv_s) ); + options->use_workspace = 0; + + if ( ( ( A->p > 1 ) || ( A->q > 1 ) ) && + ( A->get_rankof_init == chameleon_getrankof_2d ) && + ( chamctxt->generic_enabled != CHAMELEON_TRUE ) ) + { + int lookahead = chamctxt->lookahead; + options->use_workspace = 1; + + chameleon_desc_init( &(options->WL), CHAMELEON_MAT_ALLOC_TILE, + ChamComplexDouble, A->mb, A->nb, (A->mb * A->nb), + A->mt * A->mb, A->nb * A->q * lookahead, 0, 0, + A->mt * A->mb, A->nb * A->q * lookahead, A->p, A->q, + NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); + + chameleon_desc_init( &(options->WU), CHAMELEON_MAT_ALLOC_TILE, + ChamComplexDouble, + A->mb, A->nb, (A->mb * A->nb), + A->mb * A->p * lookahead, A->nt * A->nb, 0, 0, + A->mb * A->p * lookahead, A->nt * A->nb, A->p, A->q, + NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg ); + } + + return (void*)options; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * @brief Free the allocated workspaces for asynchronous getrf + * + ******************************************************************************* + * + * @param[in,out] user_ws + * On entry, the opaque pointer allocated by CHAMELEON_zgetrf_nopiv_WS_Alloc() + * On exit, all data are freed. 
+ * + ******************************************************************************* + * + * @sa CHAMELEON_zgetrf_nopiv_Tile_Async + * @sa CHAMELEON_zgetrf_nopiv_WS_Alloc + * + */ +void CHAMELEON_zgetrf_nopiv_WS_Free( void *user_ws ) +{ + struct chameleon_pzgetrf_nopiv_s *ws = (struct chameleon_pzgetrf_nopiv_s*)user_ws; + + if ( ws->use_workspace ) { + chameleon_desc_destroy( &(ws->WL) ); + chameleon_desc_destroy( &(ws->WU) ); + } + free( ws ); +} /** ******************************************************************************** @@ -69,7 +159,7 @@ * */ int CHAMELEON_zgetrf_nopiv( int M, int N, - CHAMELEON_Complex64_t *A, int LDA ) + CHAMELEON_Complex64_t *A, int LDA ) { int NB; int status; @@ -77,6 +167,7 @@ int CHAMELEON_zgetrf_nopiv( int M, int N, CHAM_context_t *chamctxt; RUNTIME_sequence_t *sequence = NULL; RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + void *ws = NULL; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -117,7 +208,8 @@ int CHAMELEON_zgetrf_nopiv( int M, int N, A, NB, NB, LDA, N, M, N, sequence, &request ); /* Call the tile interface */ - CHAMELEON_zgetrf_nopiv_Tile_Async( &descAt, sequence, &request ); + ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( &descAt ); + CHAMELEON_zgetrf_nopiv_Tile_Async( &descAt, ws, sequence, &request ); /* Submit the matrix conversion back */ chameleon_ztile2lap( chamctxt, &descAl, &descAt, @@ -126,6 +218,7 @@ int CHAMELEON_zgetrf_nopiv( int M, int N, chameleon_sequence_wait( chamctxt, sequence ); /* Cleanup the temporary data */ + CHAMELEON_zgetrf_nopiv_WS_Free( ws ); chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); status = sequence->status; @@ -169,10 +262,11 @@ int CHAMELEON_zgetrf_nopiv( int M, int N, */ int CHAMELEON_zgetrf_nopiv_Tile( CHAM_desc_t *A ) { - CHAM_context_t *chamctxt; + CHAM_context_t *chamctxt; RUNTIME_sequence_t *sequence = NULL; - RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; - int status; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int 
status; + void *ws; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -181,11 +275,14 @@ int CHAMELEON_zgetrf_nopiv_Tile( CHAM_desc_t *A ) } chameleon_sequence_create( chamctxt, &sequence ); - CHAMELEON_zgetrf_nopiv_Tile_Async( A, sequence, &request ); + ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A ); + CHAMELEON_zgetrf_nopiv_Tile_Async( A, ws, sequence, &request ); CHAMELEON_Desc_Flush( A, sequence ); chameleon_sequence_wait( chamctxt, sequence ); + CHAMELEON_zgetrf_nopiv_WS_Free( ws ); + status = sequence->status; chameleon_sequence_destroy( chamctxt, sequence ); return status; @@ -224,11 +321,13 @@ int CHAMELEON_zgetrf_nopiv_Tile( CHAM_desc_t *A ) * @sa CHAMELEON_zgetrs_Tile_Async * */ -int CHAMELEON_zgetrf_nopiv_Tile_Async( CHAM_desc_t *A, - RUNTIME_sequence_t *sequence, - RUNTIME_request_t *request ) +int CHAMELEON_zgetrf_nopiv_Tile_Async( CHAM_desc_t *A, + void *user_ws, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) { - CHAM_context_t *chamctxt; + CHAM_context_t *chamctxt; + struct chameleon_pzgetrf_nopiv_s *ws; chamctxt = chameleon_context_self(); if (chamctxt == NULL) { @@ -263,7 +362,19 @@ int CHAMELEON_zgetrf_nopiv_Tile_Async( CHAM_desc_t *A, return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); } - chameleon_pzgetrf_nopiv( A, sequence, request ); + if ( user_ws == NULL ) { + ws = CHAMELEON_zgetrf_nopiv_WS_Alloc( A ); + } + else { + ws = user_ws; + } + + chameleon_pzgetrf_nopiv( ws, A, sequence, request ); + if ( user_ws == NULL ) { + CHAMELEON_Desc_Flush( A, sequence ); + chameleon_sequence_wait( chamctxt, sequence ); + CHAMELEON_zgetrf_nopiv_WS_Free( ws ); + } return CHAMELEON_SUCCESS; } diff --git a/control/chameleon_zf77.c b/control/chameleon_zf77.c index 70541b792b2b3ee527c1737af4f314a86a7f57d3..64fad284a4f118b79bef98e0c622f415e3a2ac0a 100644 --- a/control/chameleon_zf77.c +++ b/control/chameleon_zf77.c @@ -11,7 +11,7 @@ * * @brief Chameleon Fortran77 computational routines * - * @version 1.2.0 + * 
@version 1.3.0 * @comment This file has been automatically generated * from Plasma 2.5.0 for CHAMELEON 0.9.2 * @comment This file is automatically generated by tools/genf77interface.pl @@ -21,7 +21,8 @@ * @author Cedric Castagnede * @author Florent Pruvost * @author Alycia Lisito - * @date 2022-02-22 + * @author Matthieu Kuhn + * @date 2024-10-17 * @precisions normal z -> c d s * */ @@ -727,7 +728,7 @@ void CHAMELEON_ZGETRF_INCPIV_TILE_ASYNC(CHAM_desc_t *A, CHAM_desc_t *L, int *IPI { *info = CHAMELEON_zgetrf_incpiv_Tile_Async(A, L, IPIV, sequence, request); } void CHAMELEON_ZGETRF_NOPIV_TILE_ASYNC(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request, int *info) -{ *info = CHAMELEON_zgetrf_nopiv_Tile_Async(A, sequence, request); } +{ *info = CHAMELEON_zgetrf_nopiv_Tile_Async(A, NULL, sequence, request); } //void CHAMELEON_ZGETRI_TILE_ASYNC(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request, int *info) //{ *info = CHAMELEON_zgetri_Tile_Async(A, IPIV, W, sequence, request); } diff --git a/control/compute_z.h b/control/compute_z.h index 06c8854c1b05a7c1bffbca0fa8e615218363672d..c0bac97556b86fbe2aaa5b61abcb9a645abf246b 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -23,7 +23,7 @@ * @author Matthieu Kuhn * @author Lionel Eyraud-Dubois * @author Ana Hourcau - * @date 2024-07-17 + * @date 2024-10-17 * @precisions normal z -> c d s * */ @@ -52,6 +52,49 @@ struct chameleon_pzgetrf_s { int involved:1; }; +/** + * @brief Data structure to handle the GETRF temporary workspaces + * for MPI transfers. + * + * @comment The idea is to manage explicitly temporary + * blocks arising from MPI transfers automatically + * inferred by StarPU, hence limiting the total number + * of temporary data allocated for these blocks. + * + * The blocks to be sent/received on the network are + * copied into those buffers. 
These copies are + * then used by the algorithm in place of the regular + * blocks of the problem matrix. + * + * For WL (resp. WU), the number of allocated blocks + * corresponds to the number of blocks on the column + * (resp. on the line) multiplied by lookahead number + * from the current chameleon context. + * + * Then, depending on the block panel index, we access + * one of the temporary column blocks of WL and row blocks + * of WU in a circular way. + * + * For instance, for the block panel index k, the block + * A(m,k) produced by the TRSM(A(k,k),A(m,k)) is stored + * into temporary buffer WL(m,k%chamctxt->lookahead). + * Similarly, the block A(k,n) is stored into the temporary + * block WU(k%chamctxt->lookahead, n). + * + * Notice that, by doing so, the notion of look ahead is + * reintroduced: artificial dependencies are implied by + * the circular usage of WL and WU temporary workspaces. + * + */ +struct chameleon_pzgetrf_nopiv_s { + int use_workspace; + + CHAM_desc_t WL; /* Workspace to store temporary blocks of the */ + /* diagonal and the lower part of the problem matrix */ + CHAM_desc_t WU; /* Workspace to store temporary blocks of the */ + /* upper part of the problem matrix */ +}; + /** * @brief Data structure to handle the Centering-Scaled workspaces */ @@ -103,7 +146,7 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgetrf( struct chameleon_pzgetrf_s *ws, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +void chameleon_pzgetrf_nopiv(struct 
chameleon_pzgetrf_nopiv_s *ws, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgetrf_reclap(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgetrf_rectil(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzhegst(int itype, cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); diff --git a/control/context.c b/control/context.c index cfbe13a714ec97d1f817aaff6e4762e7e85b7ef4..5bcd6d598f9c2a6d90590934b51787a7e0b50162 100644 --- a/control/context.c +++ b/control/context.c @@ -21,7 +21,7 @@ * @author Matthieu Kuhn * @author Loris Lucido * @author Terry Cojean - * @date 2023-09-11 + * @date 2024-10-17 * *** * @@ -140,6 +140,7 @@ CHAM_context_t *chameleon_context_create() chamctxt->progress_enabled = chameleon_env_on_off( "CHAMELEON_PROGRESS", CHAMELEON_FALSE ); chamctxt->generic_enabled = chameleon_env_on_off( "CHAMELEON_GENERIC", CHAMELEON_FALSE ); chamctxt->autominmax_enabled = chameleon_env_on_off( "CHAMELEON_AUTOMINMAX", CHAMELEON_TRUE ); + chamctxt->optlacpy_enabled = chameleon_env_on_off( "CHAMELEON_OPTIMIZED_LACPY", CHAMELEON_TRUE ); chamctxt->runtime_paused = CHAMELEON_FALSE; diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index 3f33260f4436ec195323874d3c13b9b44d2c62e4..9b085201650e266247ade2308e6522b58d4070a9 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -24,7 +24,7 @@ * @author Alycia Lisito * @author Matthieu Kuhn * @author Ana Hourcau - * @date 2024-07-17 + * @date 2024-10-17 * @precisions normal z -> c d s * */ @@ -211,11 +211,11 @@ int CHAMELEON_zgeqrf_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, RUNTIME_sequence int CHAMELEON_zgeqrs_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); //int 
CHAMELEON_zgesv_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgesv_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -int CHAMELEON_zgesv_nopiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int CHAMELEON_zgesv_nopiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *B, void * ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgesvd_Tile_Async(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, double *S, CHAM_desc_t *T, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); //int CHAMELEON_zgetrf_Tile_Async(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrf_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, void * ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); //int CHAMELEON_zgetri_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); //int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); @@ -333,6 +333,8 @@ void *CHAMELEON_zgram_WS_Alloc( const CHAM_desc_t *A ); void CHAMELEON_zgram_WS_Free( void *ws ); void *CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ); void CHAMELEON_zgetrf_WS_Free( void *ws ); +void *CHAMELEON_zgetrf_nopiv_WS_Alloc( const 
CHAM_desc_t *A ); +void CHAMELEON_zgetrf_nopiv_WS_Free( void *ws ); int CHAMELEON_Alloc_Workspace_zgesv_incpiv( int N, CHAM_desc_t **descL, int **IPIV, int p, int q); int CHAMELEON_Alloc_Workspace_zgetrf_incpiv(int M, int N, CHAM_desc_t **descL, int **IPIV, int p, int q); diff --git a/include/chameleon/runtime_struct.h b/include/chameleon/runtime_struct.h index 72e353accaba3542c4ac36dd129fdc4552f72989..4c1b9c5210f9153221a2b6668c1868e6fbd5af42 100644 --- a/include/chameleon/runtime_struct.h +++ b/include/chameleon/runtime_struct.h @@ -17,7 +17,7 @@ * @author Cedric Castagnede * @author Florent Pruvost * @author Philippe Virouleau - * @date 2024-03-16 + * @date 2024-10-17 * */ #ifndef _chameleon_runtime_struct_h_ @@ -103,6 +103,7 @@ typedef struct runtime_option_s { int priority; /**< Define the submitted task priority */ int workerid; /**< Define the prefered worker id to perform the tasks */ int forcesub; /**< Force task submission if true */ + int withlacpy; /**< Flag to force the use of lacpy copies */ int withcuda; /**< Flag to know if cuda is enabled/disabled */ size_t ws_wsize; /**< Define the worker workspace size */ size_t ws_hsize; /**< Define the host workspace size for hybrid CPU/GPU kernel */ diff --git a/include/chameleon/struct_context.h b/include/chameleon/struct_context.h index 471dcfc11acfbdf27fc58593950ae739ed4b32f9..b66621cca2b625d35a7b92f9aaf6dbd70f5f2113 100644 --- a/include/chameleon/struct_context.h +++ b/include/chameleon/struct_context.h @@ -14,7 +14,7 @@ * @version 1.3.0 * @author Mathieu Faverge * @author Florent Pruvost - * @date 2024-03-16 + * @date 2024-10-17 * */ #ifndef _struct_context_h_ @@ -43,18 +43,19 @@ typedef struct chameleon_context_s { cham_bool_t progress_enabled; cham_bool_t generic_enabled; cham_bool_t autominmax_enabled; + cham_bool_t optlacpy_enabled; /**< Enable runtime cpy instead of lacpy kernel */ cham_bool_t runtime_paused; - cham_householder_t householder; /**> "domino" (flat) or tree-based (reduction) Householder 
*/ - cham_translation_t translation; /**> In place or Out of place layout conversion */ + cham_householder_t householder; /**< "domino" (flat) or tree-based (reduction) Householder */ + cham_translation_t translation; /**< In place or Out of place layout conversion */ int nb; int ib; - int rhblock; /**> block size for tree-based (reduction) Householder */ - int lookahead; /**> depth of the look ahead in algorithms */ - void *schedopt; /**> structure for runtimes */ - int mpi_outer_init; /**> MPI has been initialized outside our functions */ - MPI_Comm comm; /**> MPI communicator */ + int rhblock; /**< block size for tree-based (reduction) Householder */ + int lookahead; /**< depth of the look ahead in algorithms */ + void *schedopt; /**< structure for runtimes */ + int mpi_outer_init; /**< MPI has been initialized outside our functions */ + MPI_Comm comm; /**< MPI communicator */ } CHAM_context_t; END_C_DECLS diff --git a/runtime/openmp/control/runtime_options.c b/runtime/openmp/control/runtime_options.c index 77f4a51320216dc19eb043937dd2758bce71869a..11ecfb4dee30a518cb127114483cec03e0942d5d 100644 --- a/runtime/openmp/control/runtime_options.c +++ b/runtime/openmp/control/runtime_options.c @@ -11,12 +11,12 @@ * * @brief Chameleon OpenMP options routines * - * @version 1.2.0 + * @version 1.3.0 * @author Cedric Augonnet * @author Mathieu Faverge * @author Cedric Castagnede * @author Philippe Virouleau - * @date 2022-02-22 + * @date 2024-10-17 * */ #include "chameleon_openmp.h" @@ -31,6 +31,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt, options->priority = RUNTIME_PRIORITY_MIN; options->workerid = -1; options->forcesub = 0; + options->withlacpy = 0; options->ws_wsize = 0; options->ws_hsize = 0; options->ws_worker = NULL; diff --git a/runtime/parsec/control/runtime_options.c b/runtime/parsec/control/runtime_options.c index edb2017e599215a46c683013a311c9c07883e23d..a4f8c0ffffec83029f3d2d1bdbaac3da9bf53a72 100644 --- 
a/runtime/parsec/control/runtime_options.c +++ b/runtime/parsec/control/runtime_options.c @@ -11,10 +11,10 @@ * * @brief Chameleon PaRSEC options routines * - * @version 1.2.0 + * @version 1.3.0 * @author Reazul Hoque * @author Mathieu Faverge - * @date 2022-02-22 + * @date 2024-10-17 * */ #include "chameleon_parsec.h" @@ -22,17 +22,18 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) { - options->sequence = sequence; - options->request = request; - options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE; - options->parallel = CHAMELEON_PARALLEL == CHAMELEON_TRUE; - options->priority = RUNTIME_PRIORITY_MIN; - options->workerid = -1; - options->forcesub = 0; - options->ws_wsize = 0; - options->ws_hsize = 0; - options->ws_worker = NULL; - options->ws_host = NULL; + options->sequence = sequence; + options->request = request; + options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE; + options->parallel = CHAMELEON_PARALLEL == CHAMELEON_TRUE; + options->priority = RUNTIME_PRIORITY_MIN; + options->workerid = -1; + options->forcesub = 0; + options->withlacpy = 0; + options->ws_wsize = 0; + options->ws_hsize = 0; + options->ws_worker = NULL; + options->ws_host = NULL; return; } diff --git a/runtime/quark/control/runtime_options.c b/runtime/quark/control/runtime_options.c index 8c090263308a8a861fdaab945c3d3759228da37e..5cb4ac31371ac8fcce34002ae14ef4f78f2d522a 100644 --- a/runtime/quark/control/runtime_options.c +++ b/runtime/quark/control/runtime_options.c @@ -11,12 +11,12 @@ * * @brief Chameleon Quark options routines * - * @version 1.2.0 + * @version 1.3.0 * @author Vijay Joshi * @author Cedric Castagnede * @author Florent Pruvost * @author Mathieu Faverge - * @date 2022-02-22 + * @date 2024-10-17 * */ #include "chameleon_quark.h" @@ -34,18 +34,19 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt, qopt->flags.thread_set_to_manual_scheduling 
= -1; /* Initialize options */ - options->sequence = sequence; - options->request = request; - options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE; - options->parallel = CHAMELEON_PARALLEL == CHAMELEON_TRUE; - options->priority = RUNTIME_PRIORITY_MIN; - options->workerid = -1; - options->forcesub = 0; + options->sequence = sequence; + options->request = request; + options->profiling = CHAMELEON_STATISTICS == CHAMELEON_TRUE; + options->parallel = CHAMELEON_PARALLEL == CHAMELEON_TRUE; + options->priority = RUNTIME_PRIORITY_MIN; + options->workerid = -1; + options->forcesub = 0; + options->withlacpy = 0; - options->ws_wsize = 0; - options->ws_hsize = 0; - options->ws_worker = NULL; - options->ws_host = NULL; + options->ws_wsize = 0; + options->ws_hsize = 0; + options->ws_worker = NULL; + options->ws_host = NULL; /* quark in options */ qopt->quark = (Quark*)(chamctxt->schedopt); diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c index aa8d73ed3774d60b34f1b9d6641c984db59fe820..93c951a246545a970c272e50a544c62b9fd986d8 100644 --- a/runtime/starpu/codelets/codelet_zlacpy.c +++ b/runtime/starpu/codelets/codelet_zlacpy.c @@ -21,7 +21,7 @@ * @author Florent Pruvost * @author Samuel Thibault * @author Alycia Lisito - * @date 2024-03-16 + * @date 2024-10-17 * @precisions normal z -> c d s * */ @@ -148,7 +148,8 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options, #if !defined(CHAMELEON_USE_MPI) || defined(HAVE_STARPU_MPI_DATA_CPY_PRIORITY) /* Insert the task */ - if ( (uplo == ChamUpperLower) && + if ( (!options->withlacpy) && + (uplo == ChamUpperLower) && (tileA->m == m) && (tileA->n == n) && (tileB->m == m) && (tileB->n == n) && (displA == 0) && (displB == 0) ) @@ -225,7 +226,8 @@ void INSERT_TASK_zlacpy( const RUNTIME_option_t *options, #if !defined(CHAMELEON_USE_MPI) || defined(HAVE_STARPU_MPI_DATA_CPY_PRIORITY) /* Insert the task */ - if ( (uplo == ChamUpperLower) && + if ( (!options->withlacpy) && + (uplo == 
ChamUpperLower) && (tileA->m == m) && (tileA->n == n) && (tileB->m == m) && (tileB->n == n) ) { diff --git a/runtime/starpu/control/runtime_options.c b/runtime/starpu/control/runtime_options.c index 8ec2551f59d59e94c0eb763196576911db49d0cc..8423d9d7d5530bb1fe15bee327be2c0b2a3d7f11 100644 --- a/runtime/starpu/control/runtime_options.c +++ b/runtime/starpu/control/runtime_options.c @@ -16,7 +16,7 @@ * @author Mathieu Faverge * @author Cedric Castagnede * @author Florent Pruvost - * @date 2023-07-04 + * @date 2024-10-17 * */ #include "chameleon_starpu_internal.h" @@ -32,6 +32,7 @@ void RUNTIME_options_init( RUNTIME_option_t *options, CHAM_context_t *chamctxt, options->priority = RUNTIME_PRIORITY_MIN; options->workerid = (schedopt == NULL) ? -1 : schedopt->workerid; options->forcesub = 0; + options->withlacpy = !(chamctxt->optlacpy_enabled); options->withcuda = (chamctxt->ncudas > 0); options->ws_wsize = 0; options->ws_hsize = 0; diff --git a/testing/testing_zgesv_nopiv.c b/testing/testing_zgesv_nopiv.c index 8855334349498a9a28f1ca2cc3e70cfe1f95ea9e..d1fd9b1df9c7b745c178f42b69892b781c87efed 100644 --- a/testing/testing_zgesv_nopiv.c +++ b/testing/testing_zgesv_nopiv.c @@ -13,7 +13,7 @@ * @author Lucas Barros de Assis * @author Mathieu Faverge * @author Alycia Lisito - * @date 2023-07-05 + * @date 2024-10-17 * @precisions normal z -> c d s * */ @@ -64,7 +64,7 @@ testing_zgesv_nopiv_desc( run_arg_list_t *args, int check ) /* Calculates the solution */ testing_start( &test_data ); if ( async ) { - hres = CHAMELEON_zgesv_nopiv_Tile_Async( descA, descX, + hres = CHAMELEON_zgesv_nopiv_Tile_Async( descA, descX, NULL, test_data.sequence, &test_data.request ); CHAMELEON_Desc_Flush( descA, test_data.sequence ); CHAMELEON_Desc_Flush( descX, test_data.sequence ); diff --git a/testing/testing_zgetrf_nopiv.c b/testing/testing_zgetrf_nopiv.c index afa44ede3d749a4caa695a8f780ee9f30615e793..657513eaf26371399a7402e1e0b305afdc8fb854 100644 --- a/testing/testing_zgetrf_nopiv.c +++ 
b/testing/testing_zgetrf_nopiv.c @@ -13,7 +13,9 @@ * @author Lucas Barros de Assis * @author Mathieu Faverge * @author Alycia Lisito - * @date 2023-07-05 + * @author Lucas Barros De Assis + * @author Matthieu Kuhn + * @date 2024-10-17 * @precisions normal z -> c d s * */ @@ -53,7 +55,7 @@ testing_zgetrf_nopiv_desc( run_arg_list_t *args, int check ) /* Calculates the solution */ testing_start( &test_data ); if ( async ) { - hres = CHAMELEON_zgetrf_nopiv_Tile_Async( descA, test_data.sequence, &test_data.request ); + hres = CHAMELEON_zgetrf_nopiv_Tile_Async( descA, NULL, test_data.sequence, &test_data.request ); CHAMELEON_Desc_Flush( descA, test_data.sequence ); } else {