From 3af4d5452cdf035202d08915596380d8fd7c2b4e Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Mon, 17 Mar 2025 17:00:02 +0100 Subject: [PATCH] gesv: Add gesv operation and testing --- compute/CMakeLists.txt | 2 +- compute/zgesv.c | 387 ++++++++++++++++++++++++++++++++ include/chameleon/chameleon_z.h | 6 +- testing/CMakeLists.txt | 1 + testing/CTestLists.cmake | 3 +- testing/input/gesv.in | 20 ++ testing/testing_zgesv.c | 256 +++++++++++++++++++++ 7 files changed, 670 insertions(+), 5 deletions(-) create mode 100644 compute/zgesv.c create mode 100644 testing/input/gesv.in create mode 100644 testing/testing_zgesv.c diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index f754f4c56..573921e49 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -151,7 +151,7 @@ set(ZSRC zgepdf_qr.c zgeqrs.c zgeqrs_param.c - #zgesv.c + zgesv.c zgesv_incpiv.c zgesv_nopiv.c #zgetrf.c diff --git a/compute/zgesv.c b/compute/zgesv.c new file mode 100644 index 000000000..1b657bfac --- /dev/null +++ b/compute/zgesv.c @@ -0,0 +1,387 @@ +/** + * + * @file zgesv.c + * + * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgesv wrappers + * + * @version 1.3.0 + * @author Matteo Marcos + * @date 2025-03-24 + * @precisions normal z -> s d c + * + */ +#include "control/common.h" + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * @brief Computes the solution to a system of linear equations A * X = B, + * where A is an N-by-N matrix and X and B are N-by-NRHS matrices. + * + * The tile LU decomposition with partial tile pivoting and row interchanges is used to factor A. + * The factored form of A is then used to solve the system of equations A * X = B. + * + ******************************************************************************* + * + * @param[in] N + * The number of linear equations, i.e., the order of the matrix A. N >= 0. + * + * @param[in] NRHS + * The number of right hand sides, i.e., the number of columns of the matrix B. + * NRHS >= 0. + * + * @param[in,out] A + * On entry, the N-by-N coefficient matrix A. + * On exit, the tile L and U factors from the factorization (not equivalent to LAPACK). + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,N). + * + * @param[out] IPIV + * On exit, the pivot indices that define the permutations (not equivalent to LAPACK). + * + * @param[in,out] B + * On entry, the N-by-NRHS matrix of right hand side matrix B. + * On exit, if return value = 0, the N-by-NRHS solution matrix X. + * + * @param[in] LDB + * The leading dimension of the array B. LDB >= max(1,N). + * + ******************************************************************************* + * + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * but the factor U is exactly singular, so the solution could not be computed. + * + ******************************************************************************* + * + * @sa CHAMELEON_zgesv_Tile + * @sa CHAMELEON_zgesv_Tile_Async + * @sa CHAMELEON_cgesv + * @sa CHAMELEON_dgesv + * @sa CHAMELEON_sgesv + * + */ +int CHAMELEON_zgesv( int N, int NRHS, + CHAMELEON_Complex64_t *A, int LDA, + int *IPIV, + CHAMELEON_Complex64_t *B, int LDB ) +{ + int NB; + int status; + CHAM_context_t *chamctxt; + CHAM_ipiv_t descIPIV; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + CHAM_desc_t descAl, descAt; + CHAM_desc_t descBl, descBt; + struct chameleon_pzgetrf_s *wsA, *wsB; + + chamctxt = chameleon_context_self(); + if ( chamctxt == NULL ) { + chameleon_error( "CHAMELEON_zgesv", "CHAMELEON not initialized" ); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + /* Check input arguments */ + if ( N < 0 ) { + chameleon_error( "CHAMELEON_zgesv", "illegal value of N" ); + return -1; + } + if ( NRHS < 0 ) { + chameleon_error( "CHAMELEON_zgesv", "illegal value of NRHS" ); + return -2; + } + if ( LDA < chameleon_max( 1, N ) ) { + chameleon_error( "CHAMELEON_zgesv", "illegal value of LDA" ); + return -4; + } + if ( LDB < chameleon_max( 1, N ) ) { + chameleon_error( "CHAMELEON_zgesv", "illegal value of LDB" ); + return -8; + } + /* Quick return */ + if ( chameleon_min( N, NRHS ) == 0 ) { + return CHAMELEON_SUCCESS; + } + + /* Tune NB & IB depending on M, N & NRHS; Set NBNB */ + status = chameleon_tune( CHAMELEON_FUNC_ZGESV, N, N, NRHS ); + if ( status != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zgesv", "chameleon_tune() failed" ); + return status; + } + + /* Set NT & NTRHS */ + NB = CHAMELEON_NB; + + chameleon_sequence_create( chamctxt, &sequence ); + + /* Submit the matrix conversion */ + chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower, + A, NB, NB, LDA, N, N, N, sequence, &request ); + chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower, + B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request ); + + /* Allocate workspace for partial pivoting */ + wsA = CHAMELEON_zgetrf_WS_Alloc( &descAt ); + wsB = CHAMELEON_zgetrf_WS_Alloc( &descBt ); + + if ( ( wsA->alg == ChamGetrfPPivPerColumn ) || + ( wsA->alg == ChamGetrfPPiv ) ) + { + chameleon_ipiv_init( &descIPIV, &descAt, IPIV ); + } + + /* Call the tile interface */ + CHAMELEON_zgesv_Tile_Async( &descAt, &descIPIV, &descBt, wsA, wsB, sequence, &request ); + + /* Submit the matrix conversion back */ + chameleon_ztile2lap( chamctxt, &descAl, &descAt, + ChamDescInout, ChamUpperLower, sequence, &request ); + chameleon_ztile2lap( chamctxt, &descBl, &descBt, + ChamDescInout, ChamUpperLower, sequence, &request ); + + if ( ( wsA->alg == ChamGetrfPPivPerColumn ) || + ( wsA->alg == ChamGetrfPPiv ) ) + { + RUNTIME_ipiv_gather( sequence, &descIPIV, IPIV, 0 ); + } + + chameleon_sequence_wait( chamctxt, sequence ); + + /* Cleanup the temporary data */ + if ( ( wsA->alg == ChamGetrfPPivPerColumn ) || + ( wsA->alg == ChamGetrfPPiv ) ) + { + chameleon_ipiv_destroy( &descIPIV, &descAt ); + } + + /* Cleanup the temporary data */ + CHAMELEON_zgetrf_WS_Free( wsA ); + CHAMELEON_zgetrf_WS_Free( wsB ); + chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); + chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt ); + + status = sequence->status; + chameleon_sequence_destroy( chamctxt, sequence ); + return status; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t_Tile + * + * @brief Solves a system of linear equations using the tile LU factorization. + * Tile equivalent of CHAMELEON_zgetrf_nopiv(). + * + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in,out] A + * On entry, the N-by-N coefficient matrix A. + * On exit, the tile L and U factors from the factorization (not equivalent to LAPACK). + * + * @param[in,out] IPIV + * On entry, ipiv descriptor associated to A and created with + * CHAMELEON_Ipiv_Create(). + * On exit, it contains the pivot indices associated to the PLU + * factorization of A. + * + * @param[in,out] B + * On entry, the N-by-NRHS matrix of right hand side matrix B. + * On exit, if return value = 0, the N-by-NRHS solution matrix X. + * + * + ******************************************************************************* + * + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * but the factor U is exactly singular, so the solution could not be computed. + * + ******************************************************************************* + * + * @sa CHAMELEON_zgesv + * @sa CHAMELEON_zgesv_Tile_Async + * @sa CHAMELEON_cgesv_Tile + * @sa CHAMELEON_dgesv_Tile + * @sa CHAMELEON_sgesv_Tile + * @sa CHAMELEON_zcgesv_Tile + * + */ +int CHAMELEON_zgesv_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B ) +{ + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; + void *wsA, *wsB; + + chamctxt = chameleon_context_self(); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgesv_Tile", "CHAMELEON not initialized" ); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + chameleon_sequence_create( chamctxt, &sequence ); + + wsA = CHAMELEON_zgetrf_WS_Alloc( A ); + wsB = CHAMELEON_zgetrf_WS_Alloc( B ); + CHAMELEON_zgesv_Tile_Async( A, IPIV, B, wsA, wsB, sequence, &request ); + + CHAMELEON_Desc_Flush( A, sequence ); + CHAMELEON_Ipiv_Flush( IPIV, sequence ); + CHAMELEON_Desc_Flush( B, sequence ); + + chameleon_sequence_wait( chamctxt, sequence ); + CHAMELEON_zgetrf_WS_Free( wsA ); + CHAMELEON_zgetrf_WS_Free( wsB ); + + status = sequence->status; + chameleon_sequence_destroy( chamctxt, sequence ); + return status; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t_Tile_Async + * + * @brief Solves a system of linear equations using the tile LU factorization. + * + * Non-blocking equivalent of CHAMELEON_zgesv_Tile(). + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in,out] A + * On entry, the M-by-N matrix to be factored. + * On exit, the tile factors L and U from the factorization. + * + * @param[in,out] IPIV + * On entry, ipiv descriptor associated to A and created with + * CHAMELEON_Ipiv_Create(). + * On exit, it contains the pivot indices associated to the PLU + * factorization of A. + * + * @param[in,out] B + * On entry, the N-by-NRHS matrix of right hand side matrix B. + * On exit, the N-by-NRHS solution matrix X. + * + * @param[in,out] user_wsA + * The opaque pointer to pre-allocated getrf workspace through + * CHAMELEON_zgetrf_WS_Alloc() for A. If user_ws is NULL, it is automatically + * allocated, but BE CAREFULL as it switches the call from asynchronous + * to synchronous call. + * + * @param[in,out] user_wsB + * The opaque pointer to pre-allocated getrf workspace through + * CHAMELEON_zgetrf_WS_Alloc() for B. If user_ws is NULL, it is automatically + * allocated, but BE CAREFULL as it switches the call from asynchronous + * to synchronous call.* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa CHAMELEON_zgesv + * @sa CHAMELEON_zgesv_Tile + * @sa CHAMELEON_cgesv_Tile_Async + * @sa CHAMELEON_dgesv_Tile_Async + * @sa CHAMELEON_sgesv_Tile_Async + * @sa CHAMELEON_zcgesv_Tile_Async + * + */ +int CHAMELEON_zgesv_Tile_Async( CHAM_desc_t *A, + CHAM_ipiv_t *IPIV, + CHAM_desc_t *B, + void *user_wsA, + void *user_wsB, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + struct chameleon_pzgetrf_s *wsA, *wsB; + + chamctxt = chameleon_context_self(); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgesv_Tile", "CHAMELEON not initialized" ); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + if ( sequence == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgesv_Tile", "NULL sequence" ); + return CHAMELEON_ERR_UNALLOCATED; + } + if ( request == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgesv_Tile", "NULL request" ); + return CHAMELEON_ERR_UNALLOCATED; + } + /* Check sequence status */ + if ( sequence->status == CHAMELEON_SUCCESS ) { + request->status = CHAMELEON_SUCCESS; + } + else { + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED ); + } + + /* Check descriptors for correctness */ + if ( chameleon_desc_check( A ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zgesv_Tile", "invalid first descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); + } + if ( chameleon_desc_check( B ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zgesv_Tile", "invalid third descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); + } + /* Check input arguments */ + if ( A->nb != A->mb || B->nb != B->mb ) { + chameleon_error( "CHAMELEON_zgesv_Tile", "only square tiles supported" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); + } + + if ( user_wsA == NULL ) { + wsA = CHAMELEON_zgetrf_WS_Alloc( A ); + } + else { + wsA = user_wsA; + } + + if ( user_wsB == NULL ) { + wsB = CHAMELEON_zgetrf_WS_Alloc( B ); + } + else { + wsB = user_wsB; + } + + chameleon_pzgetrf( wsA, A, IPIV, sequence, request ); + + CHAMELEON_zgetrs_Tile_Async( ChamNoTrans, A, IPIV, B, wsB, sequence, request ); + + if ( user_wsA == NULL ) { + CHAMELEON_Desc_Flush( A, sequence ); + CHAMELEON_Desc_Flush( B, sequence ); + chameleon_sequence_wait( chamctxt, sequence ); + CHAMELEON_zgetrf_WS_Free( wsA ); + } + if ( user_wsB == NULL ) { + CHAMELEON_zgetrf_WS_Free( wsB ); + } + return CHAMELEON_SUCCESS; +} + diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index 5e9b4868e..d9540d5e8 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -49,7 +49,7 @@ int CHAMELEON_zgemm(cham_trans_t transA, cham_trans_t transB, int M, int N, int int CHAMELEON_zgepdf_qdwh( int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *H, int LDH, gepdf_info_t *info ); int CHAMELEON_zgeqrf(int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT); int CHAMELEON_zgeqrs(int M, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT, CHAMELEON_Complex64_t *B, int LDB); -//int CHAMELEON_zgesv(int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, int *IPIV, CHAMELEON_Complex64_t *B, int LDB); +int CHAMELEON_zgesv(int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, int *IPIV, CHAMELEON_Complex64_t *B, int LDB); int CHAMELEON_zgesv_incpiv(int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descL, int *IPIV, CHAMELEON_Complex64_t *B, int LDB); int CHAMELEON_zgesv_nopiv(int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB); int CHAMELEON_zgesvd(cham_job_t jobu, cham_job_t jobvt, int M, int N, CHAMELEON_Complex64_t *A, int LDA, double *S, CHAM_desc_t *descT, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT); @@ -129,7 +129,7 @@ int CHAMELEON_zgepdf_qdwh_Tile( CHAM_desc_t *A, CHAM_desc_t *H, gepdf_info_t *in int CHAMELEON_zgepdf_qr_Tile( int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *Q2 ); int CHAMELEON_zgeqrf_Tile(CHAM_desc_t *A, CHAM_desc_t *T); int CHAMELEON_zgeqrs_Tile(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B); -//int CHAMELEON_zgesv_Tile(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B); +int CHAMELEON_zgesv_Tile(CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B); int CHAMELEON_zgesv_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B); int CHAMELEON_zgesv_nopiv_Tile(CHAM_desc_t *A, CHAM_desc_t *B); int CHAMELEON_zgesvd_Tile(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, double *S, CHAM_desc_t *T, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT); @@ -208,7 +208,7 @@ int CHAMELEON_zgemm_Tile_Async(cham_trans_t transA, cham_trans_t transB, CHAMELE int CHAMELEON_zgepdf_qdwh_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *H, gepdf_info_t *info, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); int CHAMELEON_zgeqrf_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgeqrs_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -//int CHAMELEON_zgesv_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int CHAMELEON_zgesv_Tile_Async(CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B, void *user_wsA, void *user_wsB, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgesv_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgesv_nopiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *B, void * ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgesvd_Tile_Async(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, double *S, CHAM_desc_t *T, CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index 26c296439..2221ee128 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -53,6 +53,7 @@ set(ZSRC_W_STDAPI testing_zlantr.c testing_zgemm.c testing_zgetrf.c + testing_zgesv.c testing_zgetrs.c testing_zhemm.c testing_zherk.c diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index b14bc446d..297d2628a 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -112,10 +112,11 @@ if (NOT CHAMELEON_SIMULATION) PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=3" ) add_test( test_${cat}_${prec}laswp ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/laswp.in ) add_test( test_${cat}_${prec}getrs ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrs.in ) - + add_test( test_${cat}_${prec}gesv ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/gesv.in ) if ( ${cat} STREQUAL "mpi" ) add_test( test_${cat}_${prec}laswp_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/laswp.in ) add_test( test_${cat}_${prec}getrs_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/getrs.in ) + add_test( test_${cat}_${prec}gesv_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/gesv.in ) add_test( test_${cat}_${prec}getrf_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/getrf.in ) set_tests_properties( test_${cat}_${prec}getrf_ppiv_comm_with_task PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=0;CHAMELEON_GETRF_ALL_REDUCE=cham_spu_tasks" ) diff --git a/testing/input/gesv.in b/testing/input/gesv.in new file mode 100644 index 000000000..95ecf5825 --- /dev/null +++ b/testing/input/gesv.in @@ -0,0 +1,20 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GESV + +# nb: Tile size +# ib: Inner tile size +# n: Order of the matrix A and number of rows of matrix B +# nrhs: The number of columns of matrix B +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B + +op = gesv +nb = 4, 16, 17 +ib = 4, 12, 50 +n = 15, 21, 35 +nrhs = 1, 13, 22, 33 +lda = 40 +ldb = 41 + diff --git a/testing/testing_zgesv.c b/testing/testing_zgesv.c new file mode 100644 index 000000000..ac5ffe621 --- /dev/null +++ b/testing/testing_zgesv.c @@ -0,0 +1,256 @@ +/** + * + * @file testing_zgesv.c + * + * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgesv testing + * + * @version 1.3.0 + * @author Lucas Barros de Assis + * @author Mathieu Faverge + * @author Alycia Lisito + * @author Matteo Marcos + * @date 2025-03-24 + * @precisions normal z -> c d s + * + */ +#include "testings.h" +#include "chameleon/chameleon_z.h" +#include "testing_zcheck.h" +#include <chameleon/flops.h> +#include <chameleon/getenv.h> +#include <coreblas/lapacke.h> + +static cham_fixdbl_t +flops_zgesv( int N, int NRHS ) +{ + cham_fixdbl_t flops = flops_zgetrf( N, N ) + flops_zgetrs( N, NRHS ); + return flops; +} + +#if !defined(CHAMELEON_TESTINGS_VENDOR) +int +testing_zgesv_desc( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int async = parameters_getvalue_int( "async" ); + int nb = run_arg_get_nb( args ); + int ib = run_arg_get_ib( args ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", testing_ialea() ); + int seedB = run_arg_get_int( args, "seedB", testing_ialea() ); + + /* Descriptors */ + CHAM_desc_t *descA, *descX; + CHAM_ipiv_t *descIPIV; + void *wsA = NULL; + void *wsB = NULL; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + /* Creates the matrices */ + parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, N, N ); + parameters_desc_create( "X", &descX, ChamComplexDouble, nb, nb, LDB, NRHS, N, NRHS ); + CHAMELEON_Ipiv_Create( &descIPIV, descA, NULL ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descX, seedB ); + + if ( async ) { + wsA = CHAMELEON_zgetrf_WS_Alloc( descA ); + wsB = CHAMELEON_zgetrf_WS_Alloc( descX ); + } + + /* Calculates the solution */ + testing_start( &test_data ); + if ( async ) { + hres = CHAMELEON_zgesv_Tile_Async( descA, descIPIV, descX, wsA, wsB, + test_data.sequence, &test_data.request ); + CHAMELEON_Desc_Flush( descA, test_data.sequence ); + CHAMELEON_Desc_Flush( descX, test_data.sequence ); + CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence ); + } + else { + hres = CHAMELEON_zgesv_Tile( descA, descIPIV, descX ); + } + test_data.hres = hres; + testing_stop( &test_data, flops_zgesv( N, NRHS ) ); + + if ( async ) { + CHAMELEON_zgetrf_WS_Free( wsA ); + CHAMELEON_zgetrf_WS_Free( wsB ); + } + + /* Checks the factorisation and the residual */ + if ( check ) { + CHAM_desc_t *descA0, *descB; + + /* Check the factorization */ + descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_TILE ); + CHAMELEON_zplrnt_Tile( descA0, seedA ); + + CHAMELEON_zlaswp_Tile( ChamLeft, ChamDirForward, descA0, 1, N, descIPIV ); + + hres += check_zxxtrf( args, ChamGeneral, ChamUpperLower, descA0, descA ); + + if ( hres ) { + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Ipiv_Destroy( &descIPIV, descA ); + parameters_desc_destroy( &descA ); + parameters_desc_destroy( &descX ); + return hres; + } + + /* Check the solve */ + descB = CHAMELEON_Desc_Copy( descX, CHAMELEON_MAT_ALLOC_TILE ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + + CHAMELEON_zplrnt_Tile( descA0, seedA ); + hres += check_zsolve( args, ChamGeneral, ChamNoTrans, ChamUpperLower, descA0, descX, descB ); + + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descB ); + } + + CHAMELEON_Ipiv_Destroy( &descIPIV, descA ); + parameters_desc_destroy( &descA ); + parameters_desc_destroy( &descX ); + + return hres; +} +#endif + +int +testing_zgesv_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ +#if !defined(CHAMELEON_TESTINGS_VENDOR) + int api = parameters_getvalue_int( "api" ); +#endif + int nb = run_arg_get_nb( args ); + int ib = run_arg_get_ib( args ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", testing_ialea() ); + int seedB = run_arg_get_int( args, "seedB", testing_ialea() ); + + /* Descriptors */ + CHAMELEON_Complex64_t *A, *X; + int *IPIV; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + /* Creates the matrices */ + A = malloc( sizeof(CHAMELEON_Complex64_t) * LDA*N ); + X = malloc( sizeof(CHAMELEON_Complex64_t) * LDB*NRHS ); + IPIV = malloc( sizeof(int) * N ); + + /* Fills the matrix with random values */ + CHAMELEON_zplrnt( N, N, A, LDA, seedA ); + CHAMELEON_zplrnt( N, NRHS, X, LDB, seedB ); + + /* Calculates the solution */ +#if defined(CHAMELEON_TESTINGS_VENDOR) + testing_start( &test_data ); + hres = LAPACKE_zgesv( LAPACK_COL_MAJOR, N, NRHS, A, LDA, IPIV, X, LDB ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgesv( N, NRHS ) ); +#else + testing_start( &test_data ); + switch ( api ) { + case 1: + hres = CHAMELEON_zgesv( N, NRHS, A, LDA, IPIV, X, LDB ); + break; +#if !defined(CHAMELEON_SIMULATION) && 0 + case 2: + CHAMELEON_lapacke_zgesv( CblasColMajor, chameleon_lapack_const(uplo), N, NRHS, A, LDA, X, LDB ); + break; +#endif + default: + if ( CHAMELEON_Comm_rank() == 0 ) { + fprintf( stderr, + "SKIPPED: This function can only be used with the option --api 1 or --api 2.\n" ); + } + return -1; + } + test_data.hres = hres; + testing_stop( &test_data, flops_zgesv( N, NRHS ) ); + + /* Checks the factorisation and residual */ + if ( check ) { + CHAMELEON_Complex64_t *A0 = malloc( sizeof(CHAMELEON_Complex64_t) * LDA*N ); + CHAMELEON_Complex64_t *B = malloc( sizeof(CHAMELEON_Complex64_t) * LDB*NRHS ); + + /* Check the factorization */ + CHAMELEON_zplrnt( N, N, A0, LDA, seedA ); + CHAMELEON_zlaswp( ChamLeft, ChamDirForward, N, N, A0, LDA, 1, N, IPIV ); + + hres += check_zxxtrf_std( args, ChamGeneral, ChamUpperLower, N, N, A0, A, LDA ); + + /* Check the solve */ + CHAMELEON_zplrnt( N, N, A0, LDA, seedA ); + CHAMELEON_zplrnt( N, NRHS, B, LDB, seedB ); + hres += check_zsolve_std( args, ChamGeneral, ChamNoTrans, ChamUpperLower, N, NRHS, A0, LDA, X, B, LDB ); + + free( A0 ); + free( B ); + } +#endif + + free( A ); + free( X ); + + (void)check; + return hres; +} + +testing_t test_zgesv; +#if defined(CHAMELEON_TESTINGS_VENDOR) +const char *zgesv_params[] = { "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL }; +#else +const char *zgesv_params[] = { "mtxfmt", "nb", "ib", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL }; +#endif +const char *zgesv_output[] = { NULL }; +const char *zgesv_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zgesv_init( void ) __attribute__( ( constructor ) ); +void +testing_zgesv_init( void ) +{ + test_zgesv.name = "zgesv"; + test_zgesv.helper = "General linear system solve (LU with partial pivoting)"; + test_zgesv.params = zgesv_params; + test_zgesv.output = zgesv_output; + test_zgesv.outchk = zgesv_outchk; +#if defined(CHAMELEON_TESTINGS_VENDOR) + test_zgesv.fptr_desc = NULL; +#else + test_zgesv.fptr_desc = testing_zgesv_desc; +#endif + test_zgesv.fptr_std = testing_zgesv_std; + test_zgesv.next = NULL; + + testing_register( &test_zgesv ); +} + -- GitLab