diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index fafab2d2e8d661d312271f04af4ab5b1849760e7..f754f4c5645098a714bf999d20e7ca653919c1d0 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -28,6 +28,7 @@
 #  @author Loris Lucido
 #  @author Matthieu Kuhn
 #  @author Ana Hourcau
+#  @author Matteo Marcos
 #  @date 2025-03-24
 #
 ###
@@ -159,6 +160,7 @@ set(ZSRC
     zgetrf.c
     zgetrs_incpiv.c
     zgetrs_nopiv.c
+    zgetrs.c
     zlacpy.c
     zlange.c
     zlanhe.c
diff --git a/compute/zgetrs.c b/compute/zgetrs.c
new file mode 100644
index 0000000000000000000000000000000000000000..9a2e5bac6de8623a1425cd81f18e4963b3ad92b2
--- /dev/null
+++ b/compute/zgetrs.c
@@ -0,0 +1,408 @@
+/**
+ *
+ * @file zgetrs.c
+ *
+ * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgetrs wrappers
+ *
+ * @version 1.3.0
+ * @author Matteo Marcos
+ * @date 2025-03-24
+ * @precisions normal z -> s d c
+ *
+ */
+#include "control/common.h"
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Solves a system of linear equations A * X = B, with a general N-by-N matrix A
+ *    using the tile LU factorization with partial pivoting computed by CHAMELEON_zgetrf.
+ *
+ *******************************************************************************
+ *
+ * @param[in] trans
+ *          Specifies the form of the system of equations:
+ *          = ChamNoTrans:   A * X = B     (No transpose)
+ *          = ChamTrans:     A^T * X = B  (Transpose)
+ *          = ChamConjTrans: A^H * X = B  (Conjugate transpose)
+ *          Only ChamNoTrans and ChamTrans are supported.
+ *
+ * @param[in] N
+ *          The order of the matrix A. N >= 0.
+ *
+ * @param[in] NRHS
+ *          The number of right hand sides, i.e., the number of columns of the matrix B.
+ *          NRHS >= 0.
+ *
+ * @param[in] A
+ *          The tile factors L and U from the factorization, computed by CHAMELEON_zgetrf.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,N).
+ *
+ * @param[in] IPIV
+ *          The pivot indices associated to the PLU factorization of A.
+ *          The array is wrapped internally into an ipiv descriptor
+ *          with CHAMELEON_Ipiv_Create() and then passed to
+ *          CHAMELEON_Ipiv_Init().
+ *
+ * @param[in,out] B
+ *          On entry, the N-by-NRHS matrix of right hand side matrix B.
+ *          On exit, the solution matrix X.
+ *
+ * @param[in] LDB
+ *          The leading dimension of the array B. LDB >= max(1,N).
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @return <0 if -i, the i-th argument had an illegal value
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgetrs_Tile
+ * @sa CHAMELEON_zgetrs_Tile_Async
+ * @sa CHAMELEON_cgetrs
+ * @sa CHAMELEON_dgetrs
+ * @sa CHAMELEON_sgetrs
+ * @sa CHAMELEON_zgetrf
+ *
+ */
+int CHAMELEON_zgetrs( cham_trans_t trans, int N, int NRHS,
+                      CHAMELEON_Complex64_t *A, int LDA,
+                      int *IPIV,
+                      CHAMELEON_Complex64_t *B, int LDB )
+{
+    int                         NB;
+    int                         status;
+    CHAM_context_t             *chamctxt;
+    CHAM_ipiv_t                *descIPIV;
+    RUNTIME_sequence_t         *sequence = NULL;
+    RUNTIME_request_t           request  = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t                 descAl, descAt;
+    CHAM_desc_t                 descBl, descBt;
+    struct chameleon_pzgetrf_s *ws;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zgetrs", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    /* Check input arguments */
+    if ( ( trans != ChamTrans ) && ( trans != ChamNoTrans ) ) {
+        chameleon_error("CHAMELEON_zgetrs", "Only ChamTrans and ChamNoTrans are supported");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+    if ( N < 0 ) {
+        chameleon_error("CHAMELEON_zgetrs", "illegal value of N");
+        return -2;
+    }
+    if ( NRHS < 0 ) {
+        chameleon_error("CHAMELEON_zgetrs", "illegal value of NRHS");
+        return -3;
+    }
+    if ( LDA < chameleon_max( 1, N ) ) {
+        chameleon_error("CHAMELEON_zgetrs", "illegal value of LDA");
+        return -5;
+    }
+    if ( LDB < chameleon_max( 1, N ) ) {
+        chameleon_error("CHAMELEON_zgetrs", "illegal value of LDB");
+        return -8;
+    }
+    /* Quick return */
+    if ( chameleon_min( N, NRHS ) == 0 )
+        return CHAMELEON_SUCCESS;
+
+    /* Tune NB & IB depending on N & NRHS; Set NBNBSIZE */
+    status = chameleon_tune( CHAMELEON_FUNC_ZGESV, N, N, NRHS );
+    if ( status != CHAMELEON_SUCCESS ) {
+        chameleon_error("CHAMELEON_zgetrs", "chameleon_tune() failed");
+        return status;
+    }
+
+    /* Set NB */
+    NB = CHAMELEON_NB;
+
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpperLower,
+                         A, NB, NB, LDA, N, N, N, sequence, &request );
+    chameleon_zlap2tile( chamctxt, &descBl, &descBt, ChamDescInout, ChamUpperLower,
+                         B, NB, NB, LDB, NRHS, N, NRHS, sequence, &request );
+
+    ws = CHAMELEON_zgetrf_WS_Alloc( &descBt );
+    CHAMELEON_Ipiv_Create( &descIPIV, &descAt, IPIV );
+    CHAMELEON_Ipiv_Init( &descAt, descIPIV );
+
+    /* Call the tile interface */
+    CHAMELEON_zgetrs_Tile_Async( trans, &descAt, descIPIV, &descBt, ws, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInput, ChamUpperLower, sequence, &request );
+    chameleon_ztile2lap( chamctxt, &descBl, &descBt,
+                         ChamDescInout, ChamUpperLower, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Cleanup the temporary data */
+    CHAMELEON_Ipiv_Destroy( &descIPIV, &descAt );
+    CHAMELEON_zgetrf_WS_Free( ws );
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+    chameleon_ztile2lap_cleanup( chamctxt, &descBl, &descBt );
+
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ * @brief Solves a system of linear equations using previously
+ *    computed LU factorization with partial pivoting.
+ *    Tile equivalent of CHAMELEON_zgetrs().
+ *    Operates on matrices stored by tiles.
+ *    All matrices are passed through descriptors.
+ *    All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] trans
+ *          Specifies the form of the system of equations:
+ *          = ChamNoTrans:   A * X = B     (No transpose)
+ *          = ChamTrans:     A^T * X = B  (Transpose)
+ *          = ChamConjTrans: A^H * X = B  (Conjugate transpose)
+ *          Only ChamNoTrans and ChamTrans are supported.
+ *
+ * @param[in] A
+ *          The tile factors L and U from the factorization, computed by CHAMELEON_zgetrf.
+ *
+ * @param[in] IPIV
+ *          The ipiv descriptor associated to A and created with
+ *          CHAMELEON_Ipiv_Create().
+ *          It holds the pivot indices associated to the PLU
+ *          factorization of A.
+ *
+ * @param[in,out] B
+ *          On entry, the N-by-NRHS matrix of right hand side matrix B.
+ *          On exit, the solution matrix X.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgetrs
+ * @sa CHAMELEON_zgetrs_Tile_Async
+ * @sa CHAMELEON_cgetrs_Tile
+ * @sa CHAMELEON_dgetrs_Tile
+ * @sa CHAMELEON_sgetrs_Tile
+ * @sa CHAMELEON_zgetrf_Tile
+ *
+ */
+int CHAMELEON_zgetrs_Tile( cham_trans_t trans,
+                           CHAM_desc_t *A,
+                           CHAM_ipiv_t *IPIV,
+                           CHAM_desc_t *B )
+{
+    CHAM_context_t     *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
+    int                 status;
+    void               *ws;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zgetrs_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( ( trans != ChamTrans ) && ( trans != ChamNoTrans ) ) {
+        chameleon_error("CHAMELEON_zgetrs_Tile", "Only ChamTrans and ChamNoTrans are supported");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    ws = CHAMELEON_zgetrf_WS_Alloc( B );
+
+    CHAMELEON_zgetrs_Tile_Async( trans, A, IPIV, B, ws, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+    CHAMELEON_Desc_Flush( B, sequence );
+
+    /* Submitted tasks may still use the workspace: wait for them before freeing it */
+    chameleon_sequence_wait( chamctxt, sequence );
+    CHAMELEON_zgetrf_WS_Free( ws );
+
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile_Async
+ *
+ * @brief Solves a system of linear equations using previously
+ *    computed LU factorization with partial pivoting.
+ *    Non-blocking equivalent of CHAMELEON_zgetrs_Tile().
+ *    May return before the computation is finished.
+ *    Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] trans
+ *          Specifies the form of the system of equations:
+ *          = ChamNoTrans:   A * X = B     (No transpose)
+ *          = ChamTrans:     A^T * X = B  (Transpose)
+ *          = ChamConjTrans: A^H * X = B  (Conjugate transpose)
+ *          Only ChamNoTrans and ChamTrans are supported.
+ *
+ * @param[in] A
+ *          The tile factors L and U from the factorization, computed by
+ *          CHAMELEON_zgetrf.
+ *
+ * @param[in] IPIV
+ *          The ipiv descriptor associated to A and created with
+ *          CHAMELEON_Ipiv_Create().
+ *          It holds the pivot indices associated to the PLU
+ *          factorization of A.
+ *
+ * @param[in,out] B
+ *          On entry, the N-by-NRHS matrix of right hand side matrix B.
+ *          On exit, the N-by-NRHS solution matrix X.
+ *
+ * @param[in,out] user_ws
+ *          The opaque pointer to pre-allocated getrf workspace through
+ *          CHAMELEON_zgetrf_WS_Alloc() for B. If user_ws is NULL, it is
+ *          automatically allocated, but be careful as it switches the call
+ *          from asynchronous to synchronous.
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgetrs
+ * @sa CHAMELEON_zgetrs_Tile
+ * @sa CHAMELEON_cgetrs_Tile_Async
+ * @sa CHAMELEON_dgetrs_Tile_Async
+ * @sa CHAMELEON_sgetrs_Tile_Async
+ * @sa CHAMELEON_zgetrf_Tile_Async
+ *
+ */
+int CHAMELEON_zgetrs_Tile_Async( cham_trans_t        trans,
+                                 CHAM_desc_t        *A,
+                                 CHAM_ipiv_t        *IPIV,
+                                 CHAM_desc_t        *B,
+                                 void               *user_ws,
+                                 RUNTIME_sequence_t *sequence,
+                                 RUNTIME_request_t  *request )
+{
+    CHAM_context_t             *chamctxt;
+    struct chameleon_pzgetrf_s *ws;
+    RUNTIME_option_t            options;
+    int                         k, tempkm;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zgetrs_Tile_Async", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( ( trans != ChamTrans ) && ( trans != ChamNoTrans ) ) {
+        chameleon_error("CHAMELEON_zgetrs_Tile_Async", "Only ChamTrans and ChamNoTrans are supported");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+    if ( sequence == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zgetrs_Tile_Async", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if ( request == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zgetrs_Tile_Async", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if ( sequence->status == CHAMELEON_SUCCESS ) {
+        request->status = CHAMELEON_SUCCESS;
+    }
+    else {
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED );
+    }
+
+    /* Check descriptors for correctness */
+    if ( chameleon_desc_check( A ) != CHAMELEON_SUCCESS ) {
+        chameleon_error("CHAMELEON_zgetrs_Tile_Async", "invalid first descriptor");
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
+    if ( chameleon_desc_check( B ) != CHAMELEON_SUCCESS ) {
+        chameleon_error("CHAMELEON_zgetrs_Tile_Async", "invalid second descriptor");
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
+    /* Check input arguments */
+    if ( ( A->nb != A->mb ) || ( B->nb != B->mb ) ) {
+        chameleon_error("CHAMELEON_zgetrs_Tile_Async", "only square tiles supported");
+        return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE );
+    }
+
+    if ( user_ws == NULL ) {
+        ws = CHAMELEON_zgetrf_WS_Alloc( B );
+    }
+    else {
+        ws = user_ws;
+    }
+
+    /* IPIV holds pivot data: submit one ipiv_to_perm task per row of tiles of A and wait for them */
+    if ( IPIV->data != NULL ) {
+        RUNTIME_options_init( &options, chamctxt, sequence, request );
+        for ( k = 0; k < A->mt; k++ ) {
+            tempkm = A->get_blkdim( A, k, DIM_m, A->m );
+            INSERT_TASK_ipiv_to_perm( &options, k * A->mb, tempkm, tempkm, 0, A->m,
+                                      IPIV, k );
+        }
+        RUNTIME_options_finalize( &options, chamctxt );
+        chameleon_sequence_wait( chamctxt, sequence );
+    }
+
+    if ( trans == ChamNoTrans ) {
+        /* A * X = B: forward row interchanges on B, then solve with L (unit), then with U */
+        chameleon_pzlaswp( ws, ChamDirForward, B, IPIV, sequence, request );
+
+        chameleon_pztrsm( ChamLeft, ChamLower, ChamNoTrans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
+
+        chameleon_pztrsm( ChamLeft, ChamUpper, ChamNoTrans, ChamNonUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
+    }
+    else {
+        /* A^T * X = B: solve with U^T, then with L^T (unit), then backward row interchanges */
+        chameleon_pztrsm( ChamLeft, ChamUpper, trans, ChamNonUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
+
+        chameleon_pztrsm( ChamLeft, ChamLower, trans, ChamUnit, (CHAMELEON_Complex64_t)1.0, A, B, sequence, request );
+
+        chameleon_pzlaswp( ws, ChamDirBackward, B, IPIV, sequence, request );
+    }
+
+    if ( user_ws == NULL ) {
+        /* The internal workspace is used by the submitted tasks: complete them before freeing it */
+        CHAMELEON_Desc_Flush( A, sequence );
+        CHAMELEON_Desc_Flush( B, sequence );
+        chameleon_sequence_wait( chamctxt, sequence );
+        CHAMELEON_zgetrf_WS_Free( ws );
+    }
+
+    return CHAMELEON_SUCCESS;
+}
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index 25cfa63ecc2402d5ddf81f981308b83889cd0500..5e9b4868e5f62a19029b89ff49c9184ceb1849f9 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -24,6 +24,7 @@
  * @author Alycia Lisito
  * @author Matthieu Kuhn
  * @author Ana Hourcau
+ * @author Matteo Marcos
  * @date 2025-03-24
  * @precisions normal z -> c d s
  *
@@ -57,7 +58,7 @@ int CHAMELEON_zgetrf_incpiv(int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHA
 int CHAMELEON_zgetrf_nopiv(int M, int N, CHAMELEON_Complex64_t *A, int LDA);
int CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV ); //int CHAMELEON_zgetri(int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV); -//int CHAMELEON_zgetrs(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, int *IPIV, CHAMELEON_Complex64_t *B, int LDB); +int CHAMELEON_zgetrs(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, int *IPIV, CHAMELEON_Complex64_t *B, int LDB); int CHAMELEON_zgetrs_incpiv(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descL, int *IPIV, CHAMELEON_Complex64_t *B, int LDB); int CHAMELEON_zgetrs_nopiv(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB); int CHAMELEON_zhemm(cham_side_t side, cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *C, int LDC); @@ -137,7 +138,7 @@ int CHAMELEON_zgetrf_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV); int CHAMELEON_zgetrf_nopiv_Tile(CHAM_desc_t *A); int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A, CHAM_ipiv_t *IPIV ); //int CHAMELEON_zgetri_Tile(CHAM_desc_t *A, int *IPIV); -//int CHAMELEON_zgetrs_Tile(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B); +int CHAMELEON_zgetrs_Tile(cham_trans_t trans, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B); int CHAMELEON_zgetrs_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B); int CHAMELEON_zgetrs_nopiv_Tile(CHAM_desc_t *A, CHAM_desc_t *B); int CHAMELEON_zhemm_Tile(cham_side_t side, cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t beta, CHAM_desc_t *C); @@ -216,7 +217,7 @@ int CHAMELEON_zgetrf_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, void * ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int 
CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, CHAM_ipiv_t *IPIV, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); //int CHAMELEON_zgetri_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -//int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, CHAM_ipiv_t *IPIV, CHAM_desc_t *B, void *user_ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrs_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrs_nopiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zhemm_Tile_Async(cham_side_t side, cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t beta, CHAM_desc_t *C, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index 0838040586e89ad34bfb25ff83cef93f7c6fc4bc..26c296439005d240bd6d28c17ccaf44f06356efd 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -26,6 +26,7 @@ # @author Alycia Lisito # @author Matthieu Kuhn # @author Abel Calluaud +# @author Matteo Marcos # @date 2025-03-24 # ### @@ -52,6 +53,7 @@ set(ZSRC_W_STDAPI testing_zlantr.c testing_zgemm.c testing_zgetrf.c + testing_zgetrs.c testing_zhemm.c testing_zherk.c testing_zher2k.c diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index 000c8fb3e3484e6e09be3e83e1f5774862714cfa..b14bc446dadec1070817994da90f804501ab3eae 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -111,9 +111,11 @@ if (NOT CHAMELEON_SIMULATION) set_tests_properties( test_${cat}_${prec}getrf_ppivblocked_batch PROPERTIES 
ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=3" ) add_test( test_${cat}_${prec}laswp ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/laswp.in ) + add_test( test_${cat}_${prec}getrs ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrs.in ) if ( ${cat} STREQUAL "mpi" ) add_test( test_${cat}_${prec}laswp_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/laswp.in ) + add_test( test_${cat}_${prec}getrs_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/getrs.in ) add_test( test_${cat}_${prec}getrf_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/getrf.in ) set_tests_properties( test_${cat}_${prec}getrf_ppiv_comm_with_task PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=0;CHAMELEON_GETRF_ALL_REDUCE=cham_spu_tasks" ) diff --git a/testing/input/getrs.in b/testing/input/getrs.in new file mode 100644 index 0000000000000000000000000000000000000000..9714143c4ddb4a953f2a9e756715a4ffe44b8735 --- /dev/null +++ b/testing/input/getrs.in @@ -0,0 +1,18 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# GETRS + +# nb: Tile size +# n: Order of the matrix A and number of rows of matrix B +# nrhs: The number of columns of matrix B +# lda: Leading dimension of matrix A +# ldb: Leading dimension of matrix B + +op = getrs +nb = 16, 17 +ib = 16, 17 +n = 15, 21, 35 +nrhs = 1, 13, 22, 33 +lda = 40 +ldb = 41 diff --git a/testing/testing_zgetrs.c b/testing/testing_zgetrs.c new file mode 100644 index 0000000000000000000000000000000000000000..4a3713be3db61d974b151d1100edb63dac512495 --- /dev/null +++ b/testing/testing_zgetrs.c @@ -0,0 +1,230 @@ +/** + * + * @file testing_zgetrs.c + * + * @copyright 2019-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgetrf testing + * + * @version 1.3.0 + * @author Matteo Marcos + * @date 2025-03-24 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include <chameleon_lapack.h> +#include "chameleon/chameleon_z.h" +#include "testings.h" +#include "testing_zcheck.h" +#include <chameleon/flops.h> +#include <chameleon/getenv.h> +#if defined(CHAMELEON_TESTINGS_VENDOR) || !defined(CHAMELEON_SIMULATION) +#include <coreblas.h> +#include <coreblas/lapacke.h> +#endif + +#if !defined(CHAMELEON_TESTINGS_VENDOR) +int +testing_zgetrs_desc( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int async = parameters_getvalue_int( "async" ); + int nb = run_arg_get_nb( args ); + int ib = run_arg_get_ib( args ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", testing_ialea() ); + int seedB = run_arg_get_int( args, "seedB", testing_ialea() ); + + /* Descriptors */ + CHAM_desc_t *descA, *descX; + CHAM_ipiv_t *descIPIV; + void *ws = NULL; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + CHAMELEON_Set( CHAMELEON_INNER_BLOCK_SIZE, ib ); + + /* Creates the matrices */ + parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, N, N ); + parameters_desc_create( "X", &descX, ChamComplexDouble, nb, nb, LDB, NRHS, N, NRHS ); + CHAMELEON_Ipiv_Create( &descIPIV, descA, NULL ); + + CHAMELEON_zplrnt_Tile( descA, seedA ); + CHAMELEON_zplrnt_Tile( descX, seedB ); + + CHAMELEON_zgetrf_Tile( descA, descIPIV ); + + if ( async ) { + ws = CHAMELEON_zgetrf_WS_Alloc( descX ); + } + + /* Calculates the solution */ + testing_start( &test_data ); + if ( async ) { + hres = CHAMELEON_zgetrs_Tile_Async( ChamNoTrans, descA, descIPIV, descX, ws, test_data.sequence, &test_data.request ); + 
CHAMELEON_Desc_Flush( descA, test_data.sequence ); + CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence ); + } + else { + hres = CHAMELEON_zgetrs_Tile( ChamNoTrans, descA, descIPIV, descX ); + } + test_data.hres = hres; + testing_stop( &test_data, flops_zgetrs( N, NRHS ) ); + + /* Checks the factorization and residual */ +#if !defined(CHAMELEON_SIMULATION) + if ( check ) { + CHAM_desc_t *descA0, *descB; + + descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_TILE ); + descB = CHAMELEON_Desc_Copy( descX, CHAMELEON_MAT_ALLOC_TILE ); + + CHAMELEON_zplrnt_Tile( descA0, seedA ); + CHAMELEON_zplrnt_Tile( descB, seedB ); + + hres += check_zsolve( args, ChamGeneral, ChamNoTrans, ChamUpperLower, descA0, descX, descB ); + + CHAMELEON_Desc_Destroy( &descA0 ); + CHAMELEON_Desc_Destroy( &descB ); + } +#endif /* !defined(CHAMELEON_SIMULATION) */ + + if ( ws != NULL ) { + CHAMELEON_zgetrf_WS_Free( ws ); + } + + CHAMELEON_Ipiv_Destroy( &descIPIV, descA ); + parameters_desc_destroy( &descA ); + parameters_desc_destroy( &descX ); + + return hres; +} +#endif + +int +testing_zgetrs_std( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ +#if !defined(CHAMELEON_TESTINGS_VENDOR) + int api = parameters_getvalue_int( "api" ); +#endif + int nb = run_arg_get_nb( args ); + int N = run_arg_get_int( args, "N", 1000 ); + int NRHS = run_arg_get_int( args, "NRHS", 1 ); + int LDA = run_arg_get_int( args, "LDA", N ); + int LDB = run_arg_get_int( args, "LDB", N ); + int seedA = run_arg_get_int( args, "seedA", testing_ialea() ); + int seedB = run_arg_get_int( args, "seedB", testing_ialea() ); + + /* Descriptors */ + CHAMELEON_Complex64_t *A, *X; + int *IPIV; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices */ + A = malloc( sizeof(CHAMELEON_Complex64_t) * LDA*N ); + X = malloc( sizeof(CHAMELEON_Complex64_t) * LDB*NRHS ); + IPIV = malloc( sizeof(int) * N ); + + /* Fills the matrix with random values */ + 
CHAMELEON_zplrnt( N, N, A, LDA, seedA ); + CHAMELEON_zplrnt( N, NRHS, X, LDB, seedB ); + + CHAMELEON_zgetrf( N, N, A, LDA, IPIV ); + + /* Calculates the solution */ +#if defined(CHAMELEON_TESTINGS_VENDOR) + testing_start( &test_data ); + hres = LAPACKE_zgetrs( LAPACK_COL_MAJOR, 'N', N, NRHS, A, LDA, IPIV, X, LDB ); + test_data.hres = hres; + testing_stop( &test_data, flops_zgetrs( N, NRHS ) ); +#else + testing_start( &test_data ); + switch ( api ) { + case 1: + hres = CHAMELEON_zgetrs( ChamNoTrans, N, NRHS, A, LDA, IPIV, X, LDB); + break; +#if !defined(CHAMELEON_SIMULATION) & 0 + case 2: + CHAMELEON_lapacke_zgetrs( CblasColMajor, N, NRHS, A, LDA, IPIV, B, LDB ); + break; +#endif + default: + if ( CHAMELEON_Comm_rank() == 0 ) { + fprintf( stderr, + "SKIPPED: This function can only be used with the option --api 1 or --api 2.\n" ); + } + return -1; + } + test_data.hres = hres; + testing_stop( &test_data, flops_zgetrs( N, NRHS ) ); + +#if !defined(CHAMELEON_SIMULATION) + /* Checks the factorisation and residue */ + if ( check ) { + CHAMELEON_Complex64_t *A0 = malloc( sizeof(CHAMELEON_Complex64_t) * LDA*N ); + CHAMELEON_Complex64_t *B = malloc( sizeof(CHAMELEON_Complex64_t) * LDB*NRHS ); + + CHAMELEON_zplrnt( N, N, A0, LDA, seedA ); + CHAMELEON_zplrnt( N, NRHS, B, LDB, seedB ); + + hres += check_zsolve_std( args, ChamGeneral, ChamNoTrans, ChamUpperLower, N, NRHS, A0, LDA, X, B, LDB ); + + free( A0 ); + free( B ); + } +#endif +#endif + + free ( IPIV ); + free( A ); + free( X ); + + (void)check; + return hres; +} + +testing_t test_zgetrs; +#if defined(CHAMELEON_TESTINGS_VENDOR) +const char *zgetrs_params[] = { "m", "n", "lda", "seedA", NULL }; +#else +const char *zgetrs_params[] = { "mtxfmt", "nb", "ib", "n", "nrhs", "lda", "ldb", "seedA", "seedB", NULL }; +#endif +const char *zgetrs_output[] = { NULL }; +const char *zgetrs_outchk[] = { "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zgetrs_init( void ) __attribute__( ( constructor ) 
); +void +testing_zgetrs_init( void ) +{ + test_zgetrs.name = "zgetrs"; + test_zgetrs.helper = "General triangular solve (LU with partial pivoting)"; + test_zgetrs.params = zgetrs_params; + test_zgetrs.output = zgetrs_output; + test_zgetrs.outchk = zgetrs_outchk; +#if defined(CHAMELEON_TESTINGS_VENDOR) + test_zgetrs.fptr_desc = NULL; +#else + test_zgetrs.fptr_desc = testing_zgetrs_desc; +#endif + test_zgetrs.fptr_std = testing_zgetrs_std; + test_zgetrs.next = NULL; + + testing_register( &test_zgetrs ); +}