From 1176bdaa366374b36f68412d11364578784fe2e1 Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Mon, 17 Mar 2025 17:00:02 +0100 Subject: [PATCH] laswp: Add zlaswp operation and testing --- compute/CMakeLists.txt | 2 +- compute/zlaswp.c | 388 ++++++++++++++++++++++++++++++++ control/descriptor_ipiv.c | 41 +++- include/chameleon/chameleon_z.h | 9 +- testing/CMakeLists.txt | 3 +- testing/CTestLists.cmake | 2 + testing/chameleon_ztesting.c | 4 + testing/input/laswp.in | 20 ++ testing/testing_zlaswp.c | 141 ++++++++++++ 9 files changed, 601 insertions(+), 9 deletions(-) create mode 100644 compute/zlaswp.c create mode 100644 testing/input/laswp.in create mode 100644 testing/testing_zlaswp.c diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index a5d95ed43..fafab2d2e 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -237,7 +237,7 @@ set(ZSRC #zhegv.c #zhegvd.c zhetrd.c - #zlaswp.c + zlaswp.c #zlaswpc.c #ztrsmrv.c ################## diff --git a/compute/zlaswp.c b/compute/zlaswp.c new file mode 100644 index 000000000..6d7955e78 --- /dev/null +++ b/compute/zlaswp.c @@ -0,0 +1,388 @@ +/** + * + * @file zlaswp.c + * + * @copyright 2012-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zlaswp wrappers + * + * @version 1.3.0 + * @author Alycia Lisito + * @author Matteo Marcos + * @date 2025-03-24 + * @precisions normal z -> s d c + * + */ +#include "control/common.h" + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * @brief Computes the permutation P*op(A) or op(A)*P where P is the permutation + * matrix generated from IPIV. + * + ******************************************************************************* + * + * @param[in] side + * Specifies whether the permutation is done on the rows or the columns. 
+ *          = ChamLeft:  op(A) = A
+ *          = ChamRight: op(A) = A^T (not implemented, rejected with an error)
+ *
+ * @param[in] dir
+ *          Specifies the order of the permutation.
+ *          = ChamDirForward:  Natural order. P*op(A)
+ *          = ChamDirBackward: Reverse order. op(A)*P
+ *
+ * @param[in] M
+ *          The number of rows of the matrix A. M >= 0.
+ *
+ * @param[in] N
+ *          The number of columns of the matrix A. N >= 0.
+ *
+ * @param[in,out] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ * @param[in] K1
+ *          The first element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] K2
+ *          The last element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] IPIV
+ *          Vector of pivot indices.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_ERR_NOT_INITIALIZED if CHAMELEON is not initialized
+ * @retval CHAMELEON_ERR_NOT_SUPPORTED if side is ChamRight
+ * @retval <0 if -i, the i-th argument had an illegal value
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zlaswp_Tile
+ * @sa CHAMELEON_zlaswp_Tile_Async
+ * @sa CHAMELEON_claswp
+ * @sa CHAMELEON_dlaswp
+ * @sa CHAMELEON_slaswp
+ *
+ */
+int CHAMELEON_zlaswp( cham_side_t side, cham_dir_t dir, int M, int N,
+                      CHAMELEON_Complex64_t *A, int LDA,
+                      int K1, int K2, int *IPIV )
+{
+    int                 status;
+    int                 NB;
+    CHAM_context_t     *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t         descAl, descAt;
+    CHAM_ipiv_t        *descIPIV;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( side == ChamRight ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp", "Only ChamLeft is implemented");
+        return CHAMELEON_ERR_NOT_SUPPORTED;
+    }
+    /* Check input arguments: -i reports the i-th argument of the call */
+    if ( M < 0 ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of M");
+        return -3;
+    }
+    if ( N < 0 ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of N");
+        return -4;
+    }
+    if ( LDA < chameleon_max( 1, M ) ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of LDA");
+        return -6;
+    }
+
+    /* Quick return: when M == 0 no K1/K2 can pass the range checks below */
+    if ( chameleon_min( N, M ) == 0 ) {
+        return CHAMELEON_SUCCESS;
+    }
+
+    if ( ( K1 < 1 ) || ( K1 > M ) ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of K1");
+        return -7;
+    }
+    if ( ( K2 < 1 ) || ( K2 > M ) ) {
+        chameleon_error("CHAMELEON_zlaswp", "illegal value of K2");
+        return -8;
+    }
+
+    /* Tune NB depending on M and N */
+    status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0);
+    if ( status != CHAMELEON_SUCCESS ) {
+        chameleon_error("CHAMELEON_zlaswp", "chameleon_tune() failed");
+        return status;
+    }
+
+    NB = CHAMELEON_NB;
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion: A is read and written back, hence Inout */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower,
+                         A, NB, NB, LDA, N, M, N, sequence, &request );
+    CHAMELEON_Ipiv_Create( &descIPIV, &descAt, IPIV );
+    CHAMELEON_Ipiv_Init( &descAt, descIPIV );
+
+    /* Call the tile interface */
+    CHAMELEON_zlaswp_Tile_Async( side, dir, &descAt, K1, K2, descIPIV, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInout, ChamUpperLower, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Cleanup the temporary data */
+    CHAMELEON_Ipiv_Destroy( &descIPIV, &descAt );
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ * @brief Tile equivalent of CHAMELEON_zlaswp().
+ *
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] side
+ *          Specifies whether the permutation is done on the rows or the columns.
+ *          = ChamLeft:  op(A) = A
+ *          = ChamRight: op(A) = A^T (not implemented, rejected with an error)
+ *
+ * @param[in] dir
+ *          Specifies the order of the permutation.
+ *          = ChamDirForward:  Natural order. P*op(A)
+ *          = ChamDirBackward: Reverse order. op(A)*P
+ *
+ * @param[in,out] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] K1
+ *          The first element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] K2
+ *          The last element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] IPIV
+ *          Vector of pivot indices.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_ERR_NOT_INITIALIZED if CHAMELEON is not initialized
+ * @retval CHAMELEON_ERR_NOT_SUPPORTED if side is ChamRight
+ * @retval CHAMELEON_ERR_ILLEGAL_VALUE if K1 or K2 is outside [1, A->m]
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zlaswp
+ * @sa CHAMELEON_zlaswp_Tile_Async
+ * @sa CHAMELEON_claswp_Tile
+ * @sa CHAMELEON_dlaswp_Tile
+ * @sa CHAMELEON_slaswp_Tile
+ *
+ */
+int CHAMELEON_zlaswp_Tile( cham_side_t side, cham_dir_t dir, CHAM_desc_t *A,
+                           int K1, int K2, CHAM_ipiv_t *IPIV )
+{
+    CHAM_context_t     *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
+    int                 status;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( side == ChamRight ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile", "Only ChamLeft is implemented");
+        return CHAMELEON_ERR_NOT_SUPPORTED;
+    }
+    /* K1 and K2 are 1-based and must designate a row of A */
+    if ( ( K1 < 1 ) || ( K1 > A->m ) ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile", "illegal value of K1");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+    if ( ( K2 < 1 ) || ( K2 > A->m ) ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile", "illegal value of K2");
+        return CHAMELEON_ERR_ILLEGAL_VALUE;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    CHAMELEON_zlaswp_Tile_Async( side, dir, A, K1, K2, IPIV, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+    CHAMELEON_Ipiv_Flush( IPIV, sequence );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile_Async
+ *
+ * @brief Non-blocking equivalent of CHAMELEON_zlaswp_Tile().
+ *
+ *  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] side
+ *          Specifies whether the permutation is done on the rows or the columns.
+ *          = ChamLeft: op(A) = A
+ *          = ChamRight: op(A) = A^T
+ *
+ * @param[in] dir
+ *          Specifies the order of the permutation.
+ *          = ChamDirForward: Natural order. P*op(A)
+ *          = ChamDirBackward: Reverse order. op(A)*P
+ *
+ * @param[in,out] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] K1
+ *          The first element of IPIV for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] K2
+ *          The last element of ipiv for which an interchange will
+ *          be done. Must follow the Fortran numbering standard.
+ *
+ * @param[in] IPIV
+ *          Vector of pivot indices.
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ * @sa CHAMELEON_zlaswp
+ * @sa CHAMELEON_zlaswp_Tile
+ * @sa CHAMELEON_claswp_Tile_Async
+ * @sa CHAMELEON_dlaswp_Tile_Async
+ * @sa CHAMELEON_slaswp_Tile_Async
+ *
+ */
+int CHAMELEON_zlaswp_Tile_Async( cham_side_t side, cham_dir_t dir, CHAM_desc_t *A,
+                                 int K1, int K2, CHAM_ipiv_t *IPIV,
+                                 RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t             *chamctxt;
+    struct chameleon_pzgetrf_s *ws;
+    RUNTIME_option_t            options;
+    int                         k, tempkm;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile_Async", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( side == ChamRight ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile_Async", "Only ChamLeft is implemented");
+        return CHAMELEON_ERR_NOT_SUPPORTED;
+    }
+    if ( sequence == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile_Async", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if ( request == NULL ) {
+        chameleon_fatal_error("CHAMELEON_zlaswp_Tile_Async", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if ( sequence->status != CHAMELEON_SUCCESS ) {
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
+    }
+    request->status = CHAMELEON_SUCCESS;
+
+    /* Check descriptors for correctness (A is validated before A->m is read) */
+    if ( chameleon_desc_check(A) != CHAMELEON_SUCCESS ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "invalid first descriptor");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    if ( IPIV == NULL ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "NULL IPIV descriptor");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_UNALLOCATED);
+    }
+    /* Check input arguments */
+    if ( A->mb != A->nb ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "only matching tile sizes supported");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+
+    /* Quick return */
+    if ( chameleon_min( A->m, A->n ) == 0 ) {
+        return CHAMELEON_SUCCESS;
+    }
+
+    if ( ( K1 < 1 ) || ( K1 > A->m ) ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "illegal value of K1");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    if ( ( K2 < 1 ) || ( K2 > A->m ) ) {
+        chameleon_error("CHAMELEON_zlaswp_Tile_Async", "illegal value of K2");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+
+    if ( IPIV->data != NULL ) {
+        RUNTIME_options_init( &options, chamctxt, sequence, request );
+        for ( k = 0; k < A->mt; k++ ) {
+            tempkm = A->get_blkdim( A, k, DIM_m, A->m );
+            INSERT_TASK_ipiv_to_perm( &options, k * A->mb, tempkm, tempkm, K1 - 1, K2 - 1,
+                                      IPIV, k );
+        }
+        /* NOTE(review): blocking wait inside an _Async entry point - confirm it is required */
+        chameleon_sequence_wait( chamctxt, sequence );
+    }
+
+    /* NOTE(review): ws is freed right after submission - confirm no task still uses it */
+    ws = CHAMELEON_zgetrf_WS_Alloc( A );
+    chameleon_pzlaswp( ws, dir, A, IPIV, sequence, request );
+    CHAMELEON_zgetrf_WS_Free( ws );
+
+    return CHAMELEON_SUCCESS;
+}
+
diff --git a/control/descriptor_ipiv.c b/control/descriptor_ipiv.c
index 84067cf5f..d46269d32 100644
--- a/control/descriptor_ipiv.c
+++ b/control/descriptor_ipiv.c
@@ -14,7 +14,7 @@
  * @author Matthieu Kuhn
  * @author Alycia Lisito
  * @author Florent Pruvost
- * @date 2024-08-29
+ * @date 2025-03-24
  *
  ***
  *
@@ -148,6 +148,45 @@ int CHAMELEON_Ipiv_Create( CHAM_ipiv_t **ipivptr, const CHAM_desc_t *desc, void
     return CHAMELEON_SUCCESS;
 }
 
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Initialize the IPIV descriptor.
+ *
+ *******************************************************************************
+ *
+ * @param[in] descA
+ *          Descriptor of the matrix A.
+ *
+ * @param[in,out] descIPIV
+ *          Descriptor of the pivot array. Should be initialized using
+ *          CHAMELEON_Ipiv_Create() with data filled with the vector of pivot.
+ *
+ *******************************************************************************
+ *
+ * @note Blocking: the initialization task is waited for before returning.
+ */
+void CHAMELEON_Ipiv_Init( const CHAM_desc_t *descA, CHAM_ipiv_t *descIPIV )
+{
+    RUNTIME_option_t    options;
+    RUNTIME_request_t   request  = RUNTIME_REQUEST_INITIALIZER;
+    RUNTIME_sequence_t *sequence = NULL;
+    CHAM_context_t     *chamctxt;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        chameleon_fatal_error("CHAMELEON_Ipiv_Init", "CHAMELEON not initialized");
+        return;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+    RUNTIME_options_init( &options, chamctxt, sequence, &request );
+    INSERT_TASK_ipiv_init_data( &options, descIPIV );
+    chameleon_sequence_wait( chamctxt, sequence );
+    chameleon_sequence_destroy( chamctxt, sequence );
+}
+
 /**
  *****************************************************************************
  *
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index 37d352bfd..25cfa63ec 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -76,8 +76,7 @@ double CHAMELEON_zlansy(cham_normtype_t norm, cham_uplo_t uplo, int N, CHAMELEON
 double CHAMELEON_zlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, int M, int N, CHAMELEON_Complex64_t *A, int LDA);
 int CHAMELEON_zlascal(cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t *A, int LDA);
 int CHAMELEON_zlaset(cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *A, int LDA);
-//int CHAMELEON_zlaswp(int N, CHAMELEON_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX);
-//int CHAMELEON_zlaswpc(int N, CHAMELEON_Complex64_t *A, int LDA, int K1, int K2, int *IPIV, int INCX);
+int CHAMELEON_zlaswp( cham_side_t side, cham_dir_t dir, int M, int N, CHAMELEON_Complex64_t *A, int LDA, int K1, int K2, int *IPIV );
 int CHAMELEON_zlatms( int M, int N, cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAMELEON_Complex64_t *A, int LDA );
 int CHAMELEON_zlauum(cham_uplo_t uplo, int
N, CHAMELEON_Complex64_t *A, int LDA); int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed ); @@ -157,8 +156,7 @@ double CHAMELEON_zlansy_Tile(cham_normtype_t norm, cham_uplo_t uplo, CHAM_desc_t double CHAMELEON_zlantr_Tile(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A); int CHAMELEON_zlascal_Tile(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A); int CHAMELEON_zlaset_Tile(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A); -//int CHAMELEON_zlaswp_Tile(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX); -//int CHAMELEON_zlaswpc_Tile(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX); +int CHAMELEON_zlaswp_Tile( cham_side_t side, cham_dir_t dir, CHAM_desc_t *A, int K1, int K2, CHAM_ipiv_t *IPIV ); int CHAMELEON_zlatms_Tile( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A ); int CHAMELEON_zlauum_Tile(cham_uplo_t uplo, CHAM_desc_t *A); int CHAMELEON_zplghe_Tile(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed ); @@ -237,8 +235,7 @@ int CHAMELEON_zlansy_Tile_Async(cham_normtype_t norm, cham_uplo_t uplo, CHAM_des int CHAMELEON_zlantr_Tile_Async(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, double *value, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zlascal_Tile_Async(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zlaset_Tile_Async(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -//int CHAMELEON_zlaswp_Tile_Async(CHAM_desc_t *A, int K1, int K2, int *IPIV, int INCX, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -//int CHAMELEON_zlaswpc_Tile_Async(CHAM_desc_t *A, 
int K1, int K2, int *IPIV, int INCX, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int CHAMELEON_zlaswp_Tile_Async( cham_side_t side, cham_dir_t dir, CHAM_desc_t *A, int K1, int K2, CHAM_ipiv_t *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); int CHAMELEON_zlatms_Tile_Async( cham_dist_t idist, unsigned long long int seed, cham_sym_t sym, double *D, int mode, double cond, double dmax, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); int CHAMELEON_zlauum_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zplghe_Tile_Async(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index 81268734d..083804058 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -26,7 +26,7 @@ # @author Alycia Lisito # @author Matthieu Kuhn # @author Abel Calluaud -# @date 2025-01-24 +# @date 2025-03-24 # ### @@ -81,6 +81,7 @@ set(ZSRC_WO_STDAPI testing_zgenm2.c testing_zgesv_nopiv.c testing_zgesvd.c + testing_zlaswp.c testing_zgetrf_nopiv.c testing_zgetrs_nopiv.c testing_zgeqrf.c diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake index 39b7e89e0..000c8fb3e 100644 --- a/testing/CTestLists.cmake +++ b/testing/CTestLists.cmake @@ -110,8 +110,10 @@ if (NOT CHAMELEON_SIMULATION) add_test( test_${cat}_${prec}getrf_ppivblocked_batch ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/getrf.in ) set_tests_properties( test_${cat}_${prec}getrf_ppivblocked_batch PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=3" ) + add_test( test_${cat}_${prec}laswp ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P 1 -f input/laswp.in ) if ( ${cat} STREQUAL "mpi" ) + add_test( test_${cat}_${prec}laswp_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/laswp.in 
) add_test( test_${cat}_${prec}getrf_ppiv_comm_with_task ${PREFIX} ${CMD} -c -t ${THREADS} -g ${gpus} -P ${NP} -f input/getrf.in ) set_tests_properties( test_${cat}_${prec}getrf_ppiv_comm_with_task PROPERTIES ENVIRONMENT "CHAMELEON_GETRF_ALGO=ppiv;CHAMELEON_GETRF_BATCH_SIZE=0;CHAMELEON_GETRF_ALL_REDUCE=cham_spu_tasks" ) diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c index 27e2ad9e4..979abaf12 100644 --- a/testing/chameleon_ztesting.c +++ b/testing/chameleon_ztesting.c @@ -133,6 +133,10 @@ parameter_t parameters[] = { { NULL, "SVD parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL }, { "jobu", "Value of the jobu parameter ('NoVec', 'Vec', 'Ivec', 'AllVec', 'SVec', 'OVec')", -50, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestJob, {0}, NULL, pread_job, sprint_job }, { "jobvt", "Value of the jobvt parameter ('NoVec', 'Vec', 'Ivec', 'AllVec', 'SVec', 'OVec')", -51, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 5, TestJob, {0}, NULL, pread_job, sprint_job }, + + { NULL, "LASWP parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL }, + { "k1", "Index of the first element to permute", -70, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, + { "k2", "Index of the last element to permute", -71, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int }, #endif { "tsub", "Graph submission time in s", 999, PARAM_OUTPUT, 2, 13, TestValFixdbl, {0}, NULL, pread_fixdbl, sprint_fixdbl }, diff --git a/testing/input/laswp.in b/testing/input/laswp.in new file mode 100644 index 000000000..41037f5e5 --- /dev/null +++ b/testing/input/laswp.in @@ -0,0 +1,20 @@ +# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step] +# Not given parameters will receive default values + +# LASWP + +# nb: Tile size +# n: Order of the matrix A +# lda: Leading dimension of matrix A +# k1: First element of ipiv to apply 
the permutation.
+# k2: Last element of ipiv to apply the permutation.
+# dir: Specifies the order of the permutation.
+
+op = laswp
+nb = 4, 16, 17
+n = 15, 21, 35
+lda = 40
+k1 = 1, 2, 10
+k2 = 1, 2, 10
+dir = Forward, Backward
+
diff --git a/testing/testing_zlaswp.c b/testing/testing_zlaswp.c
new file mode 100644
index 000000000..56c7a8a84
--- /dev/null
+++ b/testing/testing_zlaswp.c
@@ -0,0 +1,162 @@
+/**
+ *
+ * @file testing_zlaswp.c
+ *
+ * @copyright 2025-2025 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlaswp testing
+ *
+ * @version 1.3.0
+ * @author Matteo Marcos
+ * @date 2025-03-24
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon/constants.h"
+#include "chameleon/struct.h"
+#include "testings.h"
+#include "chameleon/chameleon_z.h"
+#include "testing_zcheck.h"
+#include <chameleon/flops.h>
+#include <chameleon/getenv.h>
+#include <coreblas/lapacke.h>
+#include <chameleon/tasks.h>
+
+/**
+ * @brief Fill IPIV with M random pivot indices in Fortran (1-based) numbering.
+ *
+ * Entry i is drawn as testing_ialea() % ( M - i ) + i + 1, i.e. in [i+1, M]
+ * assuming testing_ialea() returns a non-negative value (TODO confirm).
+ */
+static void testing_zlaswp_ipiv_gen( int *IPIV, int M )
+{
+    int i;
+
+    for ( i = 0; i < M; i++ ) {
+        IPIV[i] = testing_ialea() % ( M - i ) + i + 1;
+    }
+}
+
+/**
+ * @brief Run CHAMELEON_zlaswp_Tile(_Async) on a random matrix and, when
+ *        requested, compare the result with LAPACKE_zlaswp().
+ *
+ * @param[in] args  List of the run arguments.
+ * @param[in] check If non-zero, the result is checked.
+ *
+ * @return The status of the zlaswp call, plus the value returned by
+ *         check_zmatrices() when the check is performed.
+ */
+int
+testing_zlaswp_desc( run_arg_list_t *args, int check )
+{
+    testdata_t test_data = { .args = args };
+    int        hres      = 0;
+
+    /* Read arguments */
+    int         async = parameters_getvalue_int( "async" );
+    cham_side_t side  = run_arg_get_side( args, "side", ChamLeft );
+    cham_dir_t  dir   = run_arg_get_dir( args, "dir", ChamDirForward );
+    int         nb    = run_arg_get_nb( args );
+    int         N     = run_arg_get_int( args, "N", 1000 );
+    int         M     = run_arg_get_int( args, "M", N );
+    int         LDA   = run_arg_get_int( args, "LDA", M );
+    int         seedA = run_arg_get_int( args, "seedA", testing_ialea() );
+    int         K1    = run_arg_get_int( args, "K1", 1 );
+    int         K2    = run_arg_get_int( args, "K2", M );
+
+    int *IPIV = malloc( sizeof(*IPIV) * M );
+
+    /* Descriptors */
+    CHAM_desc_t *descA;
+    CHAM_ipiv_t *descIPIV;
+
+    /* Allocation failure; a NULL pointer is tolerated when nothing was requested */
+    if ( ( IPIV == NULL ) && ( M > 0 ) ) {
+        return -1;
+    }
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    /* Creates the matrices */
+    parameters_desc_create( "A", &descA, ChamComplexDouble, nb, nb, LDA, N, M, N );
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+
+    testing_zlaswp_ipiv_gen( IPIV, M );
+    CHAMELEON_Ipiv_Create( &descIPIV, descA, IPIV );
+    CHAMELEON_Ipiv_Init( descA, descIPIV );
+
+    /* Calculates the solution */
+    testing_start( &test_data );
+    if ( async ) {
+        hres = CHAMELEON_zlaswp_Tile_Async( side, dir, descA, K1, K2, descIPIV, test_data.sequence, &test_data.request );
+        CHAMELEON_Desc_Flush( descA, test_data.sequence );
+        CHAMELEON_Ipiv_Flush( descIPIV, test_data.sequence );
+    }
+    else {
+        hres = CHAMELEON_zlaswp_Tile( side, dir, descA, K1, K2, descIPIV );
+    }
+    test_data.hres = hres;
+    testing_stop( &test_data, 0 );
+
+#if !defined(CHAMELEON_SIMULATION)
+    if ( check ) {
+        CHAM_desc_t *descA0, *descA0c;
+        int          INCX = ( dir == ChamDirForward ) ? 1 : -1;
+
+        descA0 = CHAMELEON_Desc_Copy( descA, CHAMELEON_MAT_ALLOC_TILE );
+
+        /* Reference: regenerate A from seedA and permute it with LAPACKE on rank 0 */
+        CHAMELEON_Desc_Create_User(
+            &descA0c, (void*)CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble,
+            nb, nb, nb*nb, M, N, 0, 0, M, N, 1, 1,
+            chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL, NULL );
+
+        CHAMELEON_zplrnt_Tile( descA0c, seedA );
+
+        if ( CHAMELEON_Comm_rank() == 0 ) {
+            LAPACKE_zlaswp( LAPACK_COL_MAJOR, N, descA0c->mat, M, K1, K2, IPIV, INCX );
+        }
+
+        CHAMELEON_zlacpy_Tile( ChamUpperLower, descA0c, descA0 );
+        CHAMELEON_Desc_Destroy( &descA0c );
+
+        hres += check_zmatrices( args, ChamUpperLower, descA, descA0 );
+
+        CHAMELEON_Desc_Destroy( &descA0 );
+    }
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+    CHAMELEON_Ipiv_Destroy( &descIPIV, descA );
+    parameters_desc_destroy( &descA );
+    free( IPIV );
+
+    return hres;
+}
+
+testing_t   test_zlaswp;
+const char *zlaswp_params[] = { "mtxfmt", "nb", "n", "m", "lda", "seedA", "k1", "k2", "side", "dir", NULL };
+const char *zlaswp_output[] = { NULL };
+const char *zlaswp_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function
+ */
+void testing_zlaswp_init( void )
__attribute__( ( constructor ) );
+void
+testing_zlaswp_init( void )
+{
+    test_zlaswp.next      = NULL;
+    test_zlaswp.fptr_desc = testing_zlaswp_desc;
+    test_zlaswp.outchk    = zlaswp_outchk;
+    test_zlaswp.output    = zlaswp_output;
+    test_zlaswp.params    = zlaswp_params;
+    test_zlaswp.helper    = "Row interchange on general matrices";
+    test_zlaswp.name      = "zlaswp";
+    /* Hand the filled descriptor over to testing_register() */
+    testing_register( &test_zlaswp );
+}
+
-- 
GitLab