diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 59a24ff8eb4eb9e02cf765c1429b3cc4db7c009f..0106e546cf95b7b0875761e6de3b4209bcf91f4a 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -100,6 +100,7 @@ set(ZSRC pzgeqrf_param.c pzgetrf_incpiv.c pzgetrf_nopiv.c + pzgetrf.c pzlacpy.c pzlange.c pzlansy.c @@ -150,6 +151,7 @@ set(ZSRC #zgetrf.c zgetrf_incpiv.c zgetrf_nopiv.c + zgetrf.c zgetrs_incpiv.c zgetrs_nopiv.c zlacpy.c diff --git a/compute/pzgetrf.c b/compute/pzgetrf.c new file mode 100644 index 0000000000000000000000000000000000000000..7e6237c7ef7fbba9fc6f2574da2e834920e410e7 --- /dev/null +++ b/compute/pzgetrf.c @@ -0,0 +1,188 @@ +/** + * + * @file pzgetrf.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgetrf parallel algorithm + * + * @version 1.2.0 + * @author Omar Zenati + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Matthieu Kuhn + * @date 2022-02-22 + * @precisions normal z -> s d c + * + */ +#include "control/common.h" + +#define A(m,n) A, m, n + +/* + * All the functions below are panel factorization variant. + * The parameters are: + * @param[inout] ws + * The data structure associated to the algorithm that holds all extra + * information that may be needed for LU factorization + * + * @param[inout] A + * The descriptor of the full matrix A (not just the panel) + * + * @param[in] k + * The index of the column to factorize + * + * @param[in] ib + * The index of the column to factorize + * + * @param[inout] options + * The runtime options data structure to pass through all insert_task calls. + */ +static inline void +chameleon_pzgetrf_panel_facto_nopiv( void *ws, + CHAM_desc_t *A, + int k, + RUNTIME_option_t *options ) +{ + const CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t) 1.0; + int m, tempkm, tempkn, tempmm; + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; + + /* + * Algorithm per block without pivoting + */ + INSERT_TASK_zgetrf_nopiv( + options, + tempkm, tempkn, 32, A->mb, + A(k, k), 0); + + for (m = k+1; m < A->mt; m++) { + tempmm = (m == (A->mt - 1)) ? A->m - m * A->mb : A->mb; + INSERT_TASK_ztrsm( + options, + ChamRight, ChamUpper, ChamNoTrans, ChamNonUnit, + tempmm, tempkn, A->mb, + zone, A(k, k), + A(m, k) ); + } +} +static inline void +chameleon_pzgetrf_panel_facto( void *ws, + CHAM_desc_t *A, + int k, + RUNTIME_option_t *options ) +{ + chameleon_pzgetrf_panel_facto_nopiv( ws, A, k, options ); +} + +/** + * Permutation of the panel n at step k + */ +static inline void +chameleon_pzgetrf_panel_permute( void *ws, + CHAM_desc_t *A, + int k, + int n, + RUNTIME_option_t *options ) +{ + (void)ws; + (void)A; + (void)k; + (void)n; + (void)options; +} + +static inline void +chameleon_pzgetrf_panel_update( void *ws, + CHAM_desc_t *A, + int k, + int n, + RUNTIME_option_t *options ) +{ + const CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t) 1.0; + const CHAMELEON_Complex64_t mzone = (CHAMELEON_Complex64_t)-1.0; + + int m, tempkm, tempmm, tempnn; + + tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb; + tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb; + + chameleon_pzgetrf_panel_permute( ws, A, k, n, options ); + + INSERT_TASK_ztrsm( + options, + ChamLeft, ChamLower, ChamNoTrans, ChamUnit, + tempkm, tempnn, A->mb, + zone, A(k, k), + A(k, n) ); + + for (m = k+1; m < A->mt; m++) { + tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; + + INSERT_TASK_zgemm( + options, + ChamNoTrans, ChamNoTrans, + tempmm, tempnn, A->mb, A->mb, + mzone, A(m, k), + A(k, n), + zone, A(m, n) ); + } + + RUNTIME_data_flush( options->sequence, A(k, n) ); +} + +/** + * Parallel tile LU factorization with no pivoting - dynamic scheduling + */ +void chameleon_pzgetrf( void *ws, + CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + RUNTIME_option_t options; + + int k, m, n; + int min_mnt = chameleon_min( A->mt, A->nt ); + + chamctxt = chameleon_context_self(); + if (sequence->status != CHAMELEON_SUCCESS) { + return; + } + RUNTIME_options_init( &options, chamctxt, sequence, request ); + + for (k = 0; k < min_mnt; k++) { + RUNTIME_iteration_push( chamctxt, k ); + + options.priority = A->nt; + chameleon_pzgetrf_panel_facto( ws, A, k, &options ); + + for (n = k+1; n < A->nt; n++) { + options.priority = A->nt-n; + chameleon_pzgetrf_panel_update( ws, A, k, n, &options ); + } + + /* Flush panel k */ + for (m = k; m < A->mt; m++) { + RUNTIME_data_flush( sequence, A(m, k) ); + } + + RUNTIME_iteration_pop( chamctxt ); + } + + /* Backward pivoting */ + for (k = 1; k < min_mnt; k++) { + for (n = 0; n < k; n++) { + chameleon_pzgetrf_panel_permute( ws, A, k, n, &options ); + } + } + + RUNTIME_options_finalize( &options, chamctxt ); +} diff --git a/compute/pzplrnt.c b/compute/pzplrnt.c index 8b4cfe115330f9c44ddf3a5036ac034709154186..b6fba32067df9ea3a9487302f90569238003efd2 100644 --- a/compute/pzplrnt.c +++ b/compute/pzplrnt.c @@ -26,7 +26,7 @@ #define A(m, n) A, m, n /** - * chameleon_pzplghe - Generate a random matrix by tiles. + * chameleon_pzplrnt - Generate a random matrix by tiles. */ void chameleon_pzplrnt( CHAM_desc_t *A, int bigM, int m0, int n0, unsigned long long int seed, diff --git a/compute/zgetrf.c b/compute/zgetrf.c new file mode 100644 index 0000000000000000000000000000000000000000..52c33a740b24973a32d7a0973d68d6e74c400f3a --- /dev/null +++ b/compute/zgetrf.c @@ -0,0 +1,359 @@ +/** + * + * @file zgetrf.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgetrf wrappers + * + * @version 1.2.0 + * @author Omar Zenati + * @author Mathieu Faverge + * @author Emmanuel Agullo + * @author Cedric Castagnede + * @author Florent Pruvost + * @author Matthieu Kuhn + * @date 2022-09-19 + * + * @precisions normal z -> s d c + * + */ +#include "control/common.h" + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * CHAMELEON_zgetrf_WS_Alloc - Allocate the required workspaces for + * asynchronous getrf + * + ******************************************************************************* + * + * @param[in] A + * The descriptor of the matrix A. + * + ******************************************************************************* + * + * @retval An allocated opaque pointer to use in CHAMELEON_zgetrf_Tile_Async() + * and to free with CHAMELEON_zgetrf_WS_Free(). + * + ******************************************************************************* + * + * @sa CHAMELEON_zgetrf_Tile_Async + * @sa CHAMELEON_zgetrf_WS_Free + * + */ +void * +CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ) +{ + CHAM_context_t *chamctxt; + + chamctxt = chameleon_context_self(); + if ( chamctxt == NULL ) { + return NULL; + } + + return NULL; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * @brief Free the allocated workspaces for asynchronous getrf + * + ******************************************************************************* + * + * @param[in,out] user_ws + * On entry, the opaque pointer allocated by + * CHAMELEON_zgetrf_WS_Alloc() On exit, all data are freed. + * + ******************************************************************************* + * + * @sa CHAMELEON_zgetrf_Tile_Async + * @sa CHAMELEON_zgetrf_WS_Alloc + * + */ +void +CHAMELEON_zgetrf_WS_Free( const CHAM_desc_t *A, void *user_ws ) +{ + CHAM_context_t *chamctxt; + + chamctxt = chameleon_context_self(); + if ( chamctxt == NULL ) { + return; + } + + (void)user_ws; +} + +#if defined(NOT_AVAILABLE_YET) +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * CHAMELEON_zgetrf - Computes an LU factorization of a general M-by-N matrix A + * using the tile LU algorithm without row pivoting. + * WARNING: Don't use this function if you are not sure your matrix is diagonal + * dominant. + * + ******************************************************************************* + * + * @param[in] M + * The number of rows of the matrix A. M >= 0. + * + * @param[in] N + * The number of columns of the matrix A. N >= 0. + * + * @param[in,out] A + * On entry, the M-by-N matrix to be factored. + * On exit, the tile factors L and U from the factorization. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,M). + * + ******************************************************************************* + * + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been + * completed, but the factor U is exactly singular, and division + * by zero will occur if it is used to solve a system of + * equations. + * + ******************************************************************************* + * + * @sa CHAMELEON_zgetrf_Tile + * @sa CHAMELEON_zgetrf_Tile_Async + * @sa CHAMELEON_cgetrf + * @sa CHAMELEON_dgetrf + * @sa CHAMELEON_sgetrf + * + */ +int +CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int *IPIV, int LDA ) +{ + int NB; + int status; + CHAM_desc_t descAl, descAt; + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + void *ws; + + chamctxt = chameleon_context_self(); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgetrf", "CHAMELEON not initialized" ); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + /* Check input arguments */ + if ( M < 0 ) { + chameleon_error( "CHAMELEON_zgetrf", "illegal value of M" ); + return -1; + } + if ( N < 0 ) { + chameleon_error( "CHAMELEON_zgetrf", "illegal value of N" ); + return -2; + } + if ( LDA < chameleon_max( 1, M ) ) { + chameleon_error( "CHAMELEON_zgetrf", "illegal value of LDA" ); + return -4; + } + /* Quick return */ + if ( chameleon_min( M, N ) == 0 ) + return CHAMELEON_SUCCESS; + + /* Tune NB & IB depending on M, N & NRHS; Set NBNBSIZE */ + status = chameleon_tune( CHAMELEON_FUNC_ZGESV, M, N, 0 ); + if ( status != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zgetrf", "chameleon_tune() failed" ); + return status; + } + + /* Set NT & NTRHS */ + NB = CHAMELEON_NB; + + chameleon_sequence_create( chamctxt, &sequence ); + + /* Submit the matrix conversion */ + chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower, + A, NB, NB, LDA, N, M, N, sequence, &request ); + + /* Allocate workspace for partial pivoting */ + ws = CHAMELEON_zgetrf_WS_Alloc( &descAt ); + /* Call the tile interface */ + CHAMELEON_zgetrf_Tile_Async( &descAt, ws, sequence, &request ); + + /* Submit the matrix conversion back */ + chameleon_ztile2lap( chamctxt, &descAl, &descAt, + ChamDescInout, ChamUpperLower, sequence, &request ); + + chameleon_sequence_wait( chamctxt, sequence ); + + /* Cleanup the temporary data */ + CHAMELEON_zgetrf_WS_Free( &descAt, ws ); + chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); + + status = sequence->status; + chameleon_sequence_destroy( chamctxt, sequence ); + + return status; +} +#endif + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t_Tile + * + * CHAMELEON_zgetrf_Tile - Computes the tile LU factorization of a matrix. + * Tile equivalent of CHAMELEON_zgetrf(). Operates on matrices stored by + * tiles. All matrices are passed through descriptors. All dimensions are + * taken from the descriptors. + * + ******************************************************************************* + * + * @param[in,out] A + * On entry, the M-by-N matrix to be factored. + * On exit, the tile factors L and U from the factorization. + * + ******************************************************************************* + * + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed, + * but the factor U is exactly singular, and division by zero will occur + * if it is used to solve a system of equations. + * + ******************************************************************************* + * + * @sa CHAMELEON_zgetrf + * @sa CHAMELEON_zgetrf_Tile_Async + * @sa CHAMELEON_cgetrf_Tile + * @sa CHAMELEON_dgetrf_Tile + * @sa CHAMELEON_sgetrf_Tile + * @sa CHAMELEON_zgetrs_Tile + * + */ +int +CHAMELEON_zgetrf_Tile( CHAM_desc_t *A ) +{ + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; + void *ws; + + chamctxt = chameleon_context_self(); + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgetrf_Tile", "CHAMELEON not initialized" ); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + chameleon_sequence_create( chamctxt, &sequence ); + + ws = CHAMELEON_zgetrf_WS_Alloc( A ); + CHAMELEON_zgetrf_Tile_Async( A, ws, sequence, &request ); + + CHAMELEON_Desc_Flush( A, sequence ); + + chameleon_sequence_wait( chamctxt, sequence ); + CHAMELEON_zgetrf_WS_Free( A, ws ); + + status = sequence->status; + chameleon_sequence_destroy( chamctxt, sequence ); + return status; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t_Tile_Async + * + * CHAMELEON_zgetrf_Tile_Async - Computes the tile LU factorization of a + * matrix. Non-blocking equivalent of CHAMELEON_zgetrf_Tile(). May return + * before the computation is finished. Allows for pipelining of operations ar + * runtime. + * + ******************************************************************************* + * + * @param[in,out] A + * On entry, the M-by-N matrix to be factored. + * On exit, the tile factors L and U from the factorization. + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa CHAMELEON_zgetrf + * @sa CHAMELEON_zgetrf_Tile + * @sa CHAMELEON_cgetrf_Tile_Async + * @sa CHAMELEON_dgetrf_Tile_Async + * @sa CHAMELEON_sgetrf_Tile_Async + * @sa CHAMELEON_zgetrs_Tile_Async + * + */ +int +CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, + void *user_ws, + RUNTIME_sequence_t *sequence, + RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + chamctxt = chameleon_context_self(); + + if ( chamctxt == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgetrf_Tile_Async", "CHAMELEON not initialized" ); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + if ( chamctxt->scheduler != RUNTIME_SCHED_STARPU ) { + chameleon_fatal_error( "CHAMELEON_zgetrf_Tile_Async", "CHAMELEON_zgetrf_Tile_Async is only available with StarPU" ); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + if ( user_ws == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgetrf_Tile_Async", "NULL user_ws" ); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + if ( sequence == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgetrf_Tile_Async", "NULL sequence" ); + return CHAMELEON_ERR_UNALLOCATED; + } + if ( request == NULL ) { + chameleon_fatal_error( "CHAMELEON_zgetrf_Tile_Async", "NULL request" ); + return CHAMELEON_ERR_UNALLOCATED; + } + /* Check sequence status */ + if ( sequence->status == CHAMELEON_SUCCESS ) { + request->status = CHAMELEON_SUCCESS; + } + else { + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED ); + } + + /* Check descriptors for correctness */ + if ( chameleon_desc_check( A ) != CHAMELEON_SUCCESS ) { + chameleon_error( "CHAMELEON_zgetrf_Tile", "invalid first descriptor" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); + } + + /* Check input arguments */ + if ( A->nb != A->mb ) { + chameleon_error( "CHAMELEON_zgetrf_Tile", "only square tiles supported" ); + return chameleon_request_fail( sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE ); + } + + chameleon_pzgetrf( user_ws, A, sequence, request ); + + return CHAMELEON_SUCCESS; +} diff --git a/control/compute_z.h b/control/compute_z.h index 6d2cdcb7f56f127c7f02b71ba361665ecee5470a..b938b178fb8346890c991d7d4a30303b545cd3ae 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -20,7 +20,8 @@ * @author Cedric Castagnede * @author Florent Pruvost * @author Alycia Lisito - * @date 2022-02-22 + * @author Matthieu Kuhn + * @date 2022-09-19 * @precisions normal z -> c d s * */ @@ -77,6 +78,7 @@ void chameleon_pzgepdf_qdwh( cham_mtxtype_t trans, CHAM_desc_t *descU, CHAM_desc void chameleon_pzgepdf_qr( int genD, int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *D1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *D2, CHAM_desc_t *Q2, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +void chameleon_pzgetrf( void *ws, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgetrf_nopiv(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgetrf_reclap(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index 32bedaa6c19abe8fb2629198c13d9d9a3314c49f..c36e6289dcfc7dd19122cd671f25a66717c586f2 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -53,6 +53,7 @@ int CHAMELEON_zgesvd(cham_job_t jobu, cham_job_t jobvt, int M, int N, CHAMELEON_ //int CHAMELEON_zgetrf(int M, int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV); int CHAMELEON_zgetrf_incpiv(int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descL, int *IPIV); int CHAMELEON_zgetrf_nopiv(int M, int N, CHAMELEON_Complex64_t *A, int LDA); +int CHAMELEON_zgetrf( int M, int N, CHAMELEON_Complex64_t *A, int LDA ); //int CHAMELEON_zgetri(int N, CHAMELEON_Complex64_t *A, int LDA, int *IPIV); //int CHAMELEON_zgetrs(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, int *IPIV, CHAMELEON_Complex64_t *B, int LDB); int CHAMELEON_zgetrs_incpiv(cham_trans_t trans, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descL, int *IPIV, CHAMELEON_Complex64_t *B, int LDB); @@ -133,6 +134,7 @@ int CHAMELEON_zgesvd_Tile(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t *A, dou //int CHAMELEON_zgetrf_Tile(CHAM_desc_t *A, int *IPIV); int CHAMELEON_zgetrf_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV); int CHAMELEON_zgetrf_nopiv_Tile(CHAM_desc_t *A); +int CHAMELEON_zgetrf_Tile( CHAM_desc_t *A ); //int CHAMELEON_zgetri_Tile(CHAM_desc_t *A, int *IPIV); //int CHAMELEON_zgetrs_Tile(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B); int CHAMELEON_zgetrs_incpiv_Tile(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B); @@ -209,6 +211,7 @@ int CHAMELEON_zgesvd_Tile_Async(cham_job_t jobu, cham_job_t jobvt, CHAM_desc_t * //int CHAMELEON_zgetrf_Tile_Async(CHAM_desc_t *A, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrf_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrf_nopiv_Tile_Async(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +int CHAMELEON_zgetrf_Tile_Async( CHAM_desc_t *A, void *ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); //int CHAMELEON_zgetri_Tile_Async(CHAM_desc_t *A, int *IPIV, CHAM_desc_t *W, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); //int CHAMELEON_zgetrs_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zgetrs_incpiv_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *L, int *IPIV, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); @@ -320,6 +323,8 @@ void *CHAMELEON_zcesca_WS_Alloc( const CHAM_desc_t *A ); void CHAMELEON_zcesca_WS_Free( void *ws ); void *CHAMELEON_zgram_WS_Alloc( const CHAM_desc_t *A ); void CHAMELEON_zgram_WS_Free( void *ws ); +void *CHAMELEON_zgetrf_WS_Alloc( const CHAM_desc_t *A ); +void CHAMELEON_zgetrf_WS_Free( const CHAM_desc_t *A, void *ws ); int CHAMELEON_Alloc_Workspace_zgesv_incpiv( int N, CHAM_desc_t **descL, int **IPIV, int p, int q); int CHAMELEON_Alloc_Workspace_zgetrf_incpiv(int M, int N, CHAM_desc_t **descL, int **IPIV, int p, int q); diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt index d3efdb830b454e771a27bfdf82f375efb551ba83..8804eb0e0e490bd3a876d64a0afbcce783d09b84 100644 --- a/testing/CMakeLists.txt +++ b/testing/CMakeLists.txt @@ -78,6 +78,7 @@ set(ZSRC_WO_STDAPI testing_zgenm2.c testing_zgesv_nopiv.c testing_zgesvd.c + testing_zgetrf.c testing_zgetrf_nopiv.c testing_zgetrs_nopiv.c testing_zgeqrf.c diff --git a/testing/testing_zgetrf.c b/testing/testing_zgetrf.c new file mode 100644 index 0000000000000000000000000000000000000000..08c13b926556ae49ab292ac0e34f06d9175a8efe --- /dev/null +++ b/testing/testing_zgetrf.c @@ -0,0 +1,159 @@ +/** + * + * @file testing_zgetrf.c + * + * @copyright 2019-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zgetrf testing + * + * @version 1.2.0 + * @author Lucas Barros de Assis + * @author Mathieu Faverge + * @author Alycia Lisito + * @date 2022-02-22 + * @precisions normal z -> c d s + * + */ +#include <chameleon.h> +#include "testings.h" +#include "testing_zcheck.h" +#include <chameleon/flops.h> +#include <coreblas/lapacke.h> + +#if !defined(CHAMELEON_TESTINGS_VENDOR) +int +testing_zgetrf_desc( run_arg_list_t *args, int check ) +{ + testdata_t test_data = { .args = args }; + int hres = 0; + + /* Read arguments */ + int async = parameters_getvalue_int( "async" ); + intptr_t mtxfmt = parameters_getvalue_int( "mtxfmt" ); + int nb = run_arg_get_int( args, "nb", 320 ); + int P = parameters_getvalue_int( "P" ); + int N = run_arg_get_int( args, "N", 1000 ); + int M = run_arg_get_int( args, "M", N ); + int LDA = run_arg_get_int( args, "LDA", M ); + int seedA = run_arg_get_int( args, "seedA", random() ); + cham_diag_t diag = run_arg_get_diag( args, "diag", ChamNonUnit ); + int Q = parameters_compute_q( P ); + + /* Descriptors */ + CHAM_desc_t *descA; + void *ws = NULL; + + CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb ); + + /* Creates the matrices */ + CHAMELEON_Desc_Create( + &descA, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q ); + + /* Fills the matrix with random values */ + if ( diag == ChamUnit ) { + CHAMELEON_zplgtr_Tile( 0, ChamUpper, descA, seedA ); + CHAMELEON_zplgtr_Tile( 0, ChamLower, descA, seedA+1 ); + } + else { + CHAMELEON_zplrnt_Tile( descA, seedA ); + } + + if ( async ) { + ws = CHAMELEON_zgetrf_WS_Alloc( descA ); + } + + /* Calculates the solution */ + testing_start( &test_data ); + if ( async ) { + hres = CHAMELEON_zgetrf_Tile_Async( descA, ws, test_data.sequence, &test_data.request ); + CHAMELEON_Desc_Flush( descA, test_data.sequence ); + } + else { + hres = CHAMELEON_zgetrf_Tile( descA ); + } + test_data.hres = hres; + testing_stop( &test_data, flops_zgetrf( M, N ) ); + + /* Checks the factorization and residual */ +#if !defined(CHAMELEON_SIMULATION) + if ( check ) { + CHAM_desc_t *descA0c; + CHAM_desc_t *descA0 = CHAMELEON_Desc_Copy( descA, NULL ); + + /* Create A0c as local to rank 0 on all nodes */ + CHAMELEON_Desc_Create_User( + &descA0c, (void*)CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, + nb, nb, nb*nb, M, N, 0, 0, M, N, 1, 1, + chameleon_getaddr_cm, chameleon_getblkldd_cm, NULL ); + + if ( diag == ChamUnit ) { + CHAMELEON_zplgtr_Tile( 0, ChamUpper, descA0, seedA ); + CHAMELEON_zplgtr_Tile( 0, ChamLower, descA0, seedA+1 ); + } + else { + CHAMELEON_zplrnt_Tile( descA0, seedA ); + } + + /* Compute the permutation of A0: P * A0 */ + if ( CHAMELEON_Comm_rank() == 0 ) { + int i, j; + int *ipiv = (int *)calloc( M, sizeof(int) ); + + for (i = 0; i < M; i++) { + ipiv[i] = i+1; + } + + LAPACKE_zlaswp( LAPACK_COL_MAJOR, N, descA0c->mat, M, 1, M, ipiv, 1 ); + free( ipiv ); + } + + CHAMELEON_zlacpy_Tile( ChamUpperLower, descA0c, descA0 ); + CHAMELEON_Desc_Destroy( &descA0c ); + + hres += check_zxxtrf( args, ChamGeneral, ChamUpperLower, + descA0, descA ); + + CHAMELEON_Desc_Destroy( &descA0 ); + } +#endif /* !defined(CHAMELEON_SIMULATION) */ + + if ( ws != NULL ) { + CHAMELEON_zgetrf_WS_Free( descA, ws ); + } + + CHAMELEON_Desc_Destroy( &descA ); + + return hres; +} +#endif + +testing_t test_zgetrf; +const char *zgetrf_params[] = { "mtxfmt", "nb", "m", "n", "lda", "seedA", "diag", NULL }; +const char *zgetrf_output[] = { NULL }; +const char *zgetrf_outchk[] = { "||A||", "||A-fact(A)||", "RETURN", NULL }; + +/** + * @brief Testing registration function + */ +void testing_zgetrf_init( void ) __attribute__( ( constructor ) ); +void +testing_zgetrf_init( void ) +{ + test_zgetrf.name = "zgetrf"; + test_zgetrf.helper = "General LU factorization (with partial pivoting)"; + test_zgetrf.params = zgetrf_params; + test_zgetrf.output = zgetrf_output; + test_zgetrf.outchk = zgetrf_outchk; +#if defined(CHAMELEON_TESTINGS_VENDOR) + test_zgetrf.fptr_desc = NULL; +#else + test_zgetrf.fptr_desc = testing_zgetrf_desc; +#endif + test_zgetrf.fptr_std = NULL; /* testing_zgetrf_std; */ + test_zgetrf.next = NULL; + + testing_register( &test_zgetrf ); +}