diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index f7f3c27b8149bc441b861d94dba3dd1b6d6452b5..739ba511e81454a6b0dd8d563b8496a4b4d5c928 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -166,6 +166,7 @@ set(ZSRC zpotrf.c zsytrf.c zpotri.c + zpoinv.c zpotrimm.c zpotrs.c zsytrs.c diff --git a/compute/zpoinv.c b/compute/zpoinv.c new file mode 100644 index 0000000000000000000000000000000000000000..acc12fcea5d4681bb29aaa43c6ede64bcf0d456a --- /dev/null +++ b/compute/zpoinv.c @@ -0,0 +1,294 @@ +/** + * + * @file zpoinv.c + * + * @copyright 2009-2014 The University of Tennessee and The University of + * Tennessee Research Foundation. All rights reserved. + * @copyright 2012-2022 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * + *** + * + * @brief Chameleon zpoinv wrappers + * + * @version 1.0.0 + * @author Alycia Lisito + * @date 2022-02-02 + * @precisions normal z -> s d c + * + */ +#include "control/common.h" + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * @brief Computes the inverse of a complex Hermitian positive definite + * matrix A using the Cholesky factorization A = U^H*U or A = L*L^H + * computed by CHAMELEON_zpotrf(). + * + ******************************************************************************* + * + * @param[in] uplo + * = ChamUpper: Upper triangle of A is stored; + * = ChamLower: Lower triangle of A is stored. + * + * @param[in] N + * The order of the matrix A. N >= 0. + * + * @param[in,out] A + * On entry, the symmetric positive definite (or Hermitian) matrix A. + * If uplo = ChamUpper, the leading N-by-N upper triangular part of A + * contains the upper triangular part of the matrix A, and the strictly lower triangular + * part of A is not referenced. + * If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower + * triangular part of the matrix A, and the strictly upper triangular part of A is not + * referenced. + * On exit, the upper or lower triangle of the (Hermitian) + * inverse of A, overwriting the input factor U or L. + * + * @param[in] LDA + * The leading dimension of the array A. LDA >= max(1,N). + * + ******************************************************************************* + * + * @retval CHAMELEON_SUCCESS successful exit + * @retval <0 if -i, the i-th argument had an illegal value + * @retval >0 if i, the (i,i) element of the factor U or L is + * zero, and the inverse could not be computed. + * + ******************************************************************************* + * + * @sa CHAMELEON_zpoinv_Tile + * @sa CHAMELEON_zpoinv_Tile_Async + * @sa CHAMELEON_cpoinv + * @sa CHAMELEON_dpoinv + * @sa CHAMELEON_spoinv + * @sa CHAMELEON_zpotrf + * + */ +int CHAMELEON_zpoinv( cham_uplo_t uplo, int N, + CHAMELEON_Complex64_t *A, int LDA ) +{ + int NB; + int status; + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + CHAM_desc_t descAl, descAt; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_fatal_error("CHAMELEON_zpoinv", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + /* Check input arguments */ + if ((uplo != ChamUpper) && (uplo != ChamLower)) { + chameleon_error("CHAMELEON_zpoinv", "illegal value of uplo"); + return -1; + } + if (N < 0) { + chameleon_error("CHAMELEON_zpoinv", "illegal value of N"); + return -2; + } + if (LDA < chameleon_max(1, N)) { + chameleon_error("CHAMELEON_zpoinv", "illegal value of LDA"); + return -4; + } + /* Quick return */ + if (chameleon_max(N, 0) == 0) + return CHAMELEON_SUCCESS; + + /* Tune NB depending on M, N & NRHS; Set NBNB */ + status = chameleon_tune(CHAMELEON_FUNC_ZPOSV, N, N, 0); + if (status != CHAMELEON_SUCCESS) { + chameleon_error("CHAMELEON_zpoinv", "chameleon_tune() failed"); + return status; + } + + /* Set NT */ + NB = CHAMELEON_NB; + + chameleon_sequence_create( chamctxt, &sequence ); + + /* Submit the matrix conversion */ + chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, uplo, + A, NB, NB, LDA, N, N, N, sequence, &request ); + + /* Call the tile interface */ + CHAMELEON_zpoinv_Tile_Async( uplo, &descAt, sequence, &request ); + + /* Submit the matrix conversion back */ + chameleon_ztile2lap( chamctxt, &descAl, &descAt, + ChamDescInout, uplo, sequence, &request ); + + chameleon_sequence_wait( chamctxt, sequence ); + + /* Cleanup the temporary data */ + chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt ); + + status = sequence->status; + chameleon_sequence_destroy( chamctxt, sequence ); + return status; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t_Tile + * + * @brief Tile equivalent of CHAMELEON_zpoinv() + * + * Computes the inverse of a complex Hermitian + * positive definite matrix A using the Cholesky factorization + * A = U^H*U or A = L*L^H computed by CHAMELEON_zpotrf(). + * + * Operates on matrices stored by tiles. + * All matrices are passed through descriptors. + * All dimensions are taken from the descriptors. + * + ******************************************************************************* + * + * @param[in] uplo + * = ChamUpper: Upper triangle of A is stored; + * = ChamLower: Lower triangle of A is stored. + * + * @param[in] A + * On entry, the symmetric positive definite (or Hermitian) matrix A. + * If uplo = ChamUpper, the leading N-by-N upper triangular part of A + * contains the upper triangular part of the matrix A, and the strictly lower triangular + * part of A is not referenced. + * If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower + * triangular part of the matrix A, and the strictly upper triangular part of A is not + * referenced. + * On exit, the upper or lower triangle of the (Hermitian) + * inverse of A, overwriting the input factor U or L. + * + ******************************************************************************* + * + * @retval CHAMELEON_SUCCESS successful exit + * @retval >0 if i, the leading minor of order i of A is not + * positive definite, so the factorization could not be + * completed, and the solution has not been computed. + * + ******************************************************************************* + * + * @sa CHAMELEON_zpoinv + * @sa CHAMELEON_zpoinv_Tile_Async + * @sa CHAMELEON_cpoinv_Tile + * @sa CHAMELEON_dpoinv_Tile + * @sa CHAMELEON_spoinv_Tile + * @sa CHAMELEON_zpotrf_Tile + * + */ +int CHAMELEON_zpoinv_Tile( cham_uplo_t uplo, CHAM_desc_t *A ) +{ + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_fatal_error("CHAMELEON_zpoinv_Tile", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + chameleon_sequence_create( chamctxt, &sequence ); + + CHAMELEON_zpoinv_Tile_Async( uplo, A, sequence, &request ); + + CHAMELEON_Desc_Flush( A, sequence ); + + chameleon_sequence_wait( chamctxt, sequence ); + status = sequence->status; + chameleon_sequence_destroy( chamctxt, sequence ); + return status; +} + +/** + ******************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t_Tile_Async + * + * @brief Non-blocking equivalent of CHAMELEON_zpoinv_Tile(). + * + * Submit the tasks to compute the inverse of a complex Hermitian + * positive definite matrix A using the Cholesky factorization A = U^H*U + * or A = L*L^H computed by CHAMELEON_zpotrf. + * + * May return before the computation is finished. + * Allows for pipelining of operations at runtime. + * + ******************************************************************************* + * + * @param[in] sequence + * Identifies the sequence of function calls that this call belongs to + * (for completion checks and exception handling purposes). + * + * @param[out] request + * Identifies this function call (for exception handling purposes). + * + ******************************************************************************* + * + * @sa CHAMELEON_zpoinv + * @sa CHAMELEON_zpoinv_Tile + * @sa CHAMELEON_cpoinv_Tile_Async + * @sa CHAMELEON_dpoinv_Tile_Async + * @sa CHAMELEON_spoinv_Tile_Async + * @sa CHAMELEON_zpotrf_Tile_Async + * + */ +int CHAMELEON_zpoinv_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + CHAM_context_t *chamctxt; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_fatal_error("CHAMELEON_zpoinv_Tile_Async", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + if (sequence == NULL) { + chameleon_fatal_error("CHAMELEON_zpoinv_Tile_Async", "NULL sequence"); + return CHAMELEON_ERR_UNALLOCATED; + } + if (request == NULL) { + chameleon_fatal_error("CHAMELEON_zpoinv_Tile_Async", "NULL request"); + return CHAMELEON_ERR_UNALLOCATED; + } + /* Check sequence status */ + if (sequence->status == CHAMELEON_SUCCESS) { + request->status = CHAMELEON_SUCCESS; + } + else { + return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED); + } + + /* Check descriptors for correctness */ + if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) { + chameleon_error("CHAMELEON_zpoinv_Tile_Async", "invalid descriptor"); + return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + } + /* Check input arguments */ + if (A->nb != A->mb) { + chameleon_error("CHAMELEON_zpoinv_Tile_Async", "only square tiles supported"); + return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE); + } + if ((uplo != ChamUpper) && (uplo != ChamLower)) { + chameleon_error("CHAMELEON_zpoinv_Tile_Async", "illegal value of uplo"); + return chameleon_request_fail(sequence, request, -1); + } + /* Quick return */ + if ( chameleon_max( A->n, 0 ) == 0 ) { + return CHAMELEON_SUCCESS; + } + + chameleon_pzpotrf( uplo, A, sequence, request ); + + chameleon_pztrtri( uplo, ChamNonUnit, A, sequence, request ); + + chameleon_pzlauum( uplo, A, sequence, request ); + + return CHAMELEON_SUCCESS; +} diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h index 128478f3bfae0bbf58a7e70ed6164adf92a4ec76..6fd52f848de55b5e1680ab3a322769244e08c2f9 100644 --- a/include/chameleon/chameleon_z.h +++ b/include/chameleon/chameleon_z.h @@ -80,6 +80,7 @@ int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_ int CHAMELEON_zplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed ); int CHAMELEON_zplrnt( int M, int N, CHAMELEON_Complex64_t *A, int LDA, unsigned long long int seed ); int CHAMELEON_zplrnk( int M, int N, int K, CHAMELEON_Complex64_t *C, int LDC, unsigned long long int seedA, unsigned long long int seedB ); +int CHAMELEON_zpoinv(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA); int CHAMELEON_zposv(cham_uplo_t uplo, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB); int CHAMELEON_zpotrf(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA); int CHAMELEON_zsytrf(cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA); @@ -158,6 +159,7 @@ int CHAMELEON_zplghe_Tile(double bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigne int CHAMELEON_zplgsy_Tile(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed ); int CHAMELEON_zplrnt_Tile(CHAM_desc_t *A, unsigned long long int seed ); int CHAMELEON_zplrnk_Tile(int K, CHAM_desc_t *C, unsigned long long int seedA, unsigned long long int seedB ); +int CHAMELEON_zpoinv_Tile(cham_uplo_t uplo, CHAM_desc_t *A); int CHAMELEON_zposv_Tile(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B); int CHAMELEON_zpotrf_Tile(cham_uplo_t uplo, CHAM_desc_t *A); int CHAMELEON_zsytrf_Tile(cham_uplo_t uplo, CHAM_desc_t *A); @@ -232,6 +234,7 @@ int CHAMELEON_zplghe_Tile_Async(double bump, cham_uplo_t uplo, CHAM_desc_t *A, u int CHAMELEON_zplgsy_Tile_Async(CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); int CHAMELEON_zplrnt_Tile_Async(CHAM_desc_t *A, unsigned long long int seed, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); int CHAMELEON_zplrnk_Tile_Async(int K, CHAM_desc_t *C, unsigned long long int seedA, unsigned long long int seedB, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); +int CHAMELEON_zpoinv_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zposv_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zpotrf_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); int CHAMELEON_zsytrf_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);