diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py index 18a79e3000ca3c1018d49ce7d34254c7baae3eae..c26498a1fd7d9761dd14d36f3f0e5ca6b61d2ae5 100644 --- a/cmake_modules/local_subs.py +++ b/cmake_modules/local_subs.py @@ -22,6 +22,7 @@ _extra_blas = [ ('', 'slatm1', 'dlatm1', 'slatm1', 'dlatm1' ), ('', 'sgenm2', 'dgenm2', 'cgenm2', 'zgenm2' ), ('', 'slag2c_fake', 'dlag2z_fake', 'slag2c', 'dlag2z' ), + ('', 'sgepdf', 'dgepdf', 'cgepdf', 'zgepdf' ), ] _extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ] diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt index 9b0e390599f9bb019f7227652d853e735f1a8e2e..7ac29028f72737aea9f2d7db393645cf76801995 100644 --- a/compute/CMakeLists.txt +++ b/compute/CMakeLists.txt @@ -108,6 +108,7 @@ set(ZSRC pzgelqf.c pzgelqf_param.c pzgelqfrh.c + pzgepdf_qr.c pzgeqrf.c pzgeqrfrh.c pzgeqrf_param.c @@ -153,6 +154,7 @@ set(ZSRC zgelqs_param.c zgeqrf.c zgeqrf_param.c + zgepdf_qr.c zgeqrs.c zgeqrs_param.c #zgesv.c diff --git a/compute/pzgepdf_qr.c b/compute/pzgepdf_qr.c new file mode 100644 index 0000000000000000000000000000000000000000..c0a455c3a40f8177d49495ea74ddbc679afe3bb8 --- /dev/null +++ b/compute/pzgepdf_qr.c @@ -0,0 +1,95 @@ +/** + * + * @file pzgepdf_qr.c + * + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * @copyright 2016-2020 KAUST. All rights reserved. 
+ * + *** + * + * @brief Chameleon zgepdf_qr parallel algorithm: QR factorization of [ A1; A2 ] and generation of the associated [ Q1; Q2 ] + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2020-11-09 + * @precisions normal z -> s d c + * + */ +#include "control/common.h" + +void +chameleon_pzgepdf_qr( int genD, int doqr, int optid, + const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, + CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *D1, CHAM_desc_t *Q1, + CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *D2, CHAM_desc_t *Q2, + RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ) +{ + int L, KT, Dtop; + + if ( A1->m > A1->n ) { + KT = A1->nt; + } else { + KT = A1->mt; + } + L = optid ? A2->m : 0; /* L is only consumed by the non-param pztpqrt/pztpgqrt calls below */ + + Dtop = genD; + + /* Factorize A1 if needed */ + if ( doqr ) { + if ( qrtreeT ) { + chameleon_pzgeqrf_param( genD, KT, qrtreeT, A1, + TS1, TT1, D1, sequence, request ); + } + else { + chameleon_pzgeqrf( genD, A1, TS1, D1, sequence, request ); + } + Dtop = 0; + } + + /* Factorize A2 -- NOTE(review): the branch is selected on qrtreeT while the param kernel receives qrtreeB; confirm both trees are always provided together */ + if ( qrtreeT ) { + chameleon_pztpqrt_param( genD, (optid ? ChamUpper : ChamUpperLower), KT, qrtreeB, + A1, A2, TS2, TT2, D2, sequence, request ); + } + else { + chameleon_pztpqrt( L, A1, A2, TS2, sequence, request ); + } + + /* Initialize Q1 and Q2 */ + chameleon_pzlaset( ChamUpperLower, 0.0, 1.0, Q1, sequence, request ); + chameleon_pzlaset( ChamUpperLower, 0.0, 0.0, Q2, sequence, request ); + + /* Generate Q1 and Q2 */ + if ( qrtreeT ) { + chameleon_pztpgqrt_param( 0, (optid ? 
ChamUpper : ChamUpperLower), KT, qrtreeB, + A2, Q1, Q2, TS2, TT2, D2, sequence, request ); + chameleon_pzungqr_param( Dtop, KT, qrtreeT, A1, Q1, TS1, TT1, D1, sequence, request ); + } + else { + chameleon_pztpgqrt( KT, L, A2, TS2, Q1, Q2, sequence, request ); + chameleon_pzungqr( Dtop, A1, Q1, TS1, D1, sequence, request ); + } + + /* Flush all read data */ + CHAMELEON_Desc_Flush( A1, sequence ); + CHAMELEON_Desc_Flush( TS1, sequence ); + + CHAMELEON_Desc_Flush( A2, sequence ); + CHAMELEON_Desc_Flush( TS2, sequence ); + + if ( qrtreeT ) { + CHAMELEON_Desc_Flush( TT1, sequence ); + CHAMELEON_Desc_Flush( TT2, sequence ); + } + + if ( D1 != NULL ) { + CHAMELEON_Desc_Flush( D1, sequence ); + } + if ( D2 != NULL ) { + CHAMELEON_Desc_Flush( D2, sequence ); + } + + return; +} diff --git a/compute/zgepdf_qr.c b/compute/zgepdf_qr.c new file mode 100644 index 0000000000000000000000000000000000000000..0399ba49ee210d2fed7ecaa36251bb449e36dad1 --- /dev/null +++ b/compute/zgepdf_qr.c @@ -0,0 +1,109 @@ +/** + * + * @file zgepdf_qr.c + * + * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, + * Univ. Bordeaux. All rights reserved. + * @copyright 2016-2020 KAUST. All rights reserved. + * + *** + * + * @brief Chameleon zgepdf_qr wrapper. + * + * This wrapper exists only for testing purposes to ease the creation of + * timer/testing for this subroutine of the QDWH/Zolo algorithms. That is why + * only the Tile version is available. + * + * @version 1.0.0 + * @author Mathieu Faverge + * @date 2016-12-15 + * @precisions normal z -> s d c + * + */ +#include "control/common.h" + +/** + ****************************************************************************** + * + * @ingroup CHAMELEON_Complex64_t + * + * @brief Perform a QR factorization of a matrix A and generate the associated Q. + * + * A is of the form [ A1 ] and Q is generated in a similar form [ Q1 ]. + * [ A2 ] [ Q2 ] + * + * @warning This function is NOT a USER routine and should not be called directly. 
+ * + ******************************************************************************* + * + * @param[in] doqr + * Specify if A1 is already factorized or if it needs to be done within this call. + * + * @param[in] optid + * Specify if A2 is an identity structure matrix and optimization is performed or not. + * + * @param[in] qrtreeT + * Describe the reduction tree for the factorization of A1. + * + * @param[in] qrtreeB + * Describe the reduction tree for the factorization of A2. + * + * @param[in] A1 Top block of A = [ A1; A2 ]. + * + * @param[in] TS1 Passed with A1 to the factorization of A1 and to the generation of Q1. + * + * @param[in] TT1 Passed alongside TS1, only when qrtreeT is provided. + * + * @param[in] Q1 Top block of the generated Q = [ Q1; Q2 ]. + * + * @param[in] A2 Bottom block of A = [ A1; A2 ]. + * + * @param[in] TS2 Passed with A2 to the factorization of A2 and to the generation of Q1 and Q2. + * + * @param[in] TT2 Passed alongside TS2, only when qrtreeT is provided. + * + * @param[in] Q2 Bottom block of the generated Q = [ Q1; Q2 ]. 
+ * + ******************************************************************************* + * + * @return + * \retval CHAMELEON_SUCCESS successful exit + * \retval CHAMELEON_ERR_NOT_INITIALIZED if CHAMELEON is not initialized; otherwise the status of the internal sequence is returned + * + ******************************************************************************* + * + * @sa CHAMELEON_cgepdf_qr_Tile + * @sa CHAMELEON_dgepdf_qr_Tile + * @sa CHAMELEON_sgepdf_qr_Tile + * + */ +int CHAMELEON_zgepdf_qr_Tile( int doqr, int optid, + const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, + CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *Q1, + CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *Q2 ) +{ + CHAM_context_t *chamctxt; + RUNTIME_sequence_t *sequence = NULL; + RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER; + int status; + + chamctxt = chameleon_context_self(); + if (chamctxt == NULL) { + chameleon_fatal_error("CHAMELEON_zgepdf_qr_Tile", "CHAMELEON not initialized"); + return CHAMELEON_ERR_NOT_INITIALIZED; + } + chameleon_sequence_create( chamctxt, &sequence ); + + chameleon_pzgepdf_qr( 1, doqr, optid, qrtreeT, qrtreeB, + A1, TS1, TT1, NULL, Q1, + A2, TS2, TT2, NULL, Q2, + sequence, &request ); + + CHAMELEON_Desc_Flush( Q1, sequence ); + CHAMELEON_Desc_Flush( Q2, sequence ); + + chameleon_sequence_wait( chamctxt, sequence ); + status = 
sequence->status; + chameleon_sequence_destroy( chamctxt, sequence ); + return status; +} diff --git a/control/compute_z.h b/control/compute_z.h index cd08503c32d68f78d5e09ad001c84f70772f9630..125ee947ee7f7e048ecd2320a68e80dce1a23657 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -41,6 +41,7 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzgemm(cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t beta, CHAM_desc_t *C, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); +void chameleon_pzgepdf_qr( int genD, int doqr, int optid, const libhqr_tree_t *qrtreeT, const libhqr_tree_t *qrtreeB, CHAM_desc_t *A1, CHAM_desc_t *TS1, CHAM_desc_t *TT1, CHAM_desc_t *D1, CHAM_desc_t *Q1, CHAM_desc_t *A2, CHAM_desc_t *TS2, CHAM_desc_t *TT2, CHAM_desc_t *D2, CHAM_desc_t *Q2, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request ); void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgetrf_incpiv(CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, int *IPIV, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);