From e46b6449fb20f81c9446dd4c019a896946e986fc Mon Sep 17 00:00:00 2001 From: Mathieu Faverge <mathieu.faverge@inria.fr> Date: Thu, 11 Apr 2019 23:43:00 +0200 Subject: [PATCH] Rules precision --- CMakeLists.txt | 4 +- cmake_modules/local_subs.py | 91 +++++++++ cmake_modules/morse_cmake | 2 +- compute/zgels.c | 4 +- compute/zgels_param.c | 4 +- compute/zgesvd.c | 34 ++-- compute/zgetrs_incpiv.c | 4 +- compute/zgetrs_nopiv.c | 4 +- compute/zhetrd.c | 4 +- compute/zposv.c | 4 +- compute/zpotrf.c | 4 +- compute/zpotri.c | 12 +- compute/zpotrimm.c | 12 +- compute/zpotrs.c | 6 +- compute/zsysv.c | 4 +- compute/zsytrf.c | 4 +- compute/zsytrs.c | 6 +- compute/zunmlq.c | 18 +- compute/zunmlq_param.c | 18 +- compute/zunmqr.c | 18 +- compute/zunmqr_param.c | 18 +- control/compute_z.h | 4 - coreblas/compute/core_zherfb.c | 4 +- coreblas/compute/core_zpamm.c | 2 +- coreblas/compute/core_zparfb.c | 6 +- coreblas/compute/core_zpemv.c | 6 +- coreblas/compute/core_ztpmlqt.c | 14 +- coreblas/compute/core_ztpmqrt.c | 14 +- coreblas/compute/core_ztsmlq.c | 10 +- coreblas/compute/core_ztsmlq_hetra1.c | 6 +- coreblas/compute/core_ztsmqr.c | 10 +- coreblas/compute/core_ztsmqr_hetra1.c | 6 +- coreblas/compute/core_zttmlq.c | 10 +- coreblas/compute/core_zttmqr.c | 10 +- coreblas/compute/core_zunmlq.c | 10 +- coreblas/compute/core_zunmqr.c | 10 +- .../eztrace_module/coreblas_eztrace_module | 172 ------------------ coreblas/include/coreblas/coreblas_z.h | 43 ----- cudablas/compute/cuda_zparfb.c | 6 +- cudablas/compute/cuda_ztpmlqt.c | 14 +- cudablas/compute/cuda_ztpmqrt.c | 14 +- runtime/CMakeLists.txt | 2 +- .../{codelet_zasum.c => codelet_dzasum.c} | 4 +- runtime/openmp/codelets/codelet_zunmlq.c | 10 +- runtime/openmp/codelets/codelet_zunmqr.c | 10 +- .../{codelet_zasum.c => codelet_dzasum.c} | 6 +- .../{codelet_zasum.c => codelet_dzasum.c} | 4 +- runtime/quark/codelets/codelet_zunmlq.c | 10 +- runtime/quark/codelets/codelet_zunmqr.c | 10 +- runtime/quark/include/core_blas_dag.h | 2 + 
.../{codelet_zasum.c => codelet_dzasum.c} | 12 +- runtime/starpu/codelets/codelet_zcallback.c | 2 +- runtime/starpu/codelets/codelet_zunmlq.c | 10 +- runtime/starpu/codelets/codelet_zunmqr.c | 10 +- runtime/starpu/include/runtime_codelet_z.h | 114 ++++++------ runtime/starpu/include/runtime_codelets.h | 5 - testing/lin/clagsy.f | 2 +- testing/lin/clarhs.f | 6 +- testing/lin/clatrs.f | 24 +-- testing/lin/cpocon.f | 4 +- testing/lin/cporfs.f | 2 +- testing/lin/cposvx.f | 12 +- testing/lin/cpotri.f | 4 +- testing/lin/dpocon.f | 4 +- testing/lin/dporfs.f | 2 +- testing/lin/dposvx.f | 12 +- testing/lin/dpotri.f | 4 +- testing/lin/spocon.f | 4 +- testing/lin/sporfs.f | 2 +- testing/lin/sposvx.f | 12 +- testing/lin/spotri.f | 4 +- testing/lin/zlagsy.f | 2 +- testing/lin/zlarhs.f | 6 +- testing/lin/zlatrs.f | 24 +-- testing/lin/zpocon.f | 4 +- testing/lin/zporfs.f | 2 +- testing/lin/zposvx.f | 12 +- testing/lin/zpotri.f | 4 +- timing/timing_zauxiliary.c | 4 +- timing/timing_zauxiliary.h | 4 +- 80 files changed, 443 insertions(+), 574 deletions(-) create mode 100644 cmake_modules/local_subs.py rename runtime/openmp/codelets/{codelet_zasum.c => codelet_dzasum.c} (94%) rename runtime/parsec/codelets/{codelet_zasum.c => codelet_dzasum.c} (95%) rename runtime/quark/codelets/{codelet_zasum.c => codelet_dzasum.c} (96%) rename runtime/starpu/codelets/{codelet_zasum.c => codelet_dzasum.c} (88%) diff --git a/CMakeLists.txt b/CMakeLists.txt index 4d3309fee..4e2c41e4a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -97,7 +97,7 @@ option(BUILD_SHARED_LIBS "Build shared libraries" OFF) # Define precision supported by CHAMELEON # ----------------------------------------- -set( RP_CHAMELEON_DICTIONNARY ${CHAMELEON_CMAKE_MODULE_PATH}/precision_generator/subs.py ) +set( RP_CHAMELEON_DICTIONNARY ${CMAKE_SOURCE_DIR}/cmake_modules/local_subs.py ) set( RP_CHAMELEON_PRECISIONS "s;d;c;z" ) include(RulesPrecisions) @@ -586,7 +586,7 @@ endif(NOT CHAMELEON_SIMULATION) # 
------------------------------- if( CHAMELEON_SCHED_STARPU ) - set(CHAMELEON_STARPU_VERSION "1.1" CACHE STRING "oldest STARPU version desired") + set(CHAMELEON_STARPU_VERSION "1.3" CACHE STRING "oldest STARPU version desired") # create list of components in order to make a single call to find_package(starpu...) if(NOT CHAMELEON_SIMULATION) diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py new file mode 100644 index 000000000..4608e59b8 --- /dev/null +++ b/cmake_modules/local_subs.py @@ -0,0 +1,91 @@ +_extra_blas = [ + # ----- Additional BLAS + ('', 'dsgesv', 'dsgesv', 'zcgesv', 'zcgesv' ), + ('', 'sgesplit', 'dgesplit', 'cgesplit', 'zgesplit' ), + ('', 'slascal', 'dlascal', 'clascal', 'zlascal' ), + ('', 'slapack', 'dlapack', 'clapack', 'zlapack' ), + ('', 'stile', 'dtile', 'ctile', 'ztile' ), + ('', 'sgecon', 'dgecon', 'cgecon', 'zgecon' ), + ('', 'spocon', 'dpocon', 'cpocon', 'zpocon' ), + ('', 'strasm', 'dtrasm', 'ctrasm', 'ztrasm' ), + ('', 'sgecfi', 'dgecfi', 'cgecfi', 'zgecfi' ), + ('', 'splssq', 'dplssq', 'cplssq', 'zplssq' ), + ('', 'sy2sb', 'sy2sb' , 'he2hb', 'he2hb' ), + ('', 'she2ge', 'dhe2ge', 'che2ge', 'zhe2ge' ), + ('', 'slatro', 'dlatro', 'clatro', 'zlatro' ), #=> Replace by getmo/gecmo as in essl + ('', 'sbuild', 'dbuild', 'cbuild', 'zbuild' ), #=> Replace by map function +] + +_extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ] + +subs = { + # ------------------------------------------------------------ + # replacements applied to mixed precision files. 
+ 'mixed' : [ + # double/single, double/single-complex + #'12345678901234567890', '12345678901234567890') + (r'\bdouble', r'\bCHAMELEON_Complex64_t'), + (r'\bChamRealDouble', r'\bChamComplexDouble' ), + (r'\bfloat', r'\bCHAMELEON_Complex32_t'), + (r'\bChamRealFloat', r'\bChamComplexFloat' ), + (r'\breal\b', r'\bcomplex\b' ), + + ('dsgels', 'zcgels' ), + ('dsorgesv', 'zcungesv' ), + ], + # ------------------------------------------------------------ + # replacements applied to normal (non-mixed precision) files. + 'normal': [ + # pattern single double single-complex double-complex + #'12345678901234567890', '12345678901234567890', '12345678901234567890', '12345678901234567890', '12345678901234567890') + ('int', 'float', 'double', 'CHAMELEON_Complex32_t', r'\bCHAMELEON_Complex64_t'), + ('ChamPattern', 'ChamRealFloat', 'ChamRealDouble', 'ChamComplexFloat', r'\bChamComplexDouble' ), + ('ChamPattern', 'ChamRealFloat', 'ChamRealDouble', 'ChamRealFloat', r'\bChamRealDouble' ), + + # ----- Additional BLAS + ('', 'sTile', 'dTile', 'cTile', 'zTile' ), + ('', 'sLapack', 'dLapack', 'cLapack', 'zLapack' ), + ('', 'ORMQR', 'ORMQR', 'UNMQR', 'UNMQR' ), + ('', 'ORMLQ', 'ORMLQ', 'UNMLQ', 'UNMLQ' ), + ('', 'SYEV', 'SYEV', 'HEEV', 'HEEV' ), + ('', 'SYG', 'SYG', 'HEG', 'HEG' ), + ] + + _extra_blas + + _extra_BLAS + + [ + + # ----- For norms: the result is computed in real single or real double precision + ('', 'slange', 'dlange', 'slange', 'dlange' ), + ('', 'slaset', 'dlaset', 'slaset', 'dlaset' ), + ('', 'splssq', 'dplssq', 'splssq', 'dplssq' ), + ('', 'slacpy', 'dlacpy', 'slacpy', 'dlacpy' ), + ('', 'saxpy', 'daxpy', 'saxpy', 'daxpy' ), + + (r'\b', r'szero\b', r'dzero\b', r'czero\b', r'zzero\b' ), +# (r'\b', r'sone\b', r'done\b', r'cone\b', r'zone\b' ), + + # ----- Chameleon Prefixes + ('CHAMELEON_P', 'CHAMELEON_S', 'CHAMELEON_D', 'CHAMELEON_C', 'CHAMELEON_Z' ), + ('RUNTIME_P', 'RUNTIME_s', 'RUNTIME_d', 'RUNTIME_c', 'RUNTIME_z' ), + ('chameleon_p', 'chameleon_s', 'chameleon_d', 'chameleon_c', 'chameleon_z' ), + 
('codelet_p', 'codelet_s', 'codelet_d', 'codelet_c', 'codelet_z' ), + ('runtime_p', 'runtime_s', 'runtime_d', 'runtime_c', 'runtime_z' ), + ('testing_p', 'testing_s', 'testing_d', 'testing_c', 'testing_z' ), + ('timing_p', 'timing_s', 'timing_d', 'timing_c', 'timing_z' ), + ('workspace_p', 'workspace_s', 'workspace_d', 'workspace_c', 'workspace_z' ), +# ('CORE_P', 'CORE_S', 'CORE_D', 'CORE_C', 'CORE_Z' ), +# ('vec_p', 'vec_s', 'vec_d', 'vec_c', 'vec_z' ), + + # ('', 'starpu_s', 'starpu_d', 'starpu_c', 'starpu_z' ), + # ('', 'STARPU_S', 'STARPU_D', 'STARPU_C', 'STARPU_Z' ), + # ('', 's_', 'd_', 'c_', 'z_' ), + # ('', 'S_', 'D_', 'C_', 'Z_' ), + # ('', 'FLT_EPSILON', 'DBL_EPSILON', 'FLT_EPSILON', 'DBL_EPSILON' ), + # ('', 's_RAFF_FLOAT', 'd_RAFF_FLOAT', 'c_RAFF_FLOAT', 'z_RAFF_FLOAT' ), + # # ----- unused? + # ('', 's_check', 'd_check', 'c_check', 'z_check' ), + # ('', 'stesting', 'dtesting', 'ctesting', 'ztesting' ), + # ('', 'SAUXILIARY', 'DAUXILIARY', 'CAUXILIARY', 'ZAUXILIARY' ), + # ('', 'sbuild', 'dbuild', 'cbuild', 'zbuild' ), + ] +} diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake index 33a182878..ade499661 160000 --- a/cmake_modules/morse_cmake +++ b/cmake_modules/morse_cmake @@ -1 +1 @@ -Subproject commit 33a182878f9049c47af1fce3e86e72b9a10e7f7a +Subproject commit ade499661b58c71fe0586c2bbb98ea9725a88c52 diff --git a/compute/zgels.c b/compute/zgels.c index 444021e55..2aed98e7d 100644 --- a/compute/zgels.c +++ b/compute/zgels.c @@ -48,7 +48,7 @@ * @param[in] trans * Intended usage: * = ChamNoTrans: the linear system involves A; - * = ChamConjTrans: the linear system involves A**H. + * = ChamConjTrans: the linear system involves A^H. * Currently only ChamNoTrans is supported. * * @param[in] M @@ -218,7 +218,7 @@ int CHAMELEON_zgels( cham_trans_t trans, int M, int N, int NRHS, * @param[in] trans * Intended usage: * = ChamNoTrans: the linear system involves A; - * = ChamConjTrans: the linear system involves A**H. 
+ * = ChamConjTrans: the linear system involves A^H. * Currently only ChamNoTrans is supported. * * @param[in,out] A diff --git a/compute/zgels_param.c b/compute/zgels_param.c index fd5a99ddb..697e1b4ff 100644 --- a/compute/zgels_param.c +++ b/compute/zgels_param.c @@ -48,7 +48,7 @@ * @param[in] trans * Intended usage: * = ChamNoTrans: the linear system involves A; - * = ChamConjTrans: the linear system involves A**H. + * = ChamConjTrans: the linear system involves A^H. * Currently only ChamNoTrans is supported. * * @param[in] M @@ -221,7 +221,7 @@ int CHAMELEON_zgels_param( const libhqr_tree_t *qrtree, cham_trans_t trans, int * @param[in] trans * Intended usage: * = ChamNoTrans: the linear system involves A; - * = ChamConjTrans: the linear system involves A**H. + * = ChamConjTrans: the linear system involves A^H. * Currently only ChamNoTrans is supported. * * @param[in,out] A diff --git a/compute/zgesvd.c b/compute/zgesvd.c index c52fa59b3..7d12a10ba 100644 --- a/compute/zgesvd.c +++ b/compute/zgesvd.c @@ -44,7 +44,7 @@ * are returned in descending order. The first min(m,n) columns of * U and V are the left and right singular vectors of A. * - * Note that the routine returns V**T, not V. + * Note that the routine returns V^T, not V. ******************************************************************************* * * @param[in] jobu @@ -62,16 +62,16 @@ * NOT SUPPORTTED YET * * @param[in] jobvt - * Specifies options for computing all or part of the matrix V**H. + * Specifies options for computing all or part of the matrix V^H. * Intended usage: - * = ChamVec = 'A'(lapack): all N rows of V**H are returned + * = ChamVec = 'A'(lapack): all N rows of V^H are returned * in the array VT; - * = ChamNoVec = 'N': no rows of V**H (no right singular vectors) + * = ChamNoVec = 'N': no rows of V^H (no right singular vectors) * are computed. 
- * = ChamSVec = 'S': the first min(m,n) rows of V**H (the right + * = ChamSVec = 'S': the first min(m,n) rows of V^H (the right * singular vectors) are returned in the array VT; * NOT SUPPORTTED YET - * = ChamOVec = 'O': the first min(m,n) rows of V**H (the right + * = ChamOVec = 'O': the first min(m,n) rows of V^H (the right * singular vectors) are overwritten on the array A; * NOT SUPPORTTED YET * @@ -90,7 +90,7 @@ * columns of U (the left singular vectors, * stored columnwise); * if JOBVT = 'O', A is overwritten with the first min(m,n) - * rows of V**H (the right singular vectors, + * rows of V^H (the right singular vectors, * stored rowwise); * if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A * are destroyed. @@ -118,9 +118,9 @@ * * @param[out] VT * If JOBVT = 'A', VT contains the N-by-N unitary matrix - * V**H; + * V^H; * if JOBVT = 'S', VT contains the first min(m,n) rows of - * V**H (the right singular vectors, stored rowwise); + * V^H (the right singular vectors, stored rowwise); * if JOBVT = 'N' or 'O', VT is not referenced. * * @param[in] LDVT @@ -259,16 +259,16 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt, * NOT SUPPORTTED YET * * @param[in] jobvt - * Specifies options for computing all or part of the matrix V**H. + * Specifies options for computing all or part of the matrix V^H. * Intended usage: - * = ChamVec = 'A'(lapack): all N rows of V**H are returned + * = ChamVec = 'A'(lapack): all N rows of V^H are returned * in the array VT; - * = ChamNoVec = 'N': no rows of V**H (no right singular vectors) + * = ChamNoVec = 'N': no rows of V^H (no right singular vectors) * are computed. 
- * = ChamSVec = 'S': the first min(m,n) rows of V**H (the right + * = ChamSVec = 'S': the first min(m,n) rows of V^H (the right * singular vectors) are returned in the array VT; * NOT SUPPORTTED YET - * = ChamOVec = 'O': the first min(m,n) rows of V**H (the right + * = ChamOVec = 'O': the first min(m,n) rows of V^H (the right * singular vectors) are overwritten on the array A; * NOT SUPPORTTED YET * @@ -281,7 +281,7 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt, * columns of U (the left singular vectors, * stored columnwise); * if JOBVT = 'O', A is overwritten with the first min(m,n) - * rows of V**H (the right singular vectors, + * rows of V^H (the right singular vectors, * stored rowwise); * if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A * are destroyed. @@ -306,9 +306,9 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt, * * @param[out] VT * If JOBVT = 'A', VT contains the N-by-N unitary matrix - * V**H; + * V^H; * if JOBVT = 'S', VT contains the first min(m,n) rows of - * V**H (the right singular vectors, stored rowwise); + * V^H (the right singular vectors, stored rowwise); * if JOBVT = 'N' or 'O', VT is not referenced. * * @param[in] LDVT diff --git a/compute/zgetrs_incpiv.c b/compute/zgetrs_incpiv.c index d3e6324f7..225cb125f 100644 --- a/compute/zgetrs_incpiv.c +++ b/compute/zgetrs_incpiv.c @@ -37,8 +37,8 @@ * @param[in] trans * Intended to specify the the form of the system of equations: * = ChamNoTrans: A * X = B (No transpose) - * = ChamTrans: A**T * X = B (Transpose) - * = ChamConjTrans: A**H * X = B (Conjugate transpose) + * = ChamTrans: A^T * X = B (Transpose) + * = ChamConjTrans: A^H * X = B (Conjugate transpose) * Currently only ChamNoTrans is supported. 
* * @param[in] N diff --git a/compute/zgetrs_nopiv.c b/compute/zgetrs_nopiv.c index bb0de9875..3a3dfe360 100644 --- a/compute/zgetrs_nopiv.c +++ b/compute/zgetrs_nopiv.c @@ -38,8 +38,8 @@ * @param[in] trans * Intended to specify the the form of the system of equations: * = ChamNoTrans: A * X = B (No transpose) - * = ChamTrans: A**T * X = B (Transpose) - * = ChamConjTrans: A**H * X = B (Conjugate transpose) + * = ChamTrans: A^T * X = B (Transpose) + * = ChamConjTrans: A^H * X = B (Conjugate transpose) * Currently only ChamNoTrans is supported. * * @param[in] N diff --git a/compute/zhetrd.c b/compute/zhetrd.c index 0c7e3c66b..43c4fc59d 100644 --- a/compute/zhetrd.c +++ b/compute/zhetrd.c @@ -34,7 +34,7 @@ * tridiagonal form S using a two-stage approach * First stage: reduction to band tridiagonal form (unitary Q1); * Second stage: reduction from band to tridiagonal form (unitary - * Q2). Let Q = Q1 * Q2 be the global unitary transformation; Q**H * + * Q2). Let Q = Q1 * Q2 be the global unitary transformation; Q^H * * A * Q = S. * ******************************************************************************* @@ -190,7 +190,7 @@ int CHAMELEON_zhetrd( cham_job_t jobz, cham_uplo_t uplo, int N, * First stage: reduction to band tridiagonal form (unitary Q1); * Second stage: reduction from band to tridiagonal form (unitary Q2). * Let Q = Q1 * Q2 be the global unitary transformation; - * Q**H * A * Q = S. + * Q^H * A * Q = S. * Tile equivalent of CHAMELEON_zhetrd(). * Operates on matrices stored by tiles. * All matrices are passed through descriptors. diff --git a/compute/zposv.c b/compute/zposv.c index 27ab796eb..317f21f14 100644 --- a/compute/zposv.c +++ b/compute/zposv.c @@ -61,7 +61,7 @@ * triangular part of the matrix A, and the strictly upper triangular part of A is not * referenced. * On exit, if return value = 0, the factor U or L from the Cholesky factorization - * A = U**H*U or A = L*L**H. + * A = U^H*U or A = L*L^H. 
* * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). @@ -197,7 +197,7 @@ int CHAMELEON_zposv( cham_uplo_t uplo, int N, int NRHS, * triangular part of the matrix A, and the strictly upper triangular part of A is not * referenced. * On exit, if return value = 0, the factor U or L from the Cholesky factorization - * A = U**H*U or A = L*L**H. + * A = U^H*U or A = L*L^H. * * @param[in,out] B * On entry, the N-by-NRHS right hand side matrix B. diff --git a/compute/zpotrf.c b/compute/zpotrf.c index c898ca145..41093b0ca 100644 --- a/compute/zpotrf.c +++ b/compute/zpotrf.c @@ -55,7 +55,7 @@ * triangular part of the matrix A, and the strictly upper triangular part of A is not * referenced. * On exit, if return value = 0, the factor U or L from the Cholesky factorization - * A = U**H*U or A = L*L**H. + * A = U^H*U or A = L*L^H. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). @@ -170,7 +170,7 @@ int CHAMELEON_zpotrf( cham_uplo_t uplo, int N, * triangular part of the matrix A, and the strictly upper triangular part of A is not * referenced. * On exit, if return value = 0, the factor U or L from the Cholesky factorization - * A = U**H*U or A = L*L**H. + * A = U^H*U or A = L*L^H. * ******************************************************************************* * diff --git a/compute/zpotri.c b/compute/zpotri.c index eb1e95d61..899e13b89 100644 --- a/compute/zpotri.c +++ b/compute/zpotri.c @@ -29,7 +29,7 @@ * @ingroup CHAMELEON_Complex64_t * * CHAMELEON_zpotri - Computes the inverse of a complex Hermitian positive definite - * matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + * matrix A using the Cholesky factorization A = U^H*U or A = L*L^H * computed by CHAMELEON_zpotrf. 
* ******************************************************************************* @@ -43,7 +43,7 @@ * * @param[in,out] A * On entry, the triangular factor U or L from the Cholesky - * factorization A = U**H*U or A = L*L**H, as computed by + * factorization A = U^H*U or A = L*L^H, as computed by * CHAMELEON_zpotrf. * On exit, the upper or lower triangle of the (Hermitian) * inverse of A, overwriting the input factor U or L. @@ -140,7 +140,7 @@ int CHAMELEON_zpotri( cham_uplo_t uplo, int N, * * CHAMELEON_zpotri_Tile - Computes the inverse of a complex Hermitian * positive definite matrix A using the Cholesky factorization - * A = U**H*U or A = L*L**H computed by CHAMELEON_zpotrf. + * A = U^H*U or A = L*L^H computed by CHAMELEON_zpotrf. * Tile equivalent of CHAMELEON_zpotri(). * Operates on matrices stored by tiles. * All matrices are passed through descriptors. @@ -154,7 +154,7 @@ int CHAMELEON_zpotri( cham_uplo_t uplo, int N, * * @param[in] A * On entry, the triangular factor U or L from the Cholesky - * factorization A = U**H*U or A = L*L**H, as computed by + * factorization A = U^H*U or A = L*L^H, as computed by * CHAMELEON_zpotrf. * On exit, the upper or lower triangle of the (Hermitian) * inverse of A, overwriting the input factor U or L. @@ -206,8 +206,8 @@ int CHAMELEON_zpotri_Tile( cham_uplo_t uplo, CHAM_desc_t *A ) * @ingroup CHAMELEON_Complex64_t_Tile_Async * * CHAMELEON_zpotri_Tile_Async - Computes the inverse of a complex Hermitian - * positive definite matrix A using the Cholesky factorization A = U**H*U - * or A = L*L**H computed by CHAMELEON_zpotrf. + * positive definite matrix A using the Cholesky factorization A = U^H*U + * or A = L*L^H computed by CHAMELEON_zpotrf. * Non-blocking equivalent of CHAMELEON_zpotri_Tile(). * May return before the computation is finished. * Allows for pipelining of operations at runtime. 
diff --git a/compute/zpotrimm.c b/compute/zpotrimm.c index f903d52e9..7d5cda488 100644 --- a/compute/zpotrimm.c +++ b/compute/zpotrimm.c @@ -29,7 +29,7 @@ * @ingroup CHAMELEON_Complex64_t * * CHAMELEON_zpotrimm - Computes the inverse of a complex Hermitian positive definite - * matrix A using the Cholesky factorization A = U**H*U or A = L*L**H + * matrix A using the Cholesky factorization A = U^H*U or A = L*L^H * computed by CHAMELEON_zpotrf. * ******************************************************************************* @@ -43,7 +43,7 @@ * * @param[in,out] A * On entry, the triangular factor U or L from the Cholesky - * factorization A = U**H*U or A = L*L**H, as computed by + * factorization A = U^H*U or A = L*L^H, as computed by * CHAMELEON_zpotrf. * On exit, the upper or lower triangle of the (Hermitian) * inverse of A, overwriting the input factor U or L. @@ -162,7 +162,7 @@ int CHAMELEON_zpotrimm( cham_uplo_t uplo, int N, * * CHAMELEON_zpotrimm_Tile - Computes the inverse of a complex Hermitian * positive definite matrix A using the Cholesky factorization - * A = U**H*U or A = L*L**H computed by CHAMELEON_zpotrf. + * A = U^H*U or A = L*L^H computed by CHAMELEON_zpotrf. * Tile equivalent of CHAMELEON_zpotrimm(). * Operates on matrices stored by tiles. * All matrices are passed through descriptors. @@ -176,7 +176,7 @@ int CHAMELEON_zpotrimm( cham_uplo_t uplo, int N, * * @param[in] A * On entry, the triangular factor U or L from the Cholesky - * factorization A = U**H*U or A = L*L**H, as computed by + * factorization A = U^H*U or A = L*L^H, as computed by * CHAMELEON_zpotrf. * On exit, the upper or lower triangle of the (Hermitian) * inverse of A, overwriting the input factor U or L. 
@@ -230,8 +230,8 @@ int CHAMELEON_zpotrimm_Tile( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, C * @ingroup CHAMELEON_Complex64_t_Tile_Async * * CHAMELEON_zpotrimm_Tile_Async - Computes the inverse of a complex Hermitian - * positive definite matrix A using the Cholesky factorization A = U**H*U - * or A = L*L**H computed by CHAMELEON_zpotrf. + * positive definite matrix A using the Cholesky factorization A = U^H*U + * or A = L*L^H computed by CHAMELEON_zpotrf. * Non-blocking equivalent of CHAMELEON_zpotrimm_Tile(). * May return before the computation is finished. * Allows for pipelining of operations at runtime. diff --git a/compute/zpotrs.c b/compute/zpotrs.c index 72bd62a7f..1d290f64f 100644 --- a/compute/zpotrs.c +++ b/compute/zpotrs.c @@ -31,7 +31,7 @@ * * CHAMELEON_zpotrs - Solves a system of linear equations A * X = B with a symmetric positive * definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky - * factorization A = U**H*U or A = L*L**H computed by CHAMELEON_zpotrf. + * factorization A = U^H*U or A = L*L^H computed by CHAMELEON_zpotrf. * ******************************************************************************* * @@ -46,7 +46,7 @@ * The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. * * @param[in] A - * The triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H, + * The triangular factor U or L from the Cholesky factorization A = U^H*U or A = L*L^H, * computed by CHAMELEON_zpotrf. * * @param[in] LDA @@ -173,7 +173,7 @@ int CHAMELEON_zpotrs( cham_uplo_t uplo, int N, int NRHS, * = ChamLower: Lower triangle of A is stored. * * @param[in] A - * The triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H, + * The triangular factor U or L from the Cholesky factorization A = U^H*U or A = L*L^H, * computed by CHAMELEON_zpotrf. 
* * @param[in,out] B diff --git a/compute/zsysv.c b/compute/zsysv.c index 256e27de9..ebee2c6f1 100644 --- a/compute/zsysv.c +++ b/compute/zsysv.c @@ -62,7 +62,7 @@ * triangular part of the matrix A, and the strictly upper triangular part of A is not * referenced. * On exit, if return value = 0, the factor U or L from the Cholesky factorization - * A = U**T*U or A = L*L**T. + * A = U^T*U or A = L*L^T. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). @@ -194,7 +194,7 @@ int CHAMELEON_zsysv( cham_uplo_t uplo, int N, int NRHS, * triangular part of the matrix A, and the strictly upper triangular part of A is not * referenced. * On exit, if return value = 0, the factor U or L from the Cholesky factorization - * A = U**T*U or A = L*L**T. + * A = U^T*U or A = L*L^T. * * @param[in,out] B * On entry, the N-by-NRHS right hand side matrix B. diff --git a/compute/zsytrf.c b/compute/zsytrf.c index e32b1f8f4..b603ddde5 100644 --- a/compute/zsytrf.c +++ b/compute/zsytrf.c @@ -50,7 +50,7 @@ * triangular part of the matrix A, and the strictly upper triangular part of A is not * referenced. * On exit, if return value = 0, the factor U or L from the Cholesky factorization - * A = U**H*U or A = L*L**H. + * A = U^T*U or A = L*L^T. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,N). @@ -164,7 +164,7 @@ int CHAMELEON_zsytrf( cham_uplo_t uplo, int N, * triangular part of the matrix A, and the strictly upper triangular part of A is not * referenced. * On exit, if return value = 0, the factor U or L from the Cholesky factorization - * A = U**T*U or A = L*L**T. + * A = U^T*U or A = L*L^T. 
* ******************************************************************************* * diff --git a/compute/zsytrs.c b/compute/zsytrs.c index b50289edd..84a2c778f 100644 --- a/compute/zsytrs.c +++ b/compute/zsytrs.c @@ -33,7 +33,7 @@ * * CHAMELEON_zsytrs - Solves a system of linear equations A * X = B with a complex * symmetric matrix A using the Cholesky factorization - * A = U**H*U or A = L*L**H computed by CHAMELEON_zsytrf. + * A = U^T*U or A = L*L^T computed by CHAMELEON_zsytrf. * ******************************************************************************* * @@ -48,7 +48,7 @@ * The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0. * * @param[in] A - * The triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T, + * The triangular factor U or L from the Cholesky factorization A = U^T*U or A = L*L^T, * computed by CHAMELEON_zsytrf. * * @param[in] LDA @@ -172,7 +172,7 @@ int CHAMELEON_zsytrs( cham_uplo_t uplo, int N, int NRHS, * = ChamLower: Lower triangle of A is stored. * * @param[in] A - * The triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T, + * The triangular factor U or L from the Cholesky factorization A = U^T*U or A = L*L^T, * computed by CHAMELEON_zsytrf. * * @param[in,out] B diff --git a/compute/zunmlq.c b/compute/zunmlq.c index 51506cdaa..bbc4a6a77 100644 --- a/compute/zunmlq.c +++ b/compute/zunmlq.c @@ -35,7 +35,7 @@ * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q - * TRANS = 'C': Q**H * C C * Q**H + * TRANS = 'C': Q^H * C C * Q^H * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -49,13 +49,13 @@ * * @param[in] side * Intended usage: - * = ChamLeft: apply Q or Q**H from the left; - * = ChamRight: apply Q or Q**H from the right. + * = ChamLeft: apply Q or Q^H from the left; + * = ChamRight: apply Q or Q^H from the right. 
* * @param[in] trans * Intended usage: * = ChamNoTrans: no transpose, apply Q; - * = ChamConjTrans: conjugate transpose, apply Q**H. + * = ChamConjTrans: conjugate transpose, apply Q^H. * * @param[in] M * The number of rows of the matrix C. M >= 0. @@ -79,7 +79,7 @@ * * @param[in,out] C * On entry, the M-by-N matrix C. - * On exit, C is overwritten by Q*C or Q**H*C. + * On exit, C is overwritten by Q*C or Q^H*C. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). @@ -211,14 +211,14 @@ int CHAMELEON_zunmlq( cham_side_t side, cham_trans_t trans, int M, int N, int K, * * @param[in] side * Intended usage: - * = ChamLeft: apply Q or Q**H from the left; - * = ChamRight: apply Q or Q**H from the right. + * = ChamLeft: apply Q or Q^H from the left; + * = ChamRight: apply Q or Q^H from the right. * Currently only ChamLeft is supported. * * @param[in] trans * Intended usage: * = ChamNoTrans: no transpose, apply Q; - * = ChamConjTrans: conjugate transpose, apply Q**H. + * = ChamConjTrans: conjugate transpose, apply Q^H. * Currently only ChamConjTrans is supported. * * @param[in] A @@ -229,7 +229,7 @@ int CHAMELEON_zunmlq( cham_side_t side, cham_trans_t trans, int M, int N, int K, * * @param[in,out] C * On entry, the M-by-N matrix C. - * On exit, C is overwritten by Q*C or Q**H*C. + * On exit, C is overwritten by Q*C or Q^H*C. * ******************************************************************************* * diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c index 7199aed91..df0fba8ba 100644 --- a/compute/zunmlq_param.c +++ b/compute/zunmlq_param.c @@ -29,7 +29,7 @@ * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q - * TRANS = 'C': Q**H * C C * Q**H + * TRANS = 'C': Q^H * C C * Q^H * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -46,13 +46,13 @@ * * @param[in] side * Intended usage: - * = ChamLeft: apply Q or Q**H from the left; - * = ChamRight: apply Q or Q**H from the right. 
+ * = ChamLeft: apply Q or Q^H from the left; + * = ChamRight: apply Q or Q^H from the right. * * @param[in] trans * Intended usage: * = ChamNoTrans: no transpose, apply Q; - * = ChamConjTrans: conjugate transpose, apply Q**H. + * = ChamConjTrans: conjugate transpose, apply Q^H. * * @param[in] M * The number of rows of the matrix C. M >= 0. @@ -79,7 +79,7 @@ * * @param[in,out] C * On entry, the M-by-N matrix C. - * On exit, C is overwritten by Q*C or Q**H*C. + * On exit, C is overwritten by Q*C or Q^H*C. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). @@ -210,14 +210,14 @@ int CHAMELEON_zunmlq_param( const libhqr_tree_t *qrtree, cham_side_t side, cham_ * * @param[in] side * Intended usage: - * = ChamLeft: apply Q or Q**H from the left; - * = ChamRight: apply Q or Q**H from the right. + * = ChamLeft: apply Q or Q^H from the left; + * = ChamRight: apply Q or Q^H from the right. * Currently only ChamLeft is supported. * * @param[in] trans * Intended usage: * = ChamNoTrans: no transpose, apply Q; - * = ChamConjTrans: conjugate transpose, apply Q**H. + * = ChamConjTrans: conjugate transpose, apply Q^H. * Currently only ChamConjTrans is supported. * * @param[in] A @@ -228,7 +228,7 @@ int CHAMELEON_zunmlq_param( const libhqr_tree_t *qrtree, cham_side_t side, cham_ * * @param[in,out] C * On entry, the M-by-N matrix C. - * On exit, C is overwritten by Q*C or Q**H*C. + * On exit, C is overwritten by Q*C or Q^H*C. 
* ******************************************************************************* * diff --git a/compute/zunmqr.c b/compute/zunmqr.c index c7adba166..4139f579a 100644 --- a/compute/zunmqr.c +++ b/compute/zunmqr.c @@ -34,7 +34,7 @@ * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q - * TRANS = 'C': Q**H * C C * Q**H + * TRANS = 'C': Q^H * C C * Q^H * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -48,13 +48,13 @@ * * @param[in] side * Intended usage: - * = ChamLeft: apply Q or Q**H from the left; - * = ChamRight: apply Q or Q**H from the right. + * = ChamLeft: apply Q or Q^H from the left; + * = ChamRight: apply Q or Q^H from the right. * * @param[in] trans * Intended usage: * = ChamNoTrans: no transpose, apply Q; - * = ChamConjTrans: conjugate transpose, apply Q**H. + * = ChamConjTrans: conjugate transpose, apply Q^H. * * @param[in] M * The number of rows of the matrix C. M >= 0. @@ -81,7 +81,7 @@ * * @param[in,out] C * On entry, the M-by-N matrix C. - * On exit, C is overwritten by Q*C or Q**H*C. + * On exit, C is overwritten by Q*C or Q^H*C. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). @@ -212,14 +212,14 @@ int CHAMELEON_zunmqr( cham_side_t side, cham_trans_t trans, int M, int N, int K, * * @param[in] side * Intended usage: - * = ChamLeft: apply Q or Q**H from the left; - * = ChamRight: apply Q or Q**H from the right. + * = ChamLeft: apply Q or Q^H from the left; + * = ChamRight: apply Q or Q^H from the right. * Currently only ChamLeft is supported. * * @param[in] trans * Intended usage: * = ChamNoTrans: no transpose, apply Q; - * = ChamConjTrans: conjugate transpose, apply Q**H. + * = ChamConjTrans: conjugate transpose, apply Q^H. * Currently only ChamConjTrans is supported. * * @param[in] A @@ -231,7 +231,7 @@ int CHAMELEON_zunmqr( cham_side_t side, cham_trans_t trans, int M, int N, int K, * * @param[in,out] C * On entry, the M-by-N matrix C. 
- * On exit, C is overwritten by Q*C or Q**H*C. + * On exit, C is overwritten by Q*C or Q^H*C. * ******************************************************************************* * diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c index 152257e77..e7b31e742 100644 --- a/compute/zunmqr_param.c +++ b/compute/zunmqr_param.c @@ -29,7 +29,7 @@ * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q - * TRANS = 'C': Q**H * C C * Q**H + * TRANS = 'C': Q^H * C C * Q^H * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -46,13 +46,13 @@ * * @param[in] side * Intended usage: - * = ChamLeft: apply Q or Q**H from the left; - * = ChamRight: apply Q or Q**H from the right. + * = ChamLeft: apply Q or Q^H from the left; + * = ChamRight: apply Q or Q^H from the right. * * @param[in] trans * Intended usage: * = ChamNoTrans: no transpose, apply Q; - * = ChamConjTrans: conjugate transpose, apply Q**H. + * = ChamConjTrans: conjugate transpose, apply Q^H. * * @param[in] M * The number of rows of the matrix C. M >= 0. @@ -82,7 +82,7 @@ * * @param[in,out] C * On entry, the M-by-N matrix C. - * On exit, C is overwritten by Q*C or Q**H*C. + * On exit, C is overwritten by Q*C or Q^H*C. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). @@ -215,14 +215,14 @@ int CHAMELEON_zunmqr_param( const libhqr_tree_t *qrtree, * * @param[in] side * Intended usage: - * = ChamLeft: apply Q or Q**H from the left; - * = ChamRight: apply Q or Q**H from the right. + * = ChamLeft: apply Q or Q^H from the left; + * = ChamRight: apply Q or Q^H from the right. * Currently only ChamLeft is supported. * * @param[in] trans * Intended usage: * = ChamNoTrans: no transpose, apply Q; - * = ChamConjTrans: conjugate transpose, apply Q**H. + * = ChamConjTrans: conjugate transpose, apply Q^H. * Currently only ChamConjTrans is supported. 
* * @param[in] A @@ -234,7 +234,7 @@ int CHAMELEON_zunmqr_param( const libhqr_tree_t *qrtree, * * @param[in,out] C * On entry, the M-by-N matrix C. - * On exit, C is overwritten by Q*C or Q**H*C. + * On exit, C is overwritten by Q*C or Q^H*C. * ******************************************************************************* * diff --git a/control/compute_z.h b/control/compute_z.h index 6a78fe620..e34ec4dfc 100644 --- a/control/compute_z.h +++ b/control/compute_z.h @@ -35,10 +35,6 @@ int chameleon_zshift(CHAM_context_t *chamctxt, int m, int n, CHAMELEON_Complex64 /** * Declarations of parallel functions (dynamic scheduling) - alphabetical order */ -void chameleon_pzbarrier_pnl2tl(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -void chameleon_pzbarrier_row2tl(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -void chameleon_pzbarrier_tl2pnl(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); -void chameleon_pzbarrier_tl2row(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgebrd_gb2bd(cham_uplo_t uplo, CHAM_desc_t *A, double *D, double *E, CHAM_desc_t *T, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgebrd_ge2gb( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request); diff --git a/coreblas/compute/core_zherfb.c b/coreblas/compute/core_zherfb.c index d71c18028..a54de4a5a 100644 --- a/coreblas/compute/core_zherfb.c +++ b/coreblas/compute/core_zherfb.c @@ -27,7 +27,7 @@ * * CORE_zherfb overwrites the symmetric complex N-by-N tile C with * - * Q**T*C*Q + * Q^T*C*Q * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -72,7 +72,7 @@ * * @param[in,out] C * On entry, the symmetric N-by-N 
tile C. - * On exit, C is overwritten by Q**T*C*Q. + * On exit, C is overwritten by Q^T*C*Q. * * @param[in] ldc * The leading dimension of the array C. LDC >= max(1,M). diff --git a/coreblas/compute/core_zpamm.c b/coreblas/compute/core_zpamm.c index eb995720c..01a25ea55 100644 --- a/coreblas/compute/core_zpamm.c +++ b/coreblas/compute/core_zpamm.c @@ -52,7 +52,7 @@ static inline int CORE_zpamm_w(cham_side_t side, cham_trans_t trans, cham_uplo_t * * where op( V ) is one of * - * op( V ) = V or op( V ) = V**T or op( V ) = V**H, + * op( V ) = V or op( V ) = V^T or op( V ) = V^H, * * A1, A2 and W are general matrices, and V is: * diff --git a/coreblas/compute/core_zparfb.c b/coreblas/compute/core_zparfb.c index 4b2246994..9bd1e809a 100644 --- a/coreblas/compute/core_zparfb.c +++ b/coreblas/compute/core_zparfb.c @@ -52,12 +52,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. * * @param[in] direct * Indicates how H is formed from a product of elementary diff --git a/coreblas/compute/core_zpemv.c b/coreblas/compute/core_zpemv.c index f96ef1e14..0144020a7 100644 --- a/coreblas/compute/core_zpemv.c +++ b/coreblas/compute/core_zpemv.c @@ -36,7 +36,7 @@ * * where op( A ) is one of * - * op( A ) = A or op( A ) = A**T or op( A ) = A**H, + * op( A ) = A or op( A ) = A^T or op( A ) = A^H, * * alpha and beta are scalars, x and y are vectors and A is a * pentagonal matrix (see further details). @@ -52,8 +52,8 @@ * @param[in] trans * * @arg ChamNoTrans : y := alpha*A*x + beta*y. - * @arg ChamTrans : y := alpha*A**T*x + beta*y. 
- * @arg ChamConjTrans : y := alpha*A**H*x + beta*y. + * @arg ChamTrans : y := alpha*A^T*x + beta*y. + * @arg ChamConjTrans : y := alpha*A^H*x + beta*y. * * @param[in] M * Number of rows of the matrix A. diff --git a/coreblas/compute/core_ztpmlqt.c b/coreblas/compute/core_ztpmlqt.c index 7a0ecf7ae..a15c7db3d 100644 --- a/coreblas/compute/core_ztpmlqt.c +++ b/coreblas/compute/core_ztpmlqt.c @@ -33,12 +33,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. * * @param[in] M * The number of rows of the tile B. M >= 0. @@ -78,7 +78,7 @@ * or (LDA,K) if SIDE = ChamRight * On entry, the K-by-N or M-by-K matrix A. * On exit, A is overwritten by the corresponding block of - * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. + * Q*C or Q^H*C or C*Q or C*Q^H. See Further Details. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). @@ -88,7 +88,7 @@ * @param[in,out] B * On entry, the M-by-N tile B. * On exit, B is overwritten by the corresponding block of - * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. + * Q*C or Q^H*C or C*Q or C*Q^H. See Further Details. * * @param[in] LDB * The leading dimension of the tile B. LDB >= max(1,M). @@ -122,11 +122,11 @@ * * If trans='N' and side='L', C is on exit replaced with Q * C. * - * If trans='C' and side='L', C is on exit replaced with Q**H * C. + * If trans='C' and side='L', C is on exit replaced with Q^H * C. * * If trans='N' and side='R', C is on exit replaced with C * Q. * - * If trans='C' and side='R', C is on exit replaced with C * Q**H. 
+ * If trans='C' and side='R', C is on exit replaced with C * Q^H. * ******************************************************************************* * diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c index f308f6389..68dfb9744 100644 --- a/coreblas/compute/core_ztpmqrt.c +++ b/coreblas/compute/core_ztpmqrt.c @@ -31,12 +31,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. * * @param[in] M * The number of rows of the tile B. M >= 0. @@ -76,7 +76,7 @@ * or (LDA,K) if SIDE = ChamRight * On entry, the K-by-N or M-by-K matrix A. * On exit, A is overwritten by the corresponding block of - * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. + * Q*C or Q^H*C or C*Q or C*Q^H. See Further Details. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). @@ -86,7 +86,7 @@ * @param[in,out] B * On entry, the M-by-N tile B. * On exit, B is overwritten by the corresponding block of - * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. + * Q*C or Q^H*C or C*Q or C*Q^H. See Further Details. * * @param[in] LDB * The leading dimension of the tile B. LDB >= max(1,M). @@ -121,11 +121,11 @@ * * If trans='N' and side='L', C is on exit replaced with Q * C. * - * If trans='C' and side='L', C is on exit replaced with Q**H * C. + * If trans='C' and side='L', C is on exit replaced with Q^H * C. * * If trans='N' and side='R', C is on exit replaced with C * Q. * - * If trans='C' and side='R', C is on exit replaced with C * Q**H. + * If trans='C' and side='R', C is on exit replaced with C * Q^H. 
* ******************************************************************************* * diff --git a/coreblas/compute/core_ztsmlq.c b/coreblas/compute/core_ztsmlq.c index 34a3b2016..d0f55f225 100644 --- a/coreblas/compute/core_ztsmlq.c +++ b/coreblas/compute/core_ztsmlq.c @@ -38,7 +38,7 @@ * TRANS = 'N': Q * | A1 | | A1 A2 | * Q * | A2 | * - * TRANS = 'C': Q**H * | A1 | | A1 A2 | * Q**H + * TRANS = 'C': Q^H * | A1 | | A1 A2 | * Q^H * | A2 | * * where Q is a complex unitary matrix defined as the product of k @@ -51,12 +51,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. * * @param[in] M1 * The number of rows of the tile A1. M1 >= 0. @@ -248,7 +248,7 @@ int CORE_ztsmlq(cham_side_t side, cham_trans_t trans, jc = i; } /* - * Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb) + * Apply H or H' */ CORE_zparfb( side, trans, ChamDirForward, ChamRowwise, diff --git a/coreblas/compute/core_ztsmlq_hetra1.c b/coreblas/compute/core_ztsmlq_hetra1.c index bd3d8dc40..c4e947942 100644 --- a/coreblas/compute/core_ztsmlq_hetra1.c +++ b/coreblas/compute/core_ztsmlq_hetra1.c @@ -38,12 +38,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. 
* * @param[in] m1 * The number of rows of the tile A1. m1 >= 0. diff --git a/coreblas/compute/core_ztsmqr.c b/coreblas/compute/core_ztsmqr.c index 0e48144dc..98b5b58d0 100644 --- a/coreblas/compute/core_ztsmqr.c +++ b/coreblas/compute/core_ztsmqr.c @@ -38,7 +38,7 @@ * TRANS = 'N': Q * | A1 | | A1 A2 | * Q * | A2 | * - * TRANS = 'C': Q**H * | A1 | | A1 A2 | * Q**H + * TRANS = 'C': Q^H * | A1 | | A1 A2 | * Q^H * | A2 | * * where Q is a complex unitary matrix defined as the product of k @@ -51,12 +51,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. * * @param[in] M1 * The number of rows of the tile A1. M1 >= 0. @@ -243,7 +243,7 @@ int CORE_ztsmqr(cham_side_t side, cham_trans_t trans, jc = i; } /* - * Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb) + * Apply H or H' */ CORE_zparfb( side, trans, ChamDirForward, ChamColumnwise, diff --git a/coreblas/compute/core_ztsmqr_hetra1.c b/coreblas/compute/core_ztsmqr_hetra1.c index 825fd30c7..2b9f8f661 100644 --- a/coreblas/compute/core_ztsmqr_hetra1.c +++ b/coreblas/compute/core_ztsmqr_hetra1.c @@ -40,12 +40,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. 
* * @param[in] m1 * The number of rows of the tile A1. M1 >= 0. diff --git a/coreblas/compute/core_zttmlq.c b/coreblas/compute/core_zttmlq.c index d28b8057a..0226f47dd 100644 --- a/coreblas/compute/core_zttmlq.c +++ b/coreblas/compute/core_zttmlq.c @@ -36,7 +36,7 @@ * TRANS = 'N': Q * | A1 | | A1 | * Q * | A2 | | A2 | * - * TRANS = 'C': Q**H * | A1 | | A1 | * Q**H + * TRANS = 'C': Q^H * | A1 | | A1 | * Q^H * | A2 | | A2 | * * where Q is a complex unitary matrix defined as the product of k @@ -49,12 +49,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. * * @param[in] M1 * The number of rows of the tile A1. M1 >= 0. @@ -244,7 +244,7 @@ int CORE_zttmlq(cham_side_t side, cham_trans_t trans, } /* - * Apply H or H' (NOTE: CORE_zparfb used to be CORE_zttrfb) + * Apply H or H' */ CORE_zparfb( side, trans, ChamDirForward, ChamRowwise, diff --git a/coreblas/compute/core_zttmqr.c b/coreblas/compute/core_zttmqr.c index 896b0c898..e04a22ac5 100644 --- a/coreblas/compute/core_zttmqr.c +++ b/coreblas/compute/core_zttmqr.c @@ -35,7 +35,7 @@ * TRANS = 'N': Q * | A1 | | A1 | * Q * | A2 | | A2 | * - * TRANS = 'C': Q**H * | A1 | | A1 | * Q**H + * TRANS = 'C': Q^H * | A1 | | A1 | * Q^H * | A2 | | A2 | * * where Q is a complex unitary matrix defined as the product of k @@ -48,12 +48,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. 
+ * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. * * @param[in] M1 * The number of rows of the tile A1. M1 >= 0. @@ -235,7 +235,7 @@ int CORE_zttmqr(cham_side_t side, cham_trans_t trans, } /* - * Apply H or H' (NOTE: CORE_zparfb used to be CORE_zttrfb) + * Apply H or H' */ CORE_zparfb( side, trans, ChamDirForward, ChamColumnwise, diff --git a/coreblas/compute/core_zunmlq.c b/coreblas/compute/core_zunmlq.c index 3b5d17c06..6a310b859 100644 --- a/coreblas/compute/core_zunmlq.c +++ b/coreblas/compute/core_zunmlq.c @@ -35,7 +35,7 @@ * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q - * TRANS = 'C': Q**H * C C * Q**H + * TRANS = 'C': Q^H * C C * Q^H * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -48,12 +48,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : Transpose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. * * @param[in] M * The number of rows of the tile C. M >= 0. @@ -90,7 +90,7 @@ * * @param[in,out] C * On entry, the M-by-N tile C. - * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + * On exit, C is overwritten by Q*C or Q^H*C or C*Q^H or C*Q. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). 
diff --git a/coreblas/compute/core_zunmqr.c b/coreblas/compute/core_zunmqr.c index 48e75b281..712da7e6c 100644 --- a/coreblas/compute/core_zunmqr.c +++ b/coreblas/compute/core_zunmqr.c @@ -35,7 +35,7 @@ * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q - * TRANS = 'C': Q**H * C C * Q**H + * TRANS = 'C': Q^H * C C * Q^H * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -48,12 +48,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : Transpose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. * * @param[in] M * The number of rows of the tile C. M >= 0. @@ -91,7 +91,7 @@ * * @param[in,out] C * On entry, the M-by-N tile C. - * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + * On exit, C is overwritten by Q*C or Q^H*C or C*Q^H or C*Q. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). 
diff --git a/coreblas/eztrace_module/coreblas_eztrace_module b/coreblas/eztrace_module/coreblas_eztrace_module index 4a8688192..3bca3cbf1 100644 --- a/coreblas/eztrace_module/coreblas_eztrace_module +++ b/coreblas/eztrace_module/coreblas_eztrace_module @@ -7,18 +7,6 @@ ID 7770 void CORE_scasum(int storev, int uplo, int M, int N, void *A, int lda, float *work); -void CORE_cbrdalg(int uplo, int N, int NB, - void *pA, void *C, void *S, - int i, int j, int m, int grsiz); -int CORE_cgbelr(int uplo, int N, - void **A, void *V, void *TAU, - int st, int ed, int eltsize); -int CORE_cgbrce(int uplo, int N, - void **A, void *V, void *TAU, - int st, int ed, int eltsize); -int CORE_cgblrx(int uplo, int N, - void **A, void *V, void *TAU, - int st, int ed, int eltsize); int CORE_cgeadd(int M, int N, void *alpha, void *A, int LDA, void *B, int LDB); @@ -64,30 +52,6 @@ int CORE_cgetrf_reclap(int M, int N, int CORE_cgetrf_rectil(void *A, int *IPIV, int *info); void CORE_cgetrip(int m, int n, void *A, void *work); -int CORE_chbelr(int uplo, int N, - void **A, void *V, void *TAU, - int st, int ed, int eltsize); -int CORE_chblrx(int uplo, int N, - void **A, void *V, void *TAU, - int st, int ed, int eltsize); -int CORE_chbrce(int uplo, int N, - void **A, void *V, void *TAU, - int st, int ed, int eltsize); -void CORE_chbtype1cb(int N, int NB, - void *A, int LDA, - void *V, void *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - void *WORK); -void CORE_chbtype2cb(int N, int NB, - void *A, int LDA, - void *V, void *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - void *WORK); -void CORE_chbtype3cb(int N, int NB, - void *A, int LDA, - void *V, void *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - void *WORK); void CORE_chegst(int itype, int uplo, int N, void *A, int LDA, void *B, int LDB, int *INFO); @@ -230,13 +194,6 @@ int CORE_csyssq(int uplo, int N, void *A, int LDA, float *scale, float *sumsq); int CORE_csytf2_nopiv(int uplo, int n, void *A, int lda); 
-void CORE_cswpab(int i, int n1, int n2, - void *A, void *work); -int CORE_cswptr_ontile(void *descA, int i1, int i2, const int *ipiv, int inc, - void *Akk, int ldak); -void CORE_ctrdalg(int uplo, int N, int NB, - void *pA, void *V, void *TAU, - int i, int j, int m, int grsiz); void CORE_ctrmm(int side, int uplo, int transA, int diag, int M, int N, @@ -352,18 +309,6 @@ int CORE_cunmqr(int side, int trans, void CORE_dasum(int storev, int uplo, int M, int N, const double *A, int lda, double *work); -void CORE_dbrdalg(int uplo, int N, int NB, - void *pA, double *C, double *S, - int i, int j, int m, int grsiz); -int CORE_dgbelr(int uplo, int N, - void **A, double *V, double *TAU, - int st, int ed, int eltsize); -int CORE_dgbrce(int uplo, int N, - void **A, double *V, double *TAU, - int st, int ed, int eltsize); -int CORE_dgblrx(int uplo, int N, - void **A, double *V, double *TAU, - int st, int ed, int eltsize); int CORE_dgeadd(int M, int N, double alpha, const double *A, int LDA, double *B, int LDB); @@ -409,30 +354,6 @@ int CORE_dgetrf_reclap(int M, int N, int CORE_dgetrf_rectil(void *A, int *IPIV, int *info); void CORE_dgetrip(int m, int n, double *A, double *work); -int CORE_dhbelr(int uplo, int N, - void **A, double *V, double *TAU, - int st, int ed, int eltsize); -int CORE_dhblrx(int uplo, int N, - void **A, double *V, double *TAU, - int st, int ed, int eltsize); -int CORE_dhbrce(int uplo, int N, - void **A, double *V, double *TAU, - int st, int ed, int eltsize); -void CORE_dhbtype1cb(int N, int NB, - double *A, int LDA, - double *V, double *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - double *WORK); -void CORE_dhbtype2cb(int N, int NB, - double *A, int LDA, - double *V, double *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - double *WORK); -void CORE_dhbtype3cb(int N, int NB, - double *A, int LDA, - const double *V, const double *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - double *WORK); void CORE_dsygst(int itype, int 
uplo, int N, double *A, int LDA, double *B, int LDB, int *INFO); @@ -553,13 +474,6 @@ int CORE_dssssm(int M1, int N1, int M2, int N2, int K, int IB, const double *L2, int LDL2, const int *IPIV); int CORE_dsytf2_nopiv(int uplo, int n, double *A, int lda); -void CORE_dswpab(int i, int n1, int n2, - double *A, double *work); -int CORE_dswptr_ontile(void *descA, int i1, int i2, const int *ipiv, int inc, - const double *Akk, int ldak); -void CORE_dtrdalg(int uplo, int N, int NB, - void *pA, double *V, double *TAU, - int i, int j, int m, int grsiz); void CORE_dtrmm(int side, int uplo, int transA, int diag, int M, int N, @@ -682,18 +596,6 @@ void CORE_dlag2s(int m, int n, void CORE_sasum(int storev, int uplo, int M, int N, const float *A, int lda, float *work); -void CORE_sbrdalg(int uplo, int N, int NB, - void *pA, float *C, float *S, - int i, int j, int m, int grsiz); -int CORE_sgbelr(int uplo, int N, - void **A, float *V, float *TAU, - int st, int ed, int eltsize); -int CORE_sgbrce(int uplo, int N, - void **A, float *V, float *TAU, - int st, int ed, int eltsize); -int CORE_sgblrx(int uplo, int N, - void **A, float *V, float *TAU, - int st, int ed, int eltsize); int CORE_sgeadd(int M, int N, float alpha, const float *A, int LDA, float *B, int LDB); @@ -739,30 +641,6 @@ int CORE_sgetrf_reclap(int M, int N, int CORE_sgetrf_rectil(void *A, int *IPIV, int *info); void CORE_sgetrip(int m, int n, float *A, float *work); -int CORE_shbelr(int uplo, int N, - void **A, float *V, float *TAU, - int st, int ed, int eltsize); -int CORE_shblrx(int uplo, int N, - void **A, float *V, float *TAU, - int st, int ed, int eltsize); -int CORE_shbrce(int uplo, int N, - void **A, float *V, float *TAU, - int st, int ed, int eltsize); -void CORE_shbtype1cb(int N, int NB, - float *A, int LDA, - float *V, float *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - float *WORK); -void CORE_shbtype2cb(int N, int NB, - float *A, int LDA, - float *V, float *TAU, - int st, int ed, int sweep, int 
Vblksiz, int WANTZ, - float *WORK); -void CORE_shbtype3cb(int N, int NB, - float *A, int LDA, - const float *V, const float *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - float *WORK); void CORE_ssygst(int itype, int uplo, int N, float *A, int LDA, float *B, int LDB, int *INFO); @@ -883,13 +761,6 @@ int CORE_sssssm(int M1, int N1, int M2, int N2, int K, int IB, const float *L2, int LDL2, const int *IPIV); int CORE_ssytf2_nopiv(int uplo, int n, float *A, int lda); -void CORE_sswpab(int i, int n1, int n2, - float *A, float *work); -int CORE_sswptr_ontile(void *descA, int i1, int i2, const int *ipiv, int inc, - const float *Akk, int ldak); -void CORE_strdalg(int uplo, int N, int NB, - void *pA, float *V, float *TAU, - int i, int j, int m, int grsiz); void CORE_strmm(int side, int uplo, int transA, int diag, int M, int N, @@ -1005,18 +876,6 @@ int CORE_sormqr(int side, int trans, void CORE_dzasum(int storev, int uplo, int M, int N, void *A, int lda, double *work); -void CORE_zbrdalg(int uplo, int N, int NB, - void *pA, void *C, void *S, - int i, int j, int m, int grsiz); -int CORE_zgbelr(int uplo, int N, - void **A, void *V, void *TAU, - int st, int ed, int eltsize); -int CORE_zgbrce(int uplo, int N, - void **A, void *V, void *TAU, - int st, int ed, int eltsize); -int CORE_zgblrx(int uplo, int N, - void **A, void *V, void *TAU, - int st, int ed, int eltsize); int CORE_zgeadd(int M, int N, void *alpha, void *A, int LDA, void *B, int LDB); @@ -1062,30 +921,6 @@ int CORE_zgetrf_reclap(int M, int N, int CORE_zgetrf_rectil(void *A, int *IPIV, int *info); void CORE_zgetrip(int m, int n, void *A, void *work); -int CORE_zhbelr(int uplo, int N, - void **A, void *V, void *TAU, - int st, int ed, int eltsize); -int CORE_zhblrx(int uplo, int N, - void **A, void *V, void *TAU, - int st, int ed, int eltsize); -int CORE_zhbrce(int uplo, int N, - void **A, void *V, void *TAU, - int st, int ed, int eltsize); -void CORE_zhbtype1cb(int N, int NB, - void *A, int LDA, - void 
*V, void *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - void *WORK); -void CORE_zhbtype2cb(int N, int NB, - void *A, int LDA, - void *V, void *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - void *WORK); -void CORE_zhbtype3cb(int N, int NB, - void *A, int LDA, - void *V, void *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - void *WORK); void CORE_zhegst(int itype, int uplo, int N, void *A, int LDA, void *B, int LDB, int *INFO); @@ -1222,13 +1057,6 @@ int CORE_zsyssq(int uplo, int N, void *A, int LDA, double *scale, double *sumsq); int CORE_zsytf2_nopiv(int uplo, int n, void *A, int lda); -void CORE_zswpab(int i, int n1, int n2, - void *A, void *work); -int CORE_zswptr_ontile(void *descA, int i1, int i2, const int *ipiv, int inc, - void *Akk, int ldak); -void CORE_ztrdalg(int uplo, int N, int NB, - void *pA, void *V, void *TAU, - int i, int j, int m, int grsiz); void CORE_ztrmm(int side, int uplo, int transA, int diag, int M, int N, diff --git a/coreblas/include/coreblas/coreblas_z.h b/coreblas/include/coreblas/coreblas_z.h index 8a755db76..ee434de43 100644 --- a/coreblas/include/coreblas/coreblas_z.h +++ b/coreblas/include/coreblas/coreblas_z.h @@ -32,18 +32,6 @@ */ void CORE_dzasum(cham_store_t storev, cham_uplo_t uplo, int M, int N, const CHAMELEON_Complex64_t *A, int lda, double *work); -void CORE_zbrdalg(cham_uplo_t uplo, int N, int NB, - const CHAM_desc_t *pA, CHAMELEON_Complex64_t *C, CHAMELEON_Complex64_t *S, - int i, int j, int m, int grsiz); -int CORE_zgbelr(cham_uplo_t uplo, int N, - CHAM_desc_t *A, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU, - int st, int ed, int eltsize); -int CORE_zgbrce(cham_uplo_t uplo, int N, - CHAM_desc_t *A, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU, - int st, int ed, int eltsize); -int CORE_zgblrx(cham_uplo_t uplo, int N, - CHAM_desc_t *A, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU, - int st, int ed, int eltsize); int CORE_zaxpy(int M, CHAMELEON_Complex64_t 
alpha, const CHAMELEON_Complex64_t *A, int incA, CHAMELEON_Complex64_t *B, int incB); @@ -99,30 +87,6 @@ void CORE_zgetrip(int m, int n, CHAMELEON_Complex64_t *A, void CORE_zhe2ge(cham_uplo_t uplo, int M, int N, const CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB); -int CORE_zhbelr(cham_uplo_t uplo, int N, - CHAM_desc_t *A, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU, - int st, int ed, int eltsize); -int CORE_zhblrx(cham_uplo_t uplo, int N, - CHAM_desc_t *A, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU, - int st, int ed, int eltsize); -int CORE_zhbrce(cham_uplo_t uplo, int N, - CHAM_desc_t *A, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU, - int st, int ed, int eltsize); -void CORE_zhbtype1cb(int N, int NB, - CHAMELEON_Complex64_t *A, int LDA, - CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - CHAMELEON_Complex64_t *WORK); -void CORE_zhbtype2cb(int N, int NB, - CHAMELEON_Complex64_t *A, int LDA, - CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - CHAMELEON_Complex64_t *WORK); -void CORE_zhbtype3cb(int N, int NB, - CHAMELEON_Complex64_t *A, int LDA, - const CHAMELEON_Complex64_t *V, const CHAMELEON_Complex64_t *TAU, - int st, int ed, int sweep, int Vblksiz, int WANTZ, - CHAMELEON_Complex64_t *WORK); void CORE_zhegst(int itype, cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB, int *INFO); @@ -261,10 +225,6 @@ int CORE_zsyssq(cham_uplo_t uplo, int N, const CHAMELEON_Complex64_t *A, int LDA, double *scale, double *sumsq); int CORE_zsytf2_nopiv(cham_uplo_t uplo, int n, CHAMELEON_Complex64_t *A, int lda); -void CORE_zswpab(int i, int n1, int n2, - CHAMELEON_Complex64_t *A, CHAMELEON_Complex64_t *work); -int CORE_zswptr_ontile(CHAM_desc_t descA, int i1, int i2, const int *ipiv, int inc, - const CHAMELEON_Complex64_t *Akk, int ldak); int CORE_ztradd(cham_uplo_t 
uplo, cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA, @@ -273,9 +233,6 @@ int CORE_ztradd(cham_uplo_t uplo, cham_trans_t trans, int M, int N, void CORE_ztrasm(cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N, const CHAMELEON_Complex64_t *A, int lda, double *work); -void CORE_ztrdalg(cham_uplo_t uplo, int N, int NB, - const CHAM_desc_t *pA, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU, - int i, int j, int m, int grsiz); void CORE_ztrmm(cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, int M, int N, diff --git a/cudablas/compute/cuda_zparfb.c b/cudablas/compute/cuda_zparfb.c index 95bbe2115..b4e6f6c19 100644 --- a/cudablas/compute/cuda_zparfb.c +++ b/cudablas/compute/cuda_zparfb.c @@ -46,12 +46,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. * * @param[in] direct * Indicates how H is formed from a product of elementary diff --git a/cudablas/compute/cuda_ztpmlqt.c b/cudablas/compute/cuda_ztpmlqt.c index a8ff6ce5a..aaf70c231 100644 --- a/cudablas/compute/cuda_ztpmlqt.c +++ b/cudablas/compute/cuda_ztpmlqt.c @@ -33,12 +33,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. 
* * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. * * @param[in] M * The number of rows of the tile B. M >= 0. @@ -78,7 +78,7 @@ * or (LDA,K) if SIDE = ChamRight * On entry, the K-by-N or M-by-K matrix A. * On exit, A is overwritten by the corresponding block of - * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. + * Q*C or Q^H*C or C*Q or C*Q^H. See Further Details. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). @@ -88,7 +88,7 @@ * @param[in,out] B * On entry, the M-by-N tile B. * On exit, B is overwritten by the corresponding block of - * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. + * Q*C or Q^H*C or C*Q or C*Q^H. See Further Details. * * @param[in] LDB * The leading dimension of the tile B. LDB >= max(1,M). @@ -122,11 +122,11 @@ * * If trans='N' and side='L', C is on exit replaced with Q * C. * - * If trans='C' and side='L', C is on exit replaced with Q**H * C. + * If trans='C' and side='L', C is on exit replaced with Q^H * C. * * If trans='N' and side='R', C is on exit replaced with C * Q. * - * If trans='C' and side='R', C is on exit replaced with C * Q**H. + * If trans='C' and side='R', C is on exit replaced with C * Q^H. * ******************************************************************************* * diff --git a/cudablas/compute/cuda_ztpmqrt.c b/cudablas/compute/cuda_ztpmqrt.c index 22319e42d..c9a1fea2a 100644 --- a/cudablas/compute/cuda_ztpmqrt.c +++ b/cudablas/compute/cuda_ztpmqrt.c @@ -33,12 +33,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. 
* * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : ConjTranspose, apply Q**H. + * @arg ChamConjTrans : ConjTranspose, apply Q^H. * * @param[in] M * The number of rows of the tile B. M >= 0. @@ -78,7 +78,7 @@ * or (LDA,K) if SIDE = ChamRight * On entry, the K-by-N or M-by-K matrix A. * On exit, A is overwritten by the corresponding block of - * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. + * Q*C or Q^H*C or C*Q or C*Q^H. See Further Details. * * @param[in] LDA * The leading dimension of the array A. LDA >= max(1,M). @@ -88,7 +88,7 @@ * @param[in,out] B * On entry, the M-by-N tile B. * On exit, B is overwritten by the corresponding block of - * Q*C or Q**H*C or C*Q or C*Q**H. See Further Details. + * Q*C or Q^H*C or C*Q or C*Q^H. See Further Details. * * @param[in] LDB * The leading dimension of the tile B. LDB >= max(1,M). @@ -123,11 +123,11 @@ * * If trans='N' and side='L', C is on exit replaced with Q * C. * - * If trans='C' and side='L', C is on exit replaced with Q**H * C. + * If trans='C' and side='L', C is on exit replaced with Q^H * C. * * If trans='N' and side='R', C is on exit replaced with C * Q. * - * If trans='C' and side='R', C is on exit replaced with C * Q**H. + * If trans='C' and side='R', C is on exit replaced with C * Q^H. 
* ******************************************************************************* * diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt index f756adc45..e184ca536 100644 --- a/runtime/CMakeLists.txt +++ b/runtime/CMakeLists.txt @@ -28,7 +28,7 @@ # List of codelets required by all runtimes # ----------------------------------------- set(CODELETS_ZSRC - codelets/codelet_zasum.c + codelets/codelet_dzasum.c ################## # BLAS 1 ################## diff --git a/runtime/openmp/codelets/codelet_zasum.c b/runtime/openmp/codelets/codelet_dzasum.c similarity index 94% rename from runtime/openmp/codelets/codelet_zasum.c rename to runtime/openmp/codelets/codelet_dzasum.c index 0ec02b39c..1ce65879b 100644 --- a/runtime/openmp/codelets/codelet_zasum.c +++ b/runtime/openmp/codelets/codelet_dzasum.c @@ -1,6 +1,6 @@ /** * - * @file openmp/codelet_zasum.c + * @file openmp/codelet_dzasum.c * * @copyright 2009-2014 The University of Tennessee and The University of * Tennessee Research Foundation. All rights reserved. @@ -9,7 +9,7 @@ * *** * - * @brief Chameleon zasum OpenMP codelet + * @brief Chameleon dzasum OpenMP codelet * * @version 0.9.2 * @comment This file has been automatically generated diff --git a/runtime/openmp/codelets/codelet_zunmlq.c b/runtime/openmp/codelets/codelet_zunmlq.c index 6de8202b2..92d6e71f8 100644 --- a/runtime/openmp/codelets/codelet_zunmlq.c +++ b/runtime/openmp/codelets/codelet_zunmlq.c @@ -35,7 +35,7 @@ * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q - * TRANS = 'C': Q**H * C C * Q**H + * TRANS = 'C': Q^H * C C * Q^H * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -48,12 +48,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. 
* * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : Transpose, apply Q**H. + * @arg ChamConjTrans : Transpose, apply Q^H. * * @param[in] M * The number of rows of the tile C. M >= 0. @@ -90,7 +90,7 @@ * * @param[in,out] C * On entry, the M-by-N tile C. - * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + * On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). diff --git a/runtime/openmp/codelets/codelet_zunmqr.c b/runtime/openmp/codelets/codelet_zunmqr.c index 93190251e..66aa62b5d 100644 --- a/runtime/openmp/codelets/codelet_zunmqr.c +++ b/runtime/openmp/codelets/codelet_zunmqr.c @@ -34,7 +34,7 @@ * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q - * TRANS = 'C': Q**H * C C * Q**H + * TRANS = 'C': Q^H * C C * Q^H * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -47,12 +47,12 @@ ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : Transpose, apply Q**H. + * @arg ChamConjTrans : Transpose, apply Q^H. * * @param[in] M * The number of rows of the tile C. M >= 0. @@ -90,7 +90,7 @@ * * @param[in,out] C * On entry, the M-by-N tile C. - * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + * On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). 
diff --git a/runtime/parsec/codelets/codelet_zasum.c b/runtime/parsec/codelets/codelet_dzasum.c similarity index 95% rename from runtime/parsec/codelets/codelet_zasum.c rename to runtime/parsec/codelets/codelet_dzasum.c index ccea81743..e0faa8dd2 100644 --- a/runtime/parsec/codelets/codelet_zasum.c +++ b/runtime/parsec/codelets/codelet_dzasum.c @@ -1,6 +1,6 @@ /** * - * @file parsec/codelet_zasum.c + * @file parsec/codelet_dzasum.c * * @copyright 2009-2015 The University of Tennessee and The University of * Tennessee Research Foundation. All rights reserved. @@ -9,7 +9,7 @@ * *** * - * @brief Chameleon zasum PaRSEC codelet + * @brief Chameleon dzasum PaRSEC codelet * * @version 0.9.2 * @author Reazul Hoque @@ -50,7 +50,7 @@ void INSERT_TASK_dzasum(const RUNTIME_option_t *options, parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt); parsec_dtd_taskpool_insert_task( - PARSEC_dtd_taskpool, CORE_dzasum_parsec, options->priority, "zasum", + PARSEC_dtd_taskpool, CORE_dzasum_parsec, options->priority, "dzasum", sizeof(int), &storev, VALUE, sizeof(int), &uplo, VALUE, sizeof(int), &M, VALUE, diff --git a/runtime/quark/codelets/codelet_zasum.c b/runtime/quark/codelets/codelet_dzasum.c similarity index 96% rename from runtime/quark/codelets/codelet_zasum.c rename to runtime/quark/codelets/codelet_dzasum.c index 6dd5cce03..8c49a6f76 100644 --- a/runtime/quark/codelets/codelet_zasum.c +++ b/runtime/quark/codelets/codelet_dzasum.c @@ -1,6 +1,6 @@ /** * - * @file quark/codelet_zasum.c + * @file quark/codelet_dzasum.c * * @copyright 2009-2014 The University of Tennessee and The University of * Tennessee Research Foundation. All rights reserved. 
@@ -9,7 +9,7 @@ * *** * - * @brief Chameleon zasum Quark codelet + * @brief Chameleon dzasum Quark codelet * * @version 0.9.2 * @comment This file has been automatically generated diff --git a/runtime/quark/codelets/codelet_zunmlq.c b/runtime/quark/codelets/codelet_zunmlq.c index 76141454b..5b8687571 100644 --- a/runtime/quark/codelets/codelet_zunmlq.c +++ b/runtime/quark/codelets/codelet_zunmlq.c @@ -59,7 +59,7 @@ void CORE_zunmlq_quark(Quark *quark) * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q - * TRANS = 'C': Q**H * C C * Q**H + * TRANS = 'C': Q^H * C C * Q^H * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -72,12 +72,12 @@ void CORE_zunmlq_quark(Quark *quark) ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : Transpose, apply Q**H. + * @arg ChamConjTrans : Transpose, apply Q^H. * * @param[in] M * The number of rows of the tile C. M >= 0. @@ -114,7 +114,7 @@ void CORE_zunmlq_quark(Quark *quark) * * @param[in,out] C * On entry, the M-by-N tile C. - * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + * On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). 
diff --git a/runtime/quark/codelets/codelet_zunmqr.c b/runtime/quark/codelets/codelet_zunmqr.c index 75968addd..f03746016 100644 --- a/runtime/quark/codelets/codelet_zunmqr.c +++ b/runtime/quark/codelets/codelet_zunmqr.c @@ -58,7 +58,7 @@ void CORE_zunmqr_quark(Quark *quark) * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q - * TRANS = 'C': Q**H * C C * Q**H + * TRANS = 'C': Q^H * C C * Q^H * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -71,12 +71,12 @@ void CORE_zunmqr_quark(Quark *quark) ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : Transpose, apply Q**H. + * @arg ChamConjTrans : Transpose, apply Q^H. * * @param[in] M * The number of rows of the tile C. M >= 0. @@ -114,7 +114,7 @@ void CORE_zunmqr_quark(Quark *quark) * * @param[in,out] C * On entry, the M-by-N tile C. - * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + * On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). 
diff --git a/runtime/quark/include/core_blas_dag.h b/runtime/quark/include/core_blas_dag.h index f5ba7073d..83bdbd531 100644 --- a/runtime/quark/include/core_blas_dag.h +++ b/runtime/quark/include/core_blas_dag.h @@ -78,6 +78,8 @@ #define DAG_CORE_TSTRF DAG_SET_PROPERTIES( "TSTRF" , "red" ) #define DAG_CORE_UNMLQ DAG_SET_PROPERTIES( "UNMLQ" , "cyan" ) #define DAG_CORE_UNMQR DAG_SET_PROPERTIES( "UNMQR" , "cyan" ) +#define DAG_CORE_ORMLQ DAG_SET_PROPERTIES( "ORMLQ" , "cyan" ) +#define DAG_CORE_ORMQR DAG_SET_PROPERTIES( "ORMQR" , "cyan" ) #define DAG_CORE_TSLQT DAG_CORE_TPLQT #define DAG_CORE_TSMLQ DAG_CORE_TPMLQT diff --git a/runtime/starpu/codelets/codelet_zasum.c b/runtime/starpu/codelets/codelet_dzasum.c similarity index 88% rename from runtime/starpu/codelets/codelet_zasum.c rename to runtime/starpu/codelets/codelet_dzasum.c index 1cbd3e6b2..0e94fc672 100644 --- a/runtime/starpu/codelets/codelet_zasum.c +++ b/runtime/starpu/codelets/codelet_dzasum.c @@ -1,6 +1,6 @@ /** * - * @file starpu/codelet_zasum.c + * @file starpu/codelet_dzasum.c * * @copyright 2009-2014 The University of Tennessee and The University of * Tennessee Research Foundation. All rights reserved. @@ -9,7 +9,7 @@ * *** * - * @brief Chameleon zasum StarPU codelet + * @brief Chameleon dzasum StarPU codelet * * @version 0.9.2 * @comment This file has been automatically generated @@ -43,15 +43,15 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg) /* * Codelet definition */ -CODELETS_CPU(zasum, 2, cl_dzasum_cpu_func) +CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func) void INSERT_TASK_dzasum( const RUNTIME_option_t *options, cham_store_t storev, cham_uplo_t uplo, int M, int N, const CHAM_desc_t *A, int Am, int An, int lda, const CHAM_desc_t *B, int Bm, int Bn ) { - struct starpu_codelet *codelet = &cl_zasum; - void (*callback)(void*) = options->profiling ? cl_zasum_callback : NULL; + struct starpu_codelet *codelet = &cl_dzasum; + void (*callback)(void*) = options->profiling ? 
cl_dzasum_callback : NULL; CHAMELEON_BEGIN_ACCESS_DECLARATION; CHAMELEON_ACCESS_R(A, Am, An); @@ -70,7 +70,7 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options, STARPU_PRIORITY, options->priority, STARPU_CALLBACK, callback, #if defined(CHAMELEON_CODELETS_HAVE_NAME) - STARPU_NAME, "zasum", + STARPU_NAME, "dzasum", #endif 0); } diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c index 0c31aa653..35aea3122 100644 --- a/runtime/starpu/codelets/codelet_zcallback.c +++ b/runtime/starpu/codelets/codelet_zcallback.c @@ -22,7 +22,7 @@ #include "chameleon_starpu.h" #include "runtime_codelet_z.h" -CHAMELEON_CL_CB(zasum, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) +CHAMELEON_CL_CB(dzasum, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) CHAMELEON_CL_CB(zaxpy, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[1]), 0, M) CHAMELEON_CL_CB(zgeadd, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) CHAMELEON_CL_CB(zlascal, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N) diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c index bab65bfcc..be36f957d 100644 --- a/runtime/starpu/codelets/codelet_zunmlq.c +++ b/runtime/starpu/codelets/codelet_zunmlq.c @@ -104,7 +104,7 @@ CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC) * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q - * TRANS = 'C': Q**H * C C * Q**H + * TRANS = 'C': Q^H * C C * Q^H * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -117,12 +117,12 @@ CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC) ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the 
Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : Transpose, apply Q**H. + * @arg ChamConjTrans : Transpose, apply Q^H. * * @param[in] M * The number of rows of the tile C. M >= 0. @@ -159,7 +159,7 @@ CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC) * * @param[in,out] C * On entry, the M-by-N tile C. - * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + * On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c index ed8cbccd5..8ff98bc79 100644 --- a/runtime/starpu/codelets/codelet_zunmqr.c +++ b/runtime/starpu/codelets/codelet_zunmqr.c @@ -103,7 +103,7 @@ CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC) * * SIDE = 'L' SIDE = 'R' * TRANS = 'N': Q * C C * Q - * TRANS = 'C': Q**H * C C * Q**H + * TRANS = 'C': Q^H * C C * Q^H * * where Q is a complex unitary matrix defined as the product of k * elementary reflectors @@ -116,12 +116,12 @@ CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC) ******************************************************************************* * * @param[in] side - * @arg ChamLeft : apply Q or Q**H from the Left; - * @arg ChamRight : apply Q or Q**H from the Right. + * @arg ChamLeft : apply Q or Q^H from the Left; + * @arg ChamRight : apply Q or Q^H from the Right. * * @param[in] trans * @arg ChamNoTrans : No transpose, apply Q; - * @arg ChamConjTrans : Transpose, apply Q**H. + * @arg ChamConjTrans : Transpose, apply Q^H. * * @param[in] M * The number of rows of the tile C. M >= 0. 
@@ -159,7 +159,7 @@ CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC) * * @param[in,out] C * On entry, the M-by-N tile C. - * On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q. + * On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q. * * @param[in] LDC * The leading dimension of the array C. LDC >= max(1,M). diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h index d9c6e915e..4936d329c 100644 --- a/runtime/starpu/include/runtime_codelet_z.h +++ b/runtime/starpu/include/runtime_codelet_z.h @@ -36,89 +36,89 @@ /* * BLAS 1 functions */ -ZCODELETS_HEADER(axpy) +CODELETS_HEADER(zaxpy) /* * BLAS 3 functions */ -ZCODELETS_HEADER(gemm) -ZCODELETS_HEADER(hemm) -ZCODELETS_HEADER(her2k) -ZCODELETS_HEADER(herk) -ZCODELETS_HEADER(symm) -ZCODELETS_HEADER(syr2k) -ZCODELETS_HEADER(syrk) -ZCODELETS_HEADER(trmm) -ZCODELETS_HEADER(trsm) +CODELETS_HEADER(zgemm) +CODELETS_HEADER(zhemm) +CODELETS_HEADER(zher2k) +CODELETS_HEADER(zherk) +CODELETS_HEADER(zsymm) +CODELETS_HEADER(zsyr2k) +CODELETS_HEADER(zsyrk) +CODELETS_HEADER(ztrmm) +CODELETS_HEADER(ztrsm) /* * LAPACK functions */ -ZCODELETS_HEADER(gelqt) -ZCODELETS_HEADER(geqrt) -ZCODELETS_HEADER(gessm) -ZCODELETS_HEADER(gessq) -ZCODELETS_HEADER(getrf) -ZCODELETS_HEADER(getrf_incpiv) -ZCODELETS_HEADER(getrf_nopiv) -ZCODELETS_HEADER(herfb) -ZCODELETS_HEADER(lauum) -ZCODELETS_HEADER(potrf) -ZCODELETS_HEADER(ssssm) -ZCODELETS_HEADER(syssq) -ZCODELETS_HEADER(trasm) -ZCODELETS_HEADER(trssq) -ZCODELETS_HEADER(trtri) -ZCODELETS_HEADER(tplqt) -ZCODELETS_HEADER(tpqrt) -ZCODELETS_HEADER(tpmlqt) -ZCODELETS_HEADER(tpmqrt) -ZCODELETS_HEADER(tsmlq_hetra1) -ZCODELETS_HEADER(tsmqr_hetra1) -ZCODELETS_HEADER(tstrf) -ZCODELETS_HEADER(unmlq) -ZCODELETS_HEADER(unmqr) +CODELETS_HEADER(zgelqt) +CODELETS_HEADER(zgeqrt) +CODELETS_HEADER(zgessm) +CODELETS_HEADER(zgessq) +CODELETS_HEADER(zgetrf) +CODELETS_HEADER(zgetrf_incpiv) +CODELETS_HEADER(zgetrf_nopiv) 
+CODELETS_HEADER(zherfb) +CODELETS_HEADER(zlauum) +CODELETS_HEADER(zpotrf) +CODELETS_HEADER(zssssm) +CODELETS_HEADER(zsyssq) +CODELETS_HEADER(ztrasm) +CODELETS_HEADER(ztrssq) +CODELETS_HEADER(ztrtri) +CODELETS_HEADER(ztplqt) +CODELETS_HEADER(ztpqrt) +CODELETS_HEADER(ztpmlqt) +CODELETS_HEADER(ztpmqrt) +CODELETS_HEADER(ztsmlq_hetra1) +CODELETS_HEADER(ztsmqr_hetra1) +CODELETS_HEADER(ztstrf) +CODELETS_HEADER(zunmlq) +CODELETS_HEADER(zunmqr) /* * Auxiliary functions */ -ZCODELETS_HEADER(geadd) -ZCODELETS_HEADER(he2ge) -ZCODELETS_HEADER(lascal) -ZCODELETS_HEADER(tradd) -ZCODELETS_HEADER(lacpy) -ZCODELETS_HEADER(lange) -ZCODELETS_HEADER(lange_max) -ZCODELETS_HEADER(lansy) -ZCODELETS_HEADER(lantr) -ZCODELETS_HEADER(laset) -ZCODELETS_HEADER(laset2) -ZCODELETS_HEADER(latro) -ZCODELETS_HEADER(plssq) -ZCODELETS_HEADER(plssq2) +CODELETS_HEADER(zgeadd) +CODELETS_HEADER(zhe2ge) +CODELETS_HEADER(zlascal) +CODELETS_HEADER(ztradd) +CODELETS_HEADER(zlacpy) +CODELETS_HEADER(zlange) +CODELETS_HEADER(zlange_max) +CODELETS_HEADER(zlansy) +CODELETS_HEADER(zlantr) +CODELETS_HEADER(zlaset) +CODELETS_HEADER(zlaset2) +CODELETS_HEADER(zlatro) +CODELETS_HEADER(zplssq) +CODELETS_HEADER(zplssq2) /* * MIXED PRECISION functions */ -ZCODELETS_HEADER(lag2c) +CODELETS_HEADER(zlag2c) /* * DZ functions */ -ZCODELETS_HEADER(asum) +CODELETS_HEADER(dzasum) /* * CPU only functions */ -ZCODELETS_HEADER(plrnt) -ZCODELETS_HEADER(build) +CODELETS_HEADER(zplrnt) +CODELETS_HEADER(zbuild) #if defined(PRECISION_z) || defined(PRECISION_c) -ZCODELETS_HEADER(hessq) -ZCODELETS_HEADER(lanhe) -ZCODELETS_HEADER(plghe) -ZCODELETS_HEADER(sytrf_nopiv) +CODELETS_HEADER(zhessq) +CODELETS_HEADER(zlanhe) +CODELETS_HEADER(zplghe) +CODELETS_HEADER(zsytrf_nopiv) #endif -ZCODELETS_HEADER(plgsy) +CODELETS_HEADER(zplgsy) #endif /* _runtime_codelet_z_h_ */ diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h index a68724449..4fb8b9cf8 100644 --- a/runtime/starpu/include/runtime_codelets.h 
+++ b/runtime/starpu/include/runtime_codelets.h @@ -112,11 +112,6 @@ #define CODELETS_HEADER(name) CODELETS_ALL_HEADER(name) #endif -#define SCODELETS_HEADER(name) CODELETS_HEADER(s##name) -#define DCODELETS_HEADER(name) CODELETS_HEADER(d##name) -#define CCODELETS_HEADER(name) CODELETS_HEADER(c##name) -#define ZCODELETS_HEADER(name) CODELETS_HEADER(z##name) - CODELETS_HEADER(map); #endif /* _runtime_codelets_h_ */ diff --git a/testing/lin/clagsy.f b/testing/lin/clagsy.f index c5fea1b7d..0522d0600 100644 --- a/testing/lin/clagsy.f +++ b/testing/lin/clagsy.f @@ -55,7 +55,7 @@ * * CLAGSY generates a complex symmetric matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random unitary matrix: -* A = U*D*U**T. The semi-bandwidth may then be reduced to k by +* A = U*D*U^T. The semi-bandwidth may then be reduced to k by * additional unitary transformations. * * Arguments diff --git a/testing/lin/clarhs.f b/testing/lin/clarhs.f index 46f4d68c2..22165f3a3 100644 --- a/testing/lin/clarhs.f +++ b/testing/lin/clarhs.f @@ -58,7 +58,7 @@ * CLARHS chooses a set of NRHS random solution vectors and sets * up the right hand sides for the linear system * op( A ) * X = B, -* where op( A ) may be A, A**T (transpose of A), or A**H (conjugate +* where op( A ) may be A, A^T (transpose of A), or A^H (conjugate * transpose of A). * * Arguments @@ -102,8 +102,8 @@ * Used only if A is nonsymmetric; specifies the operation * applied to the matrix A. * = 'N': B := A * X -* = 'T': B := A**T * X -* = 'C': B := A**H * X +* = 'T': B := A^T * X +* = 'C': B := A^H * X * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. 
diff --git a/testing/lin/clatrs.f b/testing/lin/clatrs.f index 87da087b8..38cc5a2a2 100644 --- a/testing/lin/clatrs.f +++ b/testing/lin/clatrs.f @@ -57,10 +57,10 @@ * * CLATRS solves one of the triangular systems * -* A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +* A * x = s*b, A^T * x = s*b, or A^H * x = s*b, * * with scaling to prevent overflow. Here A is an upper or lower -* triangular matrix, A**T denotes the transpose of A, A**H denotes the +* triangular matrix, A^T denotes the transpose of A, A^H denotes the * conjugate transpose of A, x and b are n-element vectors, and s is a * scaling factor, usually less than or equal to 1, chosen so that the * components of x will be less than the overflow threshold. If the @@ -79,8 +79,8 @@ * TRANS (input) CHARACTER*1 * Specifies the operation applied to A. * = 'N': Solve A * x = s*b (No transpose) -* = 'T': Solve A**T * x = s*b (Transpose) -* = 'C': Solve A**H * x = s*b (Conjugate transpose) +* = 'T': Solve A^T * x = s*b (Transpose) +* = 'C': Solve A^H * x = s*b (Conjugate transpose) * * DIAG (input) CHARACTER*1 * Specifies whether or not the matrix A is unit triangular. @@ -115,7 +115,7 @@ * * SCALE (output) REAL * The scaling factor s for the triangular system -* A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +* A * x = s*b, A^T * x = s*b, or A^H * x = s*b. * If SCALE = 0, the matrix A is singular or badly scaled, and * the vector x is an exact or approximate solution to A*x = 0. * @@ -181,8 +181,8 @@ * prevent overflow, but if the bound overflows, x is set to 0, x(j) to * 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. * -* Similarly, a row-wise scheme is used to solve A**T *x = b or -* A**H *x = b. The basic algorithm for A upper triangular is +* Similarly, a row-wise scheme is used to solve A^T *x = b or +* A^H *x = b. 
The basic algorithm for A upper triangular is * * for j = 1, ..., n * x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) @@ -412,7 +412,7 @@ * ELSE * -* Compute the growth in A**T * x = b or A**H * x = b. +* Compute the growth in A^T * x = b or A^H * x = b. * IF( UPPER ) THEN JFIRST = 1 @@ -632,7 +632,7 @@ * ELSE IF( LSAME( TRANS, 'T' ) ) THEN * -* Solve A**T * x = b +* Solve A^T * x = b * DO 150 J = JFIRST, JLAST, JINC * @@ -744,7 +744,7 @@ ELSE * * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and -* scale = 0 and compute a solution to A**T *x = 0. +* scale = 0 and compute a solution to A^T *x = 0. * DO 140 I = 1, N X( I ) = ZERO @@ -766,7 +766,7 @@ * ELSE * -* Solve A**H * x = b +* Solve A^H * x = b * DO 190 J = JFIRST, JLAST, JINC * @@ -880,7 +880,7 @@ ELSE * * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and -* scale = 0 and compute a solution to A**H *x = 0. +* scale = 0 and compute a solution to A^H *x = 0. * DO 180 I = 1, N X( I ) = ZERO diff --git a/testing/lin/cpocon.f b/testing/lin/cpocon.f index a5469bb8e..3fd5f26c4 100644 --- a/testing/lin/cpocon.f +++ b/testing/lin/cpocon.f @@ -59,7 +59,7 @@ * * CPOCON estimates the reciprocal of the condition number (in the * 1-norm) of a complex Hermitian positive definite matrix using the -* Cholesky factorization A = U**H*U or A = L*L**H computed by CPOTRF. +* Cholesky factorization A = U^H*U or A = L*L^H computed by CPOTRF. * * An estimate is obtained for norm(inv(A)), and the reciprocal of the * condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). @@ -76,7 +76,7 @@ * * A (input) COMPLEX array, dimension (LDA,N) * The triangular factor U or L from the Cholesky factorization -* A = U**H*U or A = L*L**H, as computed by CPOTRF. +* A = U^H*U or A = L*L^H, as computed by CPOTRF. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,N). 
diff --git a/testing/lin/cporfs.f b/testing/lin/cporfs.f index 14708b5d3..616a71ce7 100644 --- a/testing/lin/cporfs.f +++ b/testing/lin/cporfs.f @@ -92,7 +92,7 @@ * * AF (input) COMPLEX array, dimension (LDAF,N) * The triangular factor U or L from the Cholesky factorization -* A = U**H*U or A = L*L**H, as computed by CPOTRF. +* A = U^H*U or A = L*L^H, as computed by CPOTRF. * * LDAF (input) INTEGER * The leading dimension of the array AF. LDAF >= max(1,N). diff --git a/testing/lin/cposvx.f b/testing/lin/cposvx.f index 950a1b514..d502bd56f 100644 --- a/testing/lin/cposvx.f +++ b/testing/lin/cposvx.f @@ -59,7 +59,7 @@ * Purpose * ======= * -* CPOSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +* CPOSVX uses the Cholesky factorization A = U^H*U or A = L*L^H to * compute the solution to a complex system of linear equations * A * X = B, * where A is an N-by-N Hermitian positive definite matrix and X and B @@ -82,8 +82,8 @@ * * 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to * factor the matrix A (after equilibration if FACT = 'E') as -* A = U**H* U, if UPLO = 'U', or -* A = L * L**H, if UPLO = 'L', +* A = U^H* U, if UPLO = 'U', or +* A = L * L^H, if UPLO = 'L', * where U is an upper triangular matrix and L is a lower triangular * matrix. * @@ -154,18 +154,18 @@ * AF (input or output) COMPLEX array, dimension (LDAF,N) * If FACT = 'F', then AF is an input argument and on entry * contains the triangular factor U or L from the Cholesky -* factorization A = U**H*U or A = L*L**H, in the same storage +* factorization A = U^H*U or A = L*L^H, in the same storage * format as A. If EQUED .ne. 'N', then AF is the factored form * of the equilibrated matrix diag(S)*A*diag(S). * * If FACT = 'N', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky -* factorization A = U**H*U or A = L*L**H of the original +* factorization A = U^H*U or A = L*L^H of the original * matrix A. 
* * If FACT = 'E', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky -* factorization A = U**H*U or A = L*L**H of the equilibrated +* factorization A = U^H*U or A = L*L^H of the equilibrated * matrix A (see the description of A for the form of the * equilibrated matrix). * diff --git a/testing/lin/cpotri.f b/testing/lin/cpotri.f index 839559633..dea6b9d01 100644 --- a/testing/lin/cpotri.f +++ b/testing/lin/cpotri.f @@ -53,7 +53,7 @@ * ======= * * CPOTRI computes the inverse of a complex Hermitian positive definite -* matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +* matrix A using the Cholesky factorization A = U^H*U or A = L*L^H * computed by CPOTRF. * * Arguments @@ -68,7 +68,7 @@ * * A (input/output) COMPLEX array, dimension (LDA,N) * On entry, the triangular factor U or L from the Cholesky -* factorization A = U**H*U or A = L*L**H, as computed by +* factorization A = U^H*U or A = L*L^H, as computed by * CPOTRF. * On exit, the upper or lower triangle of the (Hermitian) * inverse of A, overwriting the input factor U or L. diff --git a/testing/lin/dpocon.f b/testing/lin/dpocon.f index 43c957d38..1a4c1b67a 100644 --- a/testing/lin/dpocon.f +++ b/testing/lin/dpocon.f @@ -59,7 +59,7 @@ * * DPOCON estimates the reciprocal of the condition number (in the * 1-norm) of a real symmetric positive definite matrix using the -* Cholesky factorization A = U**T*U or A = L*L**T computed by DPOTRF. +* Cholesky factorization A = U^T*U or A = L*L^T computed by DPOTRF. * * An estimate is obtained for norm(inv(A)), and the reciprocal of the * condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). @@ -76,7 +76,7 @@ * * A (input) DOUBLE PRECISION array, dimension (LDA,N) * The triangular factor U or L from the Cholesky factorization -* A = U**T*U or A = L*L**T, as computed by DPOTRF. +* A = U^T*U or A = L*L^T, as computed by DPOTRF. * * LDA (input) INTEGER * The leading dimension of the array A. 
LDA >= max(1,N). diff --git a/testing/lin/dporfs.f b/testing/lin/dporfs.f index 3a1496638..c93d5793a 100644 --- a/testing/lin/dporfs.f +++ b/testing/lin/dporfs.f @@ -92,7 +92,7 @@ * * AF (input) DOUBLE PRECISION array, dimension (LDAF,N) * The triangular factor U or L from the Cholesky factorization -* A = U**T*U or A = L*L**T, as computed by DPOTRF. +* A = U^T*U or A = L*L^T, as computed by DPOTRF. * * LDAF (input) INTEGER * The leading dimension of the array AF. LDAF >= max(1,N). diff --git a/testing/lin/dposvx.f b/testing/lin/dposvx.f index aeca6aee0..79d723a27 100644 --- a/testing/lin/dposvx.f +++ b/testing/lin/dposvx.f @@ -61,7 +61,7 @@ * Purpose * ======= * -* DPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +* DPOSVX uses the Cholesky factorization A = U^T*U or A = L*L^T to * compute the solution to a real system of linear equations * A * X = B, * where A is an N-by-N symmetric positive definite matrix and X and B @@ -84,8 +84,8 @@ * * 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to * factor the matrix A (after equilibration if FACT = 'E') as -* A = U**T* U, if UPLO = 'U', or -* A = L * L**T, if UPLO = 'L', +* A = U^T* U, if UPLO = 'U', or +* A = L * L^T, if UPLO = 'L', * where U is an upper triangular matrix and L is a lower triangular * matrix. * @@ -156,18 +156,18 @@ * AF (input or output) DOUBLE PRECISION array, dimension (LDAF,N) * If FACT = 'F', then AF is an input argument and on entry * contains the triangular factor U or L from the Cholesky -* factorization A = U**T*U or A = L*L**T, in the same storage +* factorization A = U^T*U or A = L*L^T, in the same storage * format as A. If EQUED .ne. 'N', then AF is the factored form * of the equilibrated matrix diag(S)*A*diag(S). * * If FACT = 'N', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky -* factorization A = U**T*U or A = L*L**T of the original +* factorization A = U^T*U or A = L*L^T of the original * matrix A. 
* * If FACT = 'E', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky -* factorization A = U**T*U or A = L*L**T of the equilibrated +* factorization A = U^T*U or A = L*L^T of the equilibrated * matrix A (see the description of A for the form of the * equilibrated matrix). * diff --git a/testing/lin/dpotri.f b/testing/lin/dpotri.f index f8585b348..2a5f4c2dd 100644 --- a/testing/lin/dpotri.f +++ b/testing/lin/dpotri.f @@ -53,7 +53,7 @@ * ======= * * DPOTRI computes the inverse of a real symmetric positive definite -* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +* matrix A using the Cholesky factorization A = U^T*U or A = L*L^T * computed by DPOTRF. * * Arguments @@ -68,7 +68,7 @@ * * A (input/output) DOUBLE PRECISION array, dimension (LDA,N) * On entry, the triangular factor U or L from the Cholesky -* factorization A = U**T*U or A = L*L**T, as computed by +* factorization A = U^T*U or A = L*L^T, as computed by * DPOTRF. * On exit, the upper or lower triangle of the (symmetric) * inverse of A, overwriting the input factor U or L. diff --git a/testing/lin/spocon.f b/testing/lin/spocon.f index 02392607f..380896480 100644 --- a/testing/lin/spocon.f +++ b/testing/lin/spocon.f @@ -59,7 +59,7 @@ * * SPOCON estimates the reciprocal of the condition number (in the * 1-norm) of a real symmetric positive definite matrix using the -* Cholesky factorization A = U**T*U or A = L*L**T computed by SPOTRF. +* Cholesky factorization A = U^T*U or A = L*L^T computed by SPOTRF. * * An estimate is obtained for norm(inv(A)), and the reciprocal of the * condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). @@ -76,7 +76,7 @@ * * A (input) REAL array, dimension (LDA,N) * The triangular factor U or L from the Cholesky factorization -* A = U**T*U or A = L*L**T, as computed by SPOTRF. +* A = U^T*U or A = L*L^T, as computed by SPOTRF. * * LDA (input) INTEGER * The leading dimension of the array A. 
LDA >= max(1,N). diff --git a/testing/lin/sporfs.f b/testing/lin/sporfs.f index 8dcdea760..e633b0978 100644 --- a/testing/lin/sporfs.f +++ b/testing/lin/sporfs.f @@ -92,7 +92,7 @@ * * AF (input) REAL array, dimension (LDAF,N) * The triangular factor U or L from the Cholesky factorization -* A = U**T*U or A = L*L**T, as computed by SPOTRF. +* A = U^T*U or A = L*L^T, as computed by SPOTRF. * * LDAF (input) INTEGER * The leading dimension of the array AF. LDAF >= max(1,N). diff --git a/testing/lin/sposvx.f b/testing/lin/sposvx.f index b8a94475a..8a8f53564 100644 --- a/testing/lin/sposvx.f +++ b/testing/lin/sposvx.f @@ -61,7 +61,7 @@ * Purpose * ======= * -* SPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to +* SPOSVX uses the Cholesky factorization A = U^T*U or A = L*L^T to * compute the solution to a real system of linear equations * A * X = B, * where A is an N-by-N symmetric positive definite matrix and X and B @@ -84,8 +84,8 @@ * * 2. If FACT = 'N' or 'E', the Cholesky decomposition is used to * factor the matrix A (after equilibration if FACT = 'E') as -* A = U**T* U, if UPLO = 'U', or -* A = L * L**T, if UPLO = 'L', +* A = U^T* U, if UPLO = 'U', or +* A = L * L^T, if UPLO = 'L', * where U is an upper triangular matrix and L is a lower triangular * matrix. * @@ -156,18 +156,18 @@ * AF (input or output) REAL array, dimension (LDAF,N) * If FACT = 'F', then AF is an input argument and on entry * contains the triangular factor U or L from the Cholesky -* factorization A = U**T*U or A = L*L**T, in the same storage +* factorization A = U^T*U or A = L*L^T, in the same storage * format as A. If EQUED .ne. 'N', then AF is the factored form * of the equilibrated matrix diag(S)*A*diag(S). * * If FACT = 'N', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky -* factorization A = U**T*U or A = L*L**T of the original +* factorization A = U^T*U or A = L*L^T of the original * matrix A. 
* * If FACT = 'E', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky -* factorization A = U**T*U or A = L*L**T of the equilibrated +* factorization A = U^T*U or A = L*L^T of the equilibrated * matrix A (see the description of A for the form of the * equilibrated matrix). * diff --git a/testing/lin/spotri.f b/testing/lin/spotri.f index 13885e2fd..d52f05699 100644 --- a/testing/lin/spotri.f +++ b/testing/lin/spotri.f @@ -53,7 +53,7 @@ * ======= * * SPOTRI computes the inverse of a real symmetric positive definite -* matrix A using the Cholesky factorization A = U**T*U or A = L*L**T +* matrix A using the Cholesky factorization A = U^T*U or A = L*L^T * computed by SPOTRF. * * Arguments @@ -68,7 +68,7 @@ * * A (input/output) REAL array, dimension (LDA,N) * On entry, the triangular factor U or L from the Cholesky -* factorization A = U**T*U or A = L*L**T, as computed by +* factorization A = U^T*U or A = L*L^T, as computed by * SPOTRF. * On exit, the upper or lower triangle of the (symmetric) * inverse of A, overwriting the input factor U or L. diff --git a/testing/lin/zlagsy.f b/testing/lin/zlagsy.f index d2a05500d..a9366c90c 100644 --- a/testing/lin/zlagsy.f +++ b/testing/lin/zlagsy.f @@ -55,7 +55,7 @@ * * ZLAGSY generates a complex symmetric matrix A, by pre- and post- * multiplying a real diagonal matrix D with a random unitary matrix: -* A = U*D*U**T. The semi-bandwidth may then be reduced to k by +* A = U*D*U^T. The semi-bandwidth may then be reduced to k by * additional unitary transformations. 
* * Arguments diff --git a/testing/lin/zlarhs.f b/testing/lin/zlarhs.f index 1da073177..333feeb71 100644 --- a/testing/lin/zlarhs.f +++ b/testing/lin/zlarhs.f @@ -58,7 +58,7 @@ * ZLARHS chooses a set of NRHS random solution vectors and sets * up the right hand sides for the linear system * op( A ) * X = B, -* where op( A ) may be A, A**T (transpose of A), or A**H (conjugate +* where op( A ) may be A, A^T (transpose of A), or A^H (conjugate * transpose of A). * * Arguments @@ -102,8 +102,8 @@ * Used only if A is nonsymmetric; specifies the operation * applied to the matrix A. * = 'N': B := A * X -* = 'T': B := A**T * X -* = 'C': B := A**H * X +* = 'T': B := A^T * X +* = 'C': B := A^H * X * * M (input) INTEGER * The number of rows of the matrix A. M >= 0. diff --git a/testing/lin/zlatrs.f b/testing/lin/zlatrs.f index c4271a045..ba7f497ef 100644 --- a/testing/lin/zlatrs.f +++ b/testing/lin/zlatrs.f @@ -57,10 +57,10 @@ * * ZLATRS solves one of the triangular systems * -* A * x = s*b, A**T * x = s*b, or A**H * x = s*b, +* A * x = s*b, A^T * x = s*b, or A^H * x = s*b, * * with scaling to prevent overflow. Here A is an upper or lower -* triangular matrix, A**T denotes the transpose of A, A**H denotes the +* triangular matrix, A^T denotes the transpose of A, A^H denotes the * conjugate transpose of A, x and b are n-element vectors, and s is a * scaling factor, usually less than or equal to 1, chosen so that the * components of x will be less than the overflow threshold. If the @@ -79,8 +79,8 @@ * TRANS (input) CHARACTER*1 * Specifies the operation applied to A. * = 'N': Solve A * x = s*b (No transpose) -* = 'T': Solve A**T * x = s*b (Transpose) -* = 'C': Solve A**H * x = s*b (Conjugate transpose) +* = 'T': Solve A^T * x = s*b (Transpose) +* = 'C': Solve A^H * x = s*b (Conjugate transpose) * * DIAG (input) CHARACTER*1 * Specifies whether or not the matrix A is unit triangular. 
@@ -115,7 +115,7 @@ * * SCALE (output) DOUBLE PRECISION * The scaling factor s for the triangular system -* A * x = s*b, A**T * x = s*b, or A**H * x = s*b. +* A * x = s*b, A^T * x = s*b, or A^H * x = s*b. * If SCALE = 0, the matrix A is singular or badly scaled, and * the vector x is an exact or approximate solution to A*x = 0. * @@ -181,8 +181,8 @@ * prevent overflow, but if the bound overflows, x is set to 0, x(j) to * 1, and scale to 0, and a non-trivial solution to A*x = 0 is found. * -* Similarly, a row-wise scheme is used to solve A**T *x = b or -* A**H *x = b. The basic algorithm for A upper triangular is +* Similarly, a row-wise scheme is used to solve A^T *x = b or +* A^H *x = b. The basic algorithm for A upper triangular is * * for j = 1, ..., n * x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j) @@ -412,7 +412,7 @@ * ELSE * -* Compute the growth in A**T * x = b or A**H * x = b. +* Compute the growth in A^T * x = b or A^H * x = b. * IF( UPPER ) THEN JFIRST = 1 @@ -632,7 +632,7 @@ * ELSE IF( LSAME( TRANS, 'T' ) ) THEN * -* Solve A**T * x = b +* Solve A^T * x = b * DO 170 J = JFIRST, JLAST, JINC * @@ -744,7 +744,7 @@ ELSE * * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and -* scale = 0 and compute a solution to A**T *x = 0. +* scale = 0 and compute a solution to A^T *x = 0. * DO 150 I = 1, N X( I ) = ZERO @@ -766,7 +766,7 @@ * ELSE * -* Solve A**H * x = b +* Solve A^H * x = b * DO 220 J = JFIRST, JLAST, JINC * @@ -880,7 +880,7 @@ ELSE * * A(j,j) = 0: Set x(1:n) = 0, x(j) = 1, and -* scale = 0 and compute a solution to A**H *x = 0. +* scale = 0 and compute a solution to A^H *x = 0. 
* DO 200 I = 1, N X( I ) = ZERO diff --git a/testing/lin/zpocon.f b/testing/lin/zpocon.f index 4ead889ed..b3e91f057 100644 --- a/testing/lin/zpocon.f +++ b/testing/lin/zpocon.f @@ -59,7 +59,7 @@ * * ZPOCON estimates the reciprocal of the condition number (in the * 1-norm) of a complex Hermitian positive definite matrix using the -* Cholesky factorization A = U**H*U or A = L*L**H computed by ZPOTRF. +* Cholesky factorization A = U^H*U or A = L*L^H computed by ZPOTRF. * * An estimate is obtained for norm(inv(A)), and the reciprocal of the * condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))). @@ -76,7 +76,7 @@ * * A (input) COMPLEX*16 array, dimension (LDA,N) * The triangular factor U or L from the Cholesky factorization -* A = U**H*U or A = L*L**H, as computed by ZPOTRF. +* A = U^H*U or A = L*L^H, as computed by ZPOTRF. * * LDA (input) INTEGER * The leading dimension of the array A. LDA >= max(1,N). diff --git a/testing/lin/zporfs.f b/testing/lin/zporfs.f index 4503ef94a..696739a6c 100644 --- a/testing/lin/zporfs.f +++ b/testing/lin/zporfs.f @@ -92,7 +92,7 @@ * * AF (input) COMPLEX*16 array, dimension (LDAF,N) * The triangular factor U or L from the Cholesky factorization -* A = U**H*U or A = L*L**H, as computed by ZPOTRF. +* A = U^H*U or A = L*L^H, as computed by ZPOTRF. * * LDAF (input) INTEGER * The leading dimension of the array AF. LDAF >= max(1,N). diff --git a/testing/lin/zposvx.f b/testing/lin/zposvx.f index 5fe686b41..4d8d322fe 100644 --- a/testing/lin/zposvx.f +++ b/testing/lin/zposvx.f @@ -59,7 +59,7 @@ * Purpose * ======= * -* ZPOSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to +* ZPOSVX uses the Cholesky factorization A = U^H*U or A = L*L^H to * compute the solution to a complex system of linear equations * A * X = B, * where A is an N-by-N Hermitian positive definite matrix and X and B @@ -82,8 +82,8 @@ * * 2. 
If FACT = 'N' or 'E', the Cholesky decomposition is used to * factor the matrix A (after equilibration if FACT = 'E') as -* A = U**H* U, if UPLO = 'U', or -* A = L * L**H, if UPLO = 'L', +* A = U^H* U, if UPLO = 'U', or +* A = L * L^H, if UPLO = 'L', * where U is an upper triangular matrix and L is a lower triangular * matrix. * @@ -154,18 +154,18 @@ * AF (input or output) COMPLEX*16 array, dimension (LDAF,N) * If FACT = 'F', then AF is an input argument and on entry * contains the triangular factor U or L from the Cholesky -* factorization A = U**H*U or A = L*L**H, in the same storage +* factorization A = U^H*U or A = L*L^H, in the same storage * format as A. If EQUED .ne. 'N', then AF is the factored form * of the equilibrated matrix diag(S)*A*diag(S). * * If FACT = 'N', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky -* factorization A = U**H*U or A = L*L**H of the original +* factorization A = U^H*U or A = L*L^H of the original * matrix A. * * If FACT = 'E', then AF is an output argument and on exit * returns the triangular factor U or L from the Cholesky -* factorization A = U**H*U or A = L*L**H of the equilibrated +* factorization A = U^H*U or A = L*L^H of the equilibrated * matrix A (see the description of A for the form of the * equilibrated matrix). * diff --git a/testing/lin/zpotri.f b/testing/lin/zpotri.f index bf86de0ee..ed9fd8dd6 100644 --- a/testing/lin/zpotri.f +++ b/testing/lin/zpotri.f @@ -53,7 +53,7 @@ * ======= * * ZPOTRI computes the inverse of a complex Hermitian positive definite -* matrix A using the Cholesky factorization A = U**H*U or A = L*L**H +* matrix A using the Cholesky factorization A = U^H*U or A = L*L^H * computed by ZPOTRF. 
* * Arguments @@ -68,7 +68,7 @@ * * A (input/output) COMPLEX*16 array, dimension (LDA,N) * On entry, the triangular factor U or L from the Cholesky -* factorization A = U**H*U or A = L*L**H, as computed by +* factorization A = U^H*U or A = L*L^H, as computed by * ZPOTRF. * On exit, the upper or lower triangle of the (Hermitian) * inverse of A, overwriting the input factor U or L. diff --git a/timing/timing_zauxiliary.c b/timing/timing_zauxiliary.c index f804164b6..007d57c85 100644 --- a/timing/timing_zauxiliary.c +++ b/timing/timing_zauxiliary.c @@ -314,8 +314,8 @@ double z_check_solution(int M, int N, int NRHS, CHAMELEON_Complex64_t *A, int LD * * Check the accuracy of the computed inverse * */ -int zcheck_inverse(int N, CHAMELEON_Complex64_t *A1, CHAMELEON_Complex64_t *A2, int LDA, - cham_uplo_t uplo, double *rnorm, double *anorm, double *ainvnorm ) +int z_check_inverse( int N, CHAMELEON_Complex64_t *A1, CHAMELEON_Complex64_t *A2, int LDA, + cham_uplo_t uplo, double *rnorm, double *anorm, double *ainvnorm ) { int info_inverse; int i, j; diff --git a/timing/timing_zauxiliary.h b/timing/timing_zauxiliary.h index 2c9957f52..6fbb0ff15 100644 --- a/timing/timing_zauxiliary.h +++ b/timing/timing_zauxiliary.h @@ -39,8 +39,8 @@ double z_check_solution(int M, int N, int NRHS, CHAMELEON_Complex64_t *B1, CHAMELEON_Complex64_t *B2, int LDB, double *anorm, double *bnorm, double *xnorm); -int zcheck_inverse(int N, CHAMELEON_Complex64_t *A1, CHAMELEON_Complex64_t *A2, - int LDA, cham_uplo_t uplo, double *rnorm, double *anorm, double *ainvnorm); +int z_check_inverse( int N, CHAMELEON_Complex64_t *A1, CHAMELEON_Complex64_t *A2, + int LDA, cham_uplo_t uplo, double *rnorm, double *anorm, double *ainvnorm ); #endif /* _timing_zauxiliary_h_ */ -- GitLab