From e46b6449fb20f81c9446dd4c019a896946e986fc Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <>
Date: Thu, 11 Apr 2019 23:43:00 +0200
Subject: [PATCH] Rules precision

 CMakeLists.txt                                |   4 +-
 cmake_modules/                   |  91 +++++++++
 cmake_modules/morse_cmake                     |   2 +-
 compute/zgels.c                               |   4 +-
 compute/zgels_param.c                         |   4 +-
 compute/zgesvd.c                              |  34 ++--
 compute/zgetrs_incpiv.c                       |   4 +-
 compute/zgetrs_nopiv.c                        |   4 +-
 compute/zhetrd.c                              |   4 +-
 compute/zposv.c                               |   4 +-
 compute/zpotrf.c                              |   4 +-
 compute/zpotri.c                              |  12 +-
 compute/zpotrimm.c                            |  12 +-
 compute/zpotrs.c                              |   6 +-
 compute/zsysv.c                               |   4 +-
 compute/zsytrf.c                              |   4 +-
 compute/zsytrs.c                              |   6 +-
 compute/zunmlq.c                              |  18 +-
 compute/zunmlq_param.c                        |  18 +-
 compute/zunmqr.c                              |  18 +-
 compute/zunmqr_param.c                        |  18 +-
 control/compute_z.h                           |   4 -
 coreblas/compute/core_zherfb.c                |   4 +-
 coreblas/compute/core_zpamm.c                 |   2 +-
 coreblas/compute/core_zparfb.c                |   6 +-
 coreblas/compute/core_zpemv.c                 |   6 +-
 coreblas/compute/core_ztpmlqt.c               |  14 +-
 coreblas/compute/core_ztpmqrt.c               |  14 +-
 coreblas/compute/core_ztsmlq.c                |  10 +-
 coreblas/compute/core_ztsmlq_hetra1.c         |   6 +-
 coreblas/compute/core_ztsmqr.c                |  10 +-
 coreblas/compute/core_ztsmqr_hetra1.c         |   6 +-
 coreblas/compute/core_zttmlq.c                |  10 +-
 coreblas/compute/core_zttmqr.c                |  10 +-
 coreblas/compute/core_zunmlq.c                |  10 +-
 coreblas/compute/core_zunmqr.c                |  10 +-
 .../eztrace_module/coreblas_eztrace_module    | 172 ------------------
 coreblas/include/coreblas/coreblas_z.h        |  43 -----
 cudablas/compute/cuda_zparfb.c                |   6 +-
 cudablas/compute/cuda_ztpmlqt.c               |  14 +-
 cudablas/compute/cuda_ztpmqrt.c               |  14 +-
 runtime/CMakeLists.txt                        |   2 +-
 .../{codelet_zasum.c => codelet_dzasum.c}     |   4 +-
 runtime/openmp/codelets/codelet_zunmlq.c      |  10 +-
 runtime/openmp/codelets/codelet_zunmqr.c      |  10 +-
 .../{codelet_zasum.c => codelet_dzasum.c}     |   6 +-
 .../{codelet_zasum.c => codelet_dzasum.c}     |   4 +-
 runtime/quark/codelets/codelet_zunmlq.c       |  10 +-
 runtime/quark/codelets/codelet_zunmqr.c       |  10 +-
 runtime/quark/include/core_blas_dag.h         |   2 +
 .../{codelet_zasum.c => codelet_dzasum.c}     |  12 +-
 runtime/starpu/codelets/codelet_zcallback.c   |   2 +-
 runtime/starpu/codelets/codelet_zunmlq.c      |  10 +-
 runtime/starpu/codelets/codelet_zunmqr.c      |  10 +-
 runtime/starpu/include/runtime_codelet_z.h    | 114 ++++++------
 runtime/starpu/include/runtime_codelets.h     |   5 -
 testing/lin/clagsy.f                          |   2 +-
 testing/lin/clarhs.f                          |   6 +-
 testing/lin/clatrs.f                          |  24 +--
 testing/lin/cpocon.f                          |   4 +-
 testing/lin/cporfs.f                          |   2 +-
 testing/lin/cposvx.f                          |  12 +-
 testing/lin/cpotri.f                          |   4 +-
 testing/lin/dpocon.f                          |   4 +-
 testing/lin/dporfs.f                          |   2 +-
 testing/lin/dposvx.f                          |  12 +-
 testing/lin/dpotri.f                          |   4 +-
 testing/lin/spocon.f                          |   4 +-
 testing/lin/sporfs.f                          |   2 +-
 testing/lin/sposvx.f                          |  12 +-
 testing/lin/spotri.f                          |   4 +-
 testing/lin/zlagsy.f                          |   2 +-
 testing/lin/zlarhs.f                          |   6 +-
 testing/lin/zlatrs.f                          |  24 +--
 testing/lin/zpocon.f                          |   4 +-
 testing/lin/zporfs.f                          |   2 +-
 testing/lin/zposvx.f                          |  12 +-
 testing/lin/zpotri.f                          |   4 +-
 timing/timing_zauxiliary.c                    |   4 +-
 timing/timing_zauxiliary.h                    |   4 +-
 80 files changed, 443 insertions(+), 574 deletions(-)
 create mode 100644 cmake_modules/
 rename runtime/openmp/codelets/{codelet_zasum.c => codelet_dzasum.c} (94%)
 rename runtime/parsec/codelets/{codelet_zasum.c => codelet_dzasum.c} (95%)
 rename runtime/quark/codelets/{codelet_zasum.c => codelet_dzasum.c} (96%)
 rename runtime/starpu/codelets/{codelet_zasum.c => codelet_dzasum.c} (88%)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4d3309fee..4e2c41e4a 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -97,7 +97,7 @@ option(BUILD_SHARED_LIBS "Build shared libraries" OFF)
 # Define precision supported by CHAMELEON
 # -----------------------------------------
 set( RP_CHAMELEON_PRECISIONS  "s;d;c;z" )
@@ -586,7 +586,7 @@ endif(NOT CHAMELEON_SIMULATION)
 # -------------------------------
-    set(CHAMELEON_STARPU_VERSION "1.1" CACHE STRING "oldest STARPU version desired")
+    set(CHAMELEON_STARPU_VERSION "1.3" CACHE STRING "oldest STARPU version desired")
     # create list of components in order to make a single call to find_package(starpu...)
diff --git a/cmake_modules/ b/cmake_modules/
new file mode 100644
index 000000000..4608e59b8
--- /dev/null
+++ b/cmake_modules/
@@ -0,0 +1,91 @@
+_extra_blas = [
+    # ----- Additional BLAS
+    ('',                     'dsgesv',               'dsgesv',               'zcgesv',               'zcgesv'              ),
+    ('',                     'sgesplit',             'dgesplit',             'cgesplit',             'zgesplit'            ),
+    ('',                     'slascal',              'dlascal',              'clascal',              'zlascal'             ),
+    ('',                     'slapack',              'dlapack',              'clapack',              'zlapack'             ),
+    ('',                     'stile',                'dtile',                'ctile',                'ztile'               ),
+    ('',                     'sgecon',               'dgecon',               'cgecon',               'zgecon'              ),
+    ('',                     'spocon',               'dpocon',               'cpocon',               'zpocon'              ),
+    ('',                     'strasm',               'dtrasm',               'ctrasm',               'ztrasm'              ),
+    ('',                     'sgecfi',               'dgecfi',               'cgecfi',               'zgecfi'              ),
+    ('',                     'splssq',               'dplssq',               'cplssq',               'zplssq'              ),
+    ('',                     'sy2sb',                'sy2sb' ,               'he2hb',                'he2hb'               ),
+    ('',                     'she2ge',               'dhe2ge',               'che2ge',               'zhe2ge'              ),
+    ('',                     'slatro',               'dlatro',               'clatro',               'zlatro'              ), #=> Replace by getmo/gecmo as in essl
+    ('',                     'sbuild',               'dbuild',               'cbuild',               'zbuild'              ), #=> Replace by map function
+_extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ]
+subs = {
+    # ------------------------------------------------------------
+    # replacements applied to mixed precision files.
+    'mixed' : [
+        # double/single,          double/single-complex
+        #'12345678901234567890', '12345678901234567890')
+        (r'\bdouble',           r'\bCHAMELEON_Complex64_t'),
+        (r'\bChamRealDouble',   r'\bChamComplexDouble' ),
+        (r'\bfloat',            r'\bCHAMELEON_Complex32_t'),
+        (r'\bChamRealFloat',    r'\bChamComplexFloat'  ),
+        (r'\breal\b',           r'\bcomplex\b'         ),
+        ('dsgels',               'zcgels'              ),
+        ('dsorgesv',             'zcungesv'            ),
+    ],
+    # ------------------------------------------------------------
+    # replacements applied to normal (single-precision-family) files.
+    'normal': [
+        # pattern                single                  double                  single-complex          double-complex
+        #'12345678901234567890', '12345678901234567890', '12345678901234567890', '12345678901234567890', '12345678901234567890')
+        ('int',                  'float',                'double',               'CHAMELEON_Complex32_t', r'\bCHAMELEON_Complex64_t'),
+        ('ChamPattern',          'ChamRealFloat',        'ChamRealDouble',       'ChamComplexFloat',    r'\bChamComplexDouble' ),
+        ('ChamPattern',          'ChamRealFloat',        'ChamRealDouble',       'ChamRealFloat',       r'\bChamRealDouble'    ),
+        # ----- Additional BLAS
+        ('',                     'sTile',                'dTile',                'cTile',                'zTile'               ),
+        ('',                     'sLapack',              'dLapack',              'cLapack',              'zLapack'             ),
+        ('',                     'ORMQR',                'ORMQR',                'UNMQR',                'UNMQR'               ),
+        ('',                     'ORMLQ',                'ORMLQ',                'UNMLQ',                'UNMLQ'               ),
+        ('',                     'SYEV',                 'SYEV',                 'HEEV',                 'HEEV'                ),
+        ('',                     'SYG',                  'SYG',                  'HEG',                  'HEG'                 ),
+    ]
+    + _extra_blas
+    + _extra_BLAS
+    + [
+        # ----- For norms: the result is computed in real precision (float or double), even for complex types
+        ('',                     'slange',               'dlange',               'slange',               'dlange'              ),
+        ('',                     'slaset',               'dlaset',               'slaset',               'dlaset'              ),
+        ('',                     'splssq',               'dplssq',               'splssq',               'dplssq'              ),
+        ('',                     'slacpy',               'dlacpy',               'slacpy',               'dlacpy'              ),
+        ('',                     'saxpy',                'daxpy',                'saxpy',                'daxpy'               ),
+        (r'\b',                 r'szero\b',             r'dzero\b',             r'czero\b',             r'zzero\b'             ),
+#        (r'\b',                 r'sone\b',              r'done\b',              r'cone\b',              r'zone\b'              ),
+        # ----- Chameleon Prefixes
+        ('CHAMELEON_P',          'CHAMELEON_S',          'CHAMELEON_D',          'CHAMELEON_C',          'CHAMELEON_Z'         ),
+        ('RUNTIME_P',            'RUNTIME_s',            'RUNTIME_d',            'RUNTIME_c',            'RUNTIME_z'           ),
+        ('chameleon_p',          'chameleon_s',          'chameleon_d',          'chameleon_c',          'chameleon_z'         ),
+        ('codelet_p',            'codelet_s',            'codelet_d',            'codelet_c',            'codelet_z'           ),
+        ('runtime_p',            'runtime_s',            'runtime_d',            'runtime_c',            'runtime_z'           ),
+        ('testing_p',            'testing_s',            'testing_d',            'testing_c',            'testing_z'           ),
+        ('timing_p',             'timing_s',             'timing_d',             'timing_c',             'timing_z'            ),
+        ('workspace_p',          'workspace_s',          'workspace_d',          'workspace_c',          'workspace_z'         ),
+#        ('CORE_P',               'CORE_S',               'CORE_D',               'CORE_C',               'CORE_Z'              ),
+#        ('vec_p',                'vec_s',                'vec_d',                'vec_c',                'vec_z'               ),
+      # ('',                     'starpu_s',             'starpu_d',             'starpu_c',             'starpu_z'            ),
+      # ('',                     'STARPU_S',             'STARPU_D',             'STARPU_C',             'STARPU_Z'            ),
+      # ('',                     's_',                   'd_',                   'c_',                   'z_'                  ),
+      # ('',                     'S_',                   'D_',                   'C_',                   'Z_'                  ),
+      # ('',                     'FLT_EPSILON',          'DBL_EPSILON',          'FLT_EPSILON',          'DBL_EPSILON'         ),
+      # ('',                     's_RAFF_FLOAT',         'd_RAFF_FLOAT',         'c_RAFF_FLOAT',         'z_RAFF_FLOAT'        ),
+      # # ----- unused?
+      # ('',                     's_check',              'd_check',              'c_check',              'z_check'             ),
+      # ('',                     'stesting',             'dtesting',             'ctesting',             'ztesting'            ),
+      # ('',                     'SAUXILIARY',           'DAUXILIARY',           'CAUXILIARY',           'ZAUXILIARY'          ),
+      # ('',                     'sbuild',               'dbuild',               'cbuild',               'zbuild'              ),
+    ]
diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake
index 33a182878..ade499661 160000
--- a/cmake_modules/morse_cmake
+++ b/cmake_modules/morse_cmake
@@ -1 +1 @@
-Subproject commit 33a182878f9049c47af1fce3e86e72b9a10e7f7a
+Subproject commit ade499661b58c71fe0586c2bbb98ea9725a88c52
diff --git a/compute/zgels.c b/compute/zgels.c
index 444021e55..2aed98e7d 100644
--- a/compute/zgels.c
+++ b/compute/zgels.c
@@ -48,7 +48,7 @@
  * @param[in] trans
  *          Intended usage:
  *          = ChamNoTrans:   the linear system involves A;
- *          = ChamConjTrans: the linear system involves A**H.
+ *          = ChamConjTrans: the linear system involves A^H.
  *          Currently only ChamNoTrans is supported.
  * @param[in] M
@@ -218,7 +218,7 @@ int CHAMELEON_zgels( cham_trans_t trans, int M, int N, int NRHS,
  * @param[in] trans
  *          Intended usage:
  *          = ChamNoTrans:   the linear system involves A;
- *          = ChamConjTrans: the linear system involves A**H.
+ *          = ChamConjTrans: the linear system involves A^H.
  *          Currently only ChamNoTrans is supported.
  * @param[in,out] A
diff --git a/compute/zgels_param.c b/compute/zgels_param.c
index fd5a99ddb..697e1b4ff 100644
--- a/compute/zgels_param.c
+++ b/compute/zgels_param.c
@@ -48,7 +48,7 @@
  * @param[in] trans
  *          Intended usage:
  *          = ChamNoTrans:   the linear system involves A;
- *          = ChamConjTrans: the linear system involves A**H.
+ *          = ChamConjTrans: the linear system involves A^H.
  *          Currently only ChamNoTrans is supported.
  * @param[in] M
@@ -221,7 +221,7 @@ int CHAMELEON_zgels_param( const libhqr_tree_t *qrtree, cham_trans_t trans, int
  * @param[in] trans
  *          Intended usage:
  *          = ChamNoTrans:   the linear system involves A;
- *          = ChamConjTrans: the linear system involves A**H.
+ *          = ChamConjTrans: the linear system involves A^H.
  *          Currently only ChamNoTrans is supported.
  * @param[in,out] A
diff --git a/compute/zgesvd.c b/compute/zgesvd.c
index c52fa59b3..7d12a10ba 100644
--- a/compute/zgesvd.c
+++ b/compute/zgesvd.c
@@ -44,7 +44,7 @@
  *  are returned in descending order.  The first min(m,n) columns of
  *  U and V are the left and right singular vectors of A.
- *  Note that the routine returns V**T, not V.
+ *  Note that the routine returns V^T, not V.
  * @param[in] jobu
@@ -62,16 +62,16 @@
  *                        NOT SUPPORTTED YET
  * @param[in] jobvt
- *          Specifies options for computing all or part of the matrix V**H.
+ *          Specifies options for computing all or part of the matrix V^H.
  *          Intended usage:
- *          = ChamVec   = 'A'(lapack): all N rows of V**H are returned
+ *          = ChamVec   = 'A'(lapack): all N rows of V^H are returned
  *                        in the array VT;
- *          = ChamNoVec = 'N': no rows of V**H (no right singular vectors)
+ *          = ChamNoVec = 'N': no rows of V^H (no right singular vectors)
  *                        are computed.
- *          = ChamSVec  = 'S': the first min(m,n) rows of V**H (the right
+ *          = ChamSVec  = 'S': the first min(m,n) rows of V^H (the right
  *                        singular vectors) are returned in the array VT;
  *                        NOT SUPPORTTED YET
- *          = ChamOVec  = 'O': the first min(m,n) rows of V**H (the right
+ *          = ChamOVec  = 'O': the first min(m,n) rows of V^H (the right
  *                        singular vectors) are overwritten on the array A;
  *                        NOT SUPPORTTED YET
@@ -90,7 +90,7 @@
  *                          columns of U (the left singular vectors,
  *                          stored columnwise);
  *          if JOBVT = 'O', A is overwritten with the first min(m,n)
- *                          rows of V**H (the right singular vectors,
+ *                          rows of V^H (the right singular vectors,
  *                          stored rowwise);
  *          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
  *                          are destroyed.
@@ -118,9 +118,9 @@
  * @param[out] VT
  *         If JOBVT = 'A', VT contains the N-by-N unitary matrix
- *         V**H;
+ *         V^H;
  *         if JOBVT = 'S', VT contains the first min(m,n) rows of
- *         V**H (the right singular vectors, stored rowwise);
+ *         V^H (the right singular vectors, stored rowwise);
  *         if JOBVT = 'N' or 'O', VT is not referenced.
  * @param[in] LDVT
@@ -259,16 +259,16 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt,
  *                        NOT SUPPORTTED YET
  * @param[in] jobvt
- *          Specifies options for computing all or part of the matrix V**H.
+ *          Specifies options for computing all or part of the matrix V^H.
  *          Intended usage:
- *          = ChamVec   = 'A'(lapack): all N rows of V**H are returned
+ *          = ChamVec   = 'A'(lapack): all N rows of V^H are returned
  *                        in the array VT;
- *          = ChamNoVec = 'N': no rows of V**H (no right singular vectors)
+ *          = ChamNoVec = 'N': no rows of V^H (no right singular vectors)
  *                        are computed.
- *          = ChamSVec  = 'S': the first min(m,n) rows of V**H (the right
+ *          = ChamSVec  = 'S': the first min(m,n) rows of V^H (the right
  *                        singular vectors) are returned in the array VT;
  *                        NOT SUPPORTTED YET
- *          = ChamOVec  = 'O': the first min(m,n) rows of V**H (the right
+ *          = ChamOVec  = 'O': the first min(m,n) rows of V^H (the right
  *                        singular vectors) are overwritten on the array A;
  *                        NOT SUPPORTTED YET
@@ -281,7 +281,7 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt,
  *                          columns of U (the left singular vectors,
  *                          stored columnwise);
  *          if JOBVT = 'O', A is overwritten with the first min(m,n)
- *                          rows of V**H (the right singular vectors,
+ *                          rows of V^H (the right singular vectors,
  *                          stored rowwise);
  *          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
  *                          are destroyed.
@@ -306,9 +306,9 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt,
  * @param[out] VT
  *         If JOBVT = 'A', VT contains the N-by-N unitary matrix
- *         V**H;
+ *         V^H;
  *         if JOBVT = 'S', VT contains the first min(m,n) rows of
- *         V**H (the right singular vectors, stored rowwise);
+ *         V^H (the right singular vectors, stored rowwise);
  *         if JOBVT = 'N' or 'O', VT is not referenced.
  * @param[in] LDVT
diff --git a/compute/zgetrs_incpiv.c b/compute/zgetrs_incpiv.c
index d3e6324f7..225cb125f 100644
--- a/compute/zgetrs_incpiv.c
+++ b/compute/zgetrs_incpiv.c
@@ -37,8 +37,8 @@
  * @param[in] trans
  *          Intended to specify the the form of the system of equations:
  *          = ChamNoTrans:   A * X = B     (No transpose)
- *          = ChamTrans:     A**T * X = B  (Transpose)
- *          = ChamConjTrans: A**H * X = B  (Conjugate transpose)
+ *          = ChamTrans:     A^T * X = B  (Transpose)
+ *          = ChamConjTrans: A^H * X = B  (Conjugate transpose)
  *          Currently only ChamNoTrans is supported.
  * @param[in] N
diff --git a/compute/zgetrs_nopiv.c b/compute/zgetrs_nopiv.c
index bb0de9875..3a3dfe360 100644
--- a/compute/zgetrs_nopiv.c
+++ b/compute/zgetrs_nopiv.c
@@ -38,8 +38,8 @@
  * @param[in] trans
  *          Intended to specify the the form of the system of equations:
  *          = ChamNoTrans:   A * X = B     (No transpose)
- *          = ChamTrans:     A**T * X = B  (Transpose)
- *          = ChamConjTrans: A**H * X = B  (Conjugate transpose)
+ *          = ChamTrans:     A^T * X = B  (Transpose)
+ *          = ChamConjTrans: A^H * X = B  (Conjugate transpose)
  *          Currently only ChamNoTrans is supported.
  * @param[in] N
diff --git a/compute/zhetrd.c b/compute/zhetrd.c
index 0c7e3c66b..43c4fc59d 100644
--- a/compute/zhetrd.c
+++ b/compute/zhetrd.c
@@ -34,7 +34,7 @@
  *  tridiagonal form S using a two-stage approach
  *  First stage: reduction to band tridiagonal form (unitary Q1);
  *  Second stage: reduction from band to tridiagonal form (unitary
- *  Q2).  Let Q = Q1 * Q2 be the global unitary transformation; Q**H *
+ *  Q2).  Let Q = Q1 * Q2 be the global unitary transformation; Q^H *
  *  A * Q = S.
@@ -190,7 +190,7 @@ int CHAMELEON_zhetrd( cham_job_t jobz, cham_uplo_t uplo, int N,
  *  First stage: reduction to band tridiagonal form (unitary Q1);
  *  Second stage: reduction from band to tridiagonal form (unitary Q2).
  *  Let Q = Q1 * Q2 be the global unitary transformation;
- *  Q**H * A * Q = S.
+ *  Q^H * A * Q = S.
  *  Tile equivalent of CHAMELEON_zhetrd().
  *  Operates on matrices stored by tiles.
  *  All matrices are passed through descriptors.
diff --git a/compute/zposv.c b/compute/zposv.c
index 27ab796eb..317f21f14 100644
--- a/compute/zposv.c
+++ b/compute/zposv.c
@@ -61,7 +61,7 @@
  *          triangular part of the matrix A, and the strictly upper triangular part of A is not
  *          referenced.
  *          On exit, if return value = 0, the factor U or L from the Cholesky factorization
- *          A = U**H*U or A = L*L**H.
+ *          A = U^H*U or A = L*L^H.
  * @param[in] LDA
  *          The leading dimension of the array A. LDA >= max(1,N).
@@ -197,7 +197,7 @@ int CHAMELEON_zposv( cham_uplo_t uplo, int N, int NRHS,
  *          triangular part of the matrix A, and the strictly upper triangular part of A is not
  *          referenced.
  *          On exit, if return value = 0, the factor U or L from the Cholesky factorization
- *          A = U**H*U or A = L*L**H.
+ *          A = U^H*U or A = L*L^H.
  * @param[in,out] B
  *          On entry, the N-by-NRHS right hand side matrix B.
diff --git a/compute/zpotrf.c b/compute/zpotrf.c
index c898ca145..41093b0ca 100644
--- a/compute/zpotrf.c
+++ b/compute/zpotrf.c
@@ -55,7 +55,7 @@
  *          triangular part of the matrix A, and the strictly upper triangular part of A is not
  *          referenced.
  *          On exit, if return value = 0, the factor U or L from the Cholesky factorization
- *          A = U**H*U or A = L*L**H.
+ *          A = U^H*U or A = L*L^H.
  * @param[in] LDA
  *          The leading dimension of the array A. LDA >= max(1,N).
@@ -170,7 +170,7 @@ int CHAMELEON_zpotrf( cham_uplo_t uplo, int N,
  *          triangular part of the matrix A, and the strictly upper triangular part of A is not
  *          referenced.
  *          On exit, if return value = 0, the factor U or L from the Cholesky factorization
- *          A = U**H*U or A = L*L**H.
+ *          A = U^H*U or A = L*L^H.
diff --git a/compute/zpotri.c b/compute/zpotri.c
index eb1e95d61..899e13b89 100644
--- a/compute/zpotri.c
+++ b/compute/zpotri.c
@@ -29,7 +29,7 @@
  * @ingroup CHAMELEON_Complex64_t
  *  CHAMELEON_zpotri - Computes the inverse of a complex Hermitian positive definite
- *  matrix A using the Cholesky factorization A = U**H*U or A = L*L**H
+ *  matrix A using the Cholesky factorization A = U^H*U or A = L*L^H
  *  computed by CHAMELEON_zpotrf.
@@ -43,7 +43,7 @@
  * @param[in,out] A
  *          On entry, the triangular factor U or L from the Cholesky
- *          factorization A = U**H*U or A = L*L**H, as computed by
+ *          factorization A = U^H*U or A = L*L^H, as computed by
  *          CHAMELEON_zpotrf.
  *          On exit, the upper or lower triangle of the (Hermitian)
  *          inverse of A, overwriting the input factor U or L.
@@ -140,7 +140,7 @@ int CHAMELEON_zpotri( cham_uplo_t uplo, int N,
  *  CHAMELEON_zpotri_Tile - Computes the inverse of a complex Hermitian
  *  positive definite matrix A using the Cholesky factorization
- *  A = U**H*U or A = L*L**H computed by CHAMELEON_zpotrf.
+ *  A = U^H*U or A = L*L^H computed by CHAMELEON_zpotrf.
  *  Tile equivalent of CHAMELEON_zpotri().
  *  Operates on matrices stored by tiles.
  *  All matrices are passed through descriptors.
@@ -154,7 +154,7 @@ int CHAMELEON_zpotri( cham_uplo_t uplo, int N,
  * @param[in] A
  *          On entry, the triangular factor U or L from the Cholesky
- *          factorization A = U**H*U or A = L*L**H, as computed by
+ *          factorization A = U^H*U or A = L*L^H, as computed by
  *          CHAMELEON_zpotrf.
  *          On exit, the upper or lower triangle of the (Hermitian)
  *          inverse of A, overwriting the input factor U or L.
@@ -206,8 +206,8 @@ int CHAMELEON_zpotri_Tile( cham_uplo_t uplo, CHAM_desc_t *A )
  * @ingroup CHAMELEON_Complex64_t_Tile_Async
  *  CHAMELEON_zpotri_Tile_Async - Computes the inverse of a complex Hermitian
- *  positive definite matrix A using the Cholesky factorization A = U**H*U
- *  or A = L*L**H computed by CHAMELEON_zpotrf.
+ *  positive definite matrix A using the Cholesky factorization A = U^H*U
+ *  or A = L*L^H computed by CHAMELEON_zpotrf.
  *  Non-blocking equivalent of CHAMELEON_zpotri_Tile().
  *  May return before the computation is finished.
  *  Allows for pipelining of operations at runtime.
diff --git a/compute/zpotrimm.c b/compute/zpotrimm.c
index f903d52e9..7d5cda488 100644
--- a/compute/zpotrimm.c
+++ b/compute/zpotrimm.c
@@ -29,7 +29,7 @@
  * @ingroup CHAMELEON_Complex64_t
  *  CHAMELEON_zpotrimm - Computes the inverse of a complex Hermitian positive definite
- *  matrix A using the Cholesky factorization A = U**H*U or A = L*L**H
+ *  matrix A using the Cholesky factorization A = U^H*U or A = L*L^H
  *  computed by CHAMELEON_zpotrf.
@@ -43,7 +43,7 @@
  * @param[in,out] A
  *          On entry, the triangular factor U or L from the Cholesky
- *          factorization A = U**H*U or A = L*L**H, as computed by
+ *          factorization A = U^H*U or A = L*L^H, as computed by
  *          CHAMELEON_zpotrf.
  *          On exit, the upper or lower triangle of the (Hermitian)
  *          inverse of A, overwriting the input factor U or L.
@@ -162,7 +162,7 @@ int CHAMELEON_zpotrimm( cham_uplo_t uplo, int N,
  *  CHAMELEON_zpotrimm_Tile - Computes the inverse of a complex Hermitian
  *  positive definite matrix A using the Cholesky factorization
- *  A = U**H*U or A = L*L**H computed by CHAMELEON_zpotrf.
+ *  A = U^H*U or A = L*L^H computed by CHAMELEON_zpotrf.
  *  Tile equivalent of CHAMELEON_zpotrimm().
  *  Operates on matrices stored by tiles.
  *  All matrices are passed through descriptors.
@@ -176,7 +176,7 @@ int CHAMELEON_zpotrimm( cham_uplo_t uplo, int N,
  * @param[in] A
  *          On entry, the triangular factor U or L from the Cholesky
- *          factorization A = U**H*U or A = L*L**H, as computed by
+ *          factorization A = U^H*U or A = L*L^H, as computed by
  *          CHAMELEON_zpotrf.
  *          On exit, the upper or lower triangle of the (Hermitian)
  *          inverse of A, overwriting the input factor U or L.
@@ -230,8 +230,8 @@ int CHAMELEON_zpotrimm_Tile( cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, C
  * @ingroup CHAMELEON_Complex64_t_Tile_Async
  *  CHAMELEON_zpotrimm_Tile_Async - Computes the inverse of a complex Hermitian
- *  positive definite matrix A using the Cholesky factorization A = U**H*U
- *  or A = L*L**H computed by CHAMELEON_zpotrf.
+ *  positive definite matrix A using the Cholesky factorization A = U^H*U
+ *  or A = L*L^H computed by CHAMELEON_zpotrf.
  *  Non-blocking equivalent of CHAMELEON_zpotrimm_Tile().
  *  May return before the computation is finished.
  *  Allows for pipelining of operations at runtime.
diff --git a/compute/zpotrs.c b/compute/zpotrs.c
index 72bd62a7f..1d290f64f 100644
--- a/compute/zpotrs.c
+++ b/compute/zpotrs.c
@@ -31,7 +31,7 @@
  *  CHAMELEON_zpotrs - Solves a system of linear equations A * X = B with a symmetric positive
  *  definite (or Hermitian positive definite in the complex case) matrix A using the Cholesky
- *  factorization A = U**H*U or A = L*L**H computed by CHAMELEON_zpotrf.
+ *  factorization A = U^H*U or A = L*L^H computed by CHAMELEON_zpotrf.
@@ -46,7 +46,7 @@
  *          The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
  * @param[in] A
- *          The triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H,
+ *          The triangular factor U or L from the Cholesky factorization A = U^H*U or A = L*L^H,
  *          computed by CHAMELEON_zpotrf.
  * @param[in] LDA
@@ -173,7 +173,7 @@ int CHAMELEON_zpotrs( cham_uplo_t uplo, int N, int NRHS,
  *          = ChamLower: Lower triangle of A is stored.
  * @param[in] A
- *          The triangular factor U or L from the Cholesky factorization A = U**H*U or A = L*L**H,
+ *          The triangular factor U or L from the Cholesky factorization A = U^H*U or A = L*L^H,
  *          computed by CHAMELEON_zpotrf.
  * @param[in,out] B
diff --git a/compute/zsysv.c b/compute/zsysv.c
index 256e27de9..ebee2c6f1 100644
--- a/compute/zsysv.c
+++ b/compute/zsysv.c
@@ -62,7 +62,7 @@
  *          triangular part of the matrix A, and the strictly upper triangular part of A is not
  *          referenced.
  *          On exit, if return value = 0, the factor U or L from the Cholesky factorization
- *          A = U**T*U or A = L*L**T.
+ *          A = U^T*U or A = L*L^T.
  * @param[in] LDA
  *          The leading dimension of the array A. LDA >= max(1,N).
@@ -194,7 +194,7 @@ int CHAMELEON_zsysv( cham_uplo_t uplo, int N, int NRHS,
  *          triangular part of the matrix A, and the strictly upper triangular part of A is not
  *          referenced.
  *          On exit, if return value = 0, the factor U or L from the Cholesky factorization
- *          A = U**T*U or A = L*L**T.
+ *          A = U^T*U or A = L*L^T.
  * @param[in,out] B
  *          On entry, the N-by-NRHS right hand side matrix B.
diff --git a/compute/zsytrf.c b/compute/zsytrf.c
index e32b1f8f4..b603ddde5 100644
--- a/compute/zsytrf.c
+++ b/compute/zsytrf.c
@@ -50,7 +50,7 @@
  *          triangular part of the matrix A, and the strictly upper triangular part of A is not
  *          referenced.
  *          On exit, if return value = 0, the factor U or L from the Cholesky factorization
- *          A = U**H*U or A = L*L**H.
+ *          A = U^T*U or A = L*L^T.
  * @param[in] LDA
  *          The leading dimension of the array A. LDA >= max(1,N).
@@ -164,7 +164,7 @@ int CHAMELEON_zsytrf( cham_uplo_t uplo, int N,
  *          triangular part of the matrix A, and the strictly upper triangular part of A is not
  *          referenced.
  *          On exit, if return value = 0, the factor U or L from the Cholesky factorization
- *          A = U**T*U or A = L*L**T.
+ *          A = U^T*U or A = L*L^T.
diff --git a/compute/zsytrs.c b/compute/zsytrs.c
index b50289edd..84a2c778f 100644
--- a/compute/zsytrs.c
+++ b/compute/zsytrs.c
@@ -33,7 +33,7 @@
  *  CHAMELEON_zsytrs - Solves a system of linear equations A * X = B with a complex
  *  symmetric  matrix A using the Cholesky factorization
- *  A = U**H*U or A = L*L**H computed by CHAMELEON_zsytrf.
+ *  A = U^T*U or A = L*L^T computed by CHAMELEON_zsytrf.
@@ -48,7 +48,7 @@
  *          The number of right hand sides, i.e., the number of columns of the matrix B. NRHS >= 0.
  * @param[in] A
- *          The triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T,
+ *          The triangular factor U or L from the Cholesky factorization A = U^T*U or A = L*L^T,
  *          computed by CHAMELEON_zsytrf.
  * @param[in] LDA
@@ -172,7 +172,7 @@ int CHAMELEON_zsytrs( cham_uplo_t uplo, int N, int NRHS,
  *          = ChamLower: Lower triangle of A is stored.
  * @param[in] A
- *          The triangular factor U or L from the Cholesky factorization A = U**T*U or A = L*L**T,
+ *          The triangular factor U or L from the Cholesky factorization A = U^T*U or A = L*L^T,
  *          computed by CHAMELEON_zsytrf.
  * @param[in,out] B
diff --git a/compute/zunmlq.c b/compute/zunmlq.c
index 51506cdaa..bbc4a6a77 100644
--- a/compute/zunmlq.c
+++ b/compute/zunmlq.c
@@ -35,7 +35,7 @@
  *                  SIDE = 'L'     SIDE = 'R'
  *  TRANS = 'N':      Q * C          C * Q
- *  TRANS = 'C':      Q**H * C       C * Q**H
+ *  TRANS = 'C':      Q^H * C        C * Q^H
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -49,13 +49,13 @@
  * @param[in] side
  *          Intended usage:
- *          = ChamLeft:  apply Q or Q**H from the left;
- *          = ChamRight: apply Q or Q**H from the right.
+ *          = ChamLeft:  apply Q or Q^H from the left;
+ *          = ChamRight: apply Q or Q^H from the right.
  * @param[in] trans
  *          Intended usage:
  *          = ChamNoTrans:   no transpose, apply Q;
- *          = ChamConjTrans: conjugate transpose, apply Q**H.
+ *          = ChamConjTrans: conjugate transpose, apply Q^H.
  * @param[in] M
  *          The number of rows of the matrix C. M >= 0.
@@ -79,7 +79,7 @@
  * @param[in,out] C
  *          On entry, the M-by-N matrix C.
- *          On exit, C is overwritten by Q*C or Q**H*C.
+ *          On exit, C is overwritten by Q*C or Q^H*C.
  * @param[in] LDC
  *          The leading dimension of the array C. LDC >= max(1,M).
@@ -211,14 +211,14 @@ int CHAMELEON_zunmlq( cham_side_t side, cham_trans_t trans, int M, int N, int K,
  * @param[in] side
  *          Intended usage:
- *          = ChamLeft:  apply Q or Q**H from the left;
- *          = ChamRight: apply Q or Q**H from the right.
+ *          = ChamLeft:  apply Q or Q^H from the left;
+ *          = ChamRight: apply Q or Q^H from the right.
  *          Currently only ChamLeft is supported.
  * @param[in] trans
  *          Intended usage:
  *          = ChamNoTrans:   no transpose, apply Q;
- *          = ChamConjTrans: conjugate transpose, apply Q**H.
+ *          = ChamConjTrans: conjugate transpose, apply Q^H.
  *          Currently only ChamConjTrans is supported.
  * @param[in] A
@@ -229,7 +229,7 @@ int CHAMELEON_zunmlq( cham_side_t side, cham_trans_t trans, int M, int N, int K,
  * @param[in,out] C
  *          On entry, the M-by-N matrix C.
- *          On exit, C is overwritten by Q*C or Q**H*C.
+ *          On exit, C is overwritten by Q*C or Q^H*C.
diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c
index 7199aed91..df0fba8ba 100644
--- a/compute/zunmlq_param.c
+++ b/compute/zunmlq_param.c
@@ -29,7 +29,7 @@
  *                  SIDE = 'L'     SIDE = 'R'
  *  TRANS = 'N':      Q * C          C * Q
- *  TRANS = 'C':      Q**H * C       C * Q**H
+ *  TRANS = 'C':      Q^H * C        C * Q^H
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -46,13 +46,13 @@
  * @param[in] side
  *          Intended usage:
- *          = ChamLeft:  apply Q or Q**H from the left;
- *          = ChamRight: apply Q or Q**H from the right.
+ *          = ChamLeft:  apply Q or Q^H from the left;
+ *          = ChamRight: apply Q or Q^H from the right.
  * @param[in] trans
  *          Intended usage:
  *          = ChamNoTrans:   no transpose, apply Q;
- *          = ChamConjTrans: conjugate transpose, apply Q**H.
+ *          = ChamConjTrans: conjugate transpose, apply Q^H.
  * @param[in] M
  *          The number of rows of the matrix C. M >= 0.
@@ -79,7 +79,7 @@
  * @param[in,out] C
  *          On entry, the M-by-N matrix C.
- *          On exit, C is overwritten by Q*C or Q**H*C.
+ *          On exit, C is overwritten by Q*C or Q^H*C.
  * @param[in] LDC
  *          The leading dimension of the array C. LDC >= max(1,M).
@@ -210,14 +210,14 @@ int CHAMELEON_zunmlq_param( const libhqr_tree_t *qrtree, cham_side_t side, cham_
  * @param[in] side
  *          Intended usage:
- *          = ChamLeft:  apply Q or Q**H from the left;
- *          = ChamRight: apply Q or Q**H from the right.
+ *          = ChamLeft:  apply Q or Q^H from the left;
+ *          = ChamRight: apply Q or Q^H from the right.
  *          Currently only ChamLeft is supported.
  * @param[in] trans
  *          Intended usage:
  *          = ChamNoTrans:   no transpose, apply Q;
- *          = ChamConjTrans: conjugate transpose, apply Q**H.
+ *          = ChamConjTrans: conjugate transpose, apply Q^H.
  *          Currently only ChamConjTrans is supported.
  * @param[in] A
@@ -228,7 +228,7 @@ int CHAMELEON_zunmlq_param( const libhqr_tree_t *qrtree, cham_side_t side, cham_
  * @param[in,out] C
  *          On entry, the M-by-N matrix C.
- *          On exit, C is overwritten by Q*C or Q**H*C.
+ *          On exit, C is overwritten by Q*C or Q^H*C.
diff --git a/compute/zunmqr.c b/compute/zunmqr.c
index c7adba166..4139f579a 100644
--- a/compute/zunmqr.c
+++ b/compute/zunmqr.c
@@ -34,7 +34,7 @@
  *                  SIDE = 'L'     SIDE = 'R'
  *  TRANS = 'N':      Q * C          C * Q
- *  TRANS = 'C':      Q**H * C       C * Q**H
+ *  TRANS = 'C':      Q^H * C        C * Q^H
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -48,13 +48,13 @@
  * @param[in] side
  *          Intended usage:
- *          = ChamLeft:  apply Q or Q**H from the left;
- *          = ChamRight: apply Q or Q**H from the right.
+ *          = ChamLeft:  apply Q or Q^H from the left;
+ *          = ChamRight: apply Q or Q^H from the right.
  * @param[in] trans
  *          Intended usage:
  *          = ChamNoTrans:   no transpose, apply Q;
- *          = ChamConjTrans: conjugate transpose, apply Q**H.
+ *          = ChamConjTrans: conjugate transpose, apply Q^H.
  * @param[in] M
  *          The number of rows of the matrix C. M >= 0.
@@ -81,7 +81,7 @@
  * @param[in,out] C
  *          On entry, the M-by-N matrix C.
- *          On exit, C is overwritten by Q*C or Q**H*C.
+ *          On exit, C is overwritten by Q*C or Q^H*C.
  * @param[in] LDC
  *          The leading dimension of the array C. LDC >= max(1,M).
@@ -212,14 +212,14 @@ int CHAMELEON_zunmqr( cham_side_t side, cham_trans_t trans, int M, int N, int K,
  * @param[in] side
  *          Intended usage:
- *          = ChamLeft:  apply Q or Q**H from the left;
- *          = ChamRight: apply Q or Q**H from the right.
+ *          = ChamLeft:  apply Q or Q^H from the left;
+ *          = ChamRight: apply Q or Q^H from the right.
  *          Currently only ChamLeft is supported.
  * @param[in] trans
  *          Intended usage:
  *          = ChamNoTrans:   no transpose, apply Q;
- *          = ChamConjTrans: conjugate transpose, apply Q**H.
+ *          = ChamConjTrans: conjugate transpose, apply Q^H.
  *          Currently only ChamConjTrans is supported.
  * @param[in] A
@@ -231,7 +231,7 @@ int CHAMELEON_zunmqr( cham_side_t side, cham_trans_t trans, int M, int N, int K,
  * @param[in,out] C
  *          On entry, the M-by-N matrix C.
- *          On exit, C is overwritten by Q*C or Q**H*C.
+ *          On exit, C is overwritten by Q*C or Q^H*C.
diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c
index 152257e77..e7b31e742 100644
--- a/compute/zunmqr_param.c
+++ b/compute/zunmqr_param.c
@@ -29,7 +29,7 @@
  *                  SIDE = 'L'     SIDE = 'R'
  *  TRANS = 'N':      Q * C          C * Q
- *  TRANS = 'C':      Q**H * C       C * Q**H
+ *  TRANS = 'C':      Q^H * C        C * Q^H
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -46,13 +46,13 @@
  * @param[in] side
  *          Intended usage:
- *          = ChamLeft:  apply Q or Q**H from the left;
- *          = ChamRight: apply Q or Q**H from the right.
+ *          = ChamLeft:  apply Q or Q^H from the left;
+ *          = ChamRight: apply Q or Q^H from the right.
  * @param[in] trans
  *          Intended usage:
  *          = ChamNoTrans:   no transpose, apply Q;
- *          = ChamConjTrans: conjugate transpose, apply Q**H.
+ *          = ChamConjTrans: conjugate transpose, apply Q^H.
  * @param[in] M
  *          The number of rows of the matrix C. M >= 0.
@@ -82,7 +82,7 @@
  * @param[in,out] C
  *          On entry, the M-by-N matrix C.
- *          On exit, C is overwritten by Q*C or Q**H*C.
+ *          On exit, C is overwritten by Q*C or Q^H*C.
  * @param[in] LDC
  *          The leading dimension of the array C. LDC >= max(1,M).
@@ -215,14 +215,14 @@ int CHAMELEON_zunmqr_param( const libhqr_tree_t *qrtree,
  * @param[in] side
  *          Intended usage:
- *          = ChamLeft:  apply Q or Q**H from the left;
- *          = ChamRight: apply Q or Q**H from the right.
+ *          = ChamLeft:  apply Q or Q^H from the left;
+ *          = ChamRight: apply Q or Q^H from the right.
  *          Currently only ChamLeft is supported.
  * @param[in] trans
  *          Intended usage:
  *          = ChamNoTrans:   no transpose, apply Q;
- *          = ChamConjTrans: conjugate transpose, apply Q**H.
+ *          = ChamConjTrans: conjugate transpose, apply Q^H.
  *          Currently only ChamConjTrans is supported.
  * @param[in] A
@@ -234,7 +234,7 @@ int CHAMELEON_zunmqr_param( const libhqr_tree_t *qrtree,
  * @param[in,out] C
  *          On entry, the M-by-N matrix C.
- *          On exit, C is overwritten by Q*C or Q**H*C.
+ *          On exit, C is overwritten by Q*C or Q^H*C.
diff --git a/control/compute_z.h b/control/compute_z.h
index 6a78fe620..e34ec4dfc 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -35,10 +35,6 @@ int chameleon_zshift(CHAM_context_t *chamctxt, int m, int n, CHAMELEON_Complex64
  *  Declarations of parallel functions (dynamic scheduling) - alphabetical order
-void chameleon_pzbarrier_pnl2tl(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-void chameleon_pzbarrier_row2tl(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-void chameleon_pzbarrier_tl2pnl(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
-void chameleon_pzbarrier_tl2row(CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgebrd_gb2bd(cham_uplo_t uplo, CHAM_desc_t *A, double *D, double *E, CHAM_desc_t *T, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgebrd_ge2gb( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
diff --git a/coreblas/compute/core_zherfb.c b/coreblas/compute/core_zherfb.c
index d71c18028..a54de4a5a 100644
--- a/coreblas/compute/core_zherfb.c
+++ b/coreblas/compute/core_zherfb.c
@@ -27,7 +27,7 @@
  *  CORE_zherfb overwrites the symmetric complex N-by-N tile C with
- *    Q**T*C*Q
+ *    Q^H*C*Q
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -72,7 +72,7 @@
  * @param[in,out] C
  *         On entry, the symmetric N-by-N tile C.
- *         On exit, C is overwritten by Q**T*C*Q.
+ *         On exit, C is overwritten by Q^H*C*Q.
  * @param[in] ldc
  *         The leading dimension of the array C. LDC >= max(1,M).
diff --git a/coreblas/compute/core_zpamm.c b/coreblas/compute/core_zpamm.c
index eb995720c..01a25ea55 100644
--- a/coreblas/compute/core_zpamm.c
+++ b/coreblas/compute/core_zpamm.c
@@ -52,7 +52,7 @@ static inline int CORE_zpamm_w(cham_side_t side, cham_trans_t trans, cham_uplo_t
  *  where  op( V ) is one of
- *     op( V ) = V   or   op( V ) = V**T   or   op( V ) = V**H,
+ *     op( V ) = V   or   op( V ) = V^T   or   op( V ) = V^H,
  *  A1, A2 and W are general matrices, and V is:
diff --git a/coreblas/compute/core_zparfb.c b/coreblas/compute/core_zparfb.c
index 4b2246994..9bd1e809a 100644
--- a/coreblas/compute/core_zparfb.c
+++ b/coreblas/compute/core_zparfb.c
@@ -52,12 +52,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   : No transpose, apply Q;
- *         @arg ChamConjTrans : ConjTranspose, apply Q**H.
+ *         @arg ChamConjTrans : ConjTranspose, apply Q^H.
  * @param[in] direct
  *         Indicates how H is formed from a product of elementary
diff --git a/coreblas/compute/core_zpemv.c b/coreblas/compute/core_zpemv.c
index f96ef1e14..0144020a7 100644
--- a/coreblas/compute/core_zpemv.c
+++ b/coreblas/compute/core_zpemv.c
@@ -36,7 +36,7 @@
  *  where  op( A ) is one of
- *     op( A ) = A   or   op( A ) = A**T   or   op( A ) = A**H,
+ *     op( A ) = A   or   op( A ) = A^T   or   op( A ) = A^H,
  *  alpha and beta are scalars, x and y are vectors and A is a
  *  pentagonal matrix (see further details).
@@ -52,8 +52,8 @@
  * @param[in] trans
  *         @arg ChamNoTrans   :  y := alpha*A*x    + beta*y.
- *         @arg ChamTrans     :  y := alpha*A**T*x + beta*y.
- *         @arg ChamConjTrans :  y := alpha*A**H*x + beta*y.
+ *         @arg ChamTrans     :  y := alpha*A^T*x + beta*y.
+ *         @arg ChamConjTrans :  y := alpha*A^H*x + beta*y.
  * @param[in] M
  *         Number of rows of the matrix A.
diff --git a/coreblas/compute/core_ztpmlqt.c b/coreblas/compute/core_ztpmlqt.c
index 7a0ecf7ae..a15c7db3d 100644
--- a/coreblas/compute/core_ztpmlqt.c
+++ b/coreblas/compute/core_ztpmlqt.c
@@ -33,12 +33,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  ConjTranspose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M
  *         The number of rows of the tile B. M >= 0.
@@ -78,7 +78,7 @@
  *         or (LDA,K) if SIDE = ChamRight
  *         On entry, the K-by-N or M-by-K matrix A.
  *         On exit, A is overwritten by the corresponding block of
- *         Q*C or Q**H*C or C*Q or C*Q**H.  See Further Details.
+ *         Q*C or Q^H*C or C*Q or C*Q^H.  See Further Details.
  * @param[in] LDA
  *         The leading dimension of the array A. LDA >= max(1,M).
@@ -88,7 +88,7 @@
  * @param[in,out] B
  *         On entry, the M-by-N tile B.
  *         On exit, B is overwritten by the corresponding block of
- *         Q*C or Q**H*C or C*Q or C*Q**H.  See Further Details.
+ *         Q*C or Q^H*C or C*Q or C*Q^H.  See Further Details.
  * @param[in] LDB
  *         The leading dimension of the tile B. LDB >= max(1,M).
@@ -122,11 +122,11 @@
  *  If trans='N' and side='L', C is on exit replaced with Q * C.
- *  If trans='C' and side='L', C is on exit replaced with Q**H * C.
+ *  If trans='C' and side='L', C is on exit replaced with Q^H * C.
  *  If trans='N' and side='R', C is on exit replaced with C * Q.
- *  If trans='C' and side='R', C is on exit replaced with C * Q**H.
+ *  If trans='C' and side='R', C is on exit replaced with C * Q^H.
diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c
index f308f6389..68dfb9744 100644
--- a/coreblas/compute/core_ztpmqrt.c
+++ b/coreblas/compute/core_ztpmqrt.c
@@ -31,12 +31,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  ConjTranspose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M
  *         The number of rows of the tile B. M >= 0.
@@ -76,7 +76,7 @@
  *         or (LDA,K) if SIDE = ChamRight
  *         On entry, the K-by-N or M-by-K matrix A.
  *         On exit, A is overwritten by the corresponding block of
- *         Q*C or Q**H*C or C*Q or C*Q**H.  See Further Details.
+ *         Q*C or Q^H*C or C*Q or C*Q^H.  See Further Details.
  * @param[in] LDA
  *         The leading dimension of the array A. LDA >= max(1,M).
@@ -86,7 +86,7 @@
  * @param[in,out] B
  *         On entry, the M-by-N tile B.
  *         On exit, B is overwritten by the corresponding block of
- *         Q*C or Q**H*C or C*Q or C*Q**H.  See Further Details.
+ *         Q*C or Q^H*C or C*Q or C*Q^H.  See Further Details.
  * @param[in] LDB
  *         The leading dimension of the tile B. LDB >= max(1,M).
@@ -121,11 +121,11 @@
  *  If trans='N' and side='L', C is on exit replaced with Q * C.
- *  If trans='C' and side='L', C is on exit replaced with Q**H * C.
+ *  If trans='C' and side='L', C is on exit replaced with Q^H * C.
  *  If trans='N' and side='R', C is on exit replaced with C * Q.
- *  If trans='C' and side='R', C is on exit replaced with C * Q**H.
+ *  If trans='C' and side='R', C is on exit replaced with C * Q^H.
diff --git a/coreblas/compute/core_ztsmlq.c b/coreblas/compute/core_ztsmlq.c
index 34a3b2016..d0f55f225 100644
--- a/coreblas/compute/core_ztsmlq.c
+++ b/coreblas/compute/core_ztsmlq.c
@@ -38,7 +38,7 @@
  *    TRANS = 'N':         Q * | A1 |     | A1 A2 | * Q
  *                             | A2 |
- *    TRANS = 'C':      Q**H * | A1 |     | A1 A2 | * Q**H
+ *    TRANS = 'C':       Q^H * | A1 |     | A1 A2 | * Q^H
  *                             | A2 |
  *  where Q is a complex unitary matrix defined as the product of k
@@ -51,12 +51,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  ConjTranspose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M1
  *         The number of rows of the tile A1. M1 >= 0.
@@ -248,7 +248,7 @@ int CORE_ztsmlq(cham_side_t side, cham_trans_t trans,
             jc = i;
-         * Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb)
+         * Apply H or H'
             side, trans, ChamDirForward, ChamRowwise,
diff --git a/coreblas/compute/core_ztsmlq_hetra1.c b/coreblas/compute/core_ztsmlq_hetra1.c
index bd3d8dc40..c4e947942 100644
--- a/coreblas/compute/core_ztsmlq_hetra1.c
+++ b/coreblas/compute/core_ztsmlq_hetra1.c
@@ -38,12 +38,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  ConjTranspose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] m1
  *         The number of rows of the tile A1. m1 >= 0.
diff --git a/coreblas/compute/core_ztsmqr.c b/coreblas/compute/core_ztsmqr.c
index 0e48144dc..98b5b58d0 100644
--- a/coreblas/compute/core_ztsmqr.c
+++ b/coreblas/compute/core_ztsmqr.c
@@ -38,7 +38,7 @@
  *    TRANS = 'N':         Q * | A1 |     | A1 A2 | * Q
  *                             | A2 |
- *    TRANS = 'C':      Q**H * | A1 |     | A1 A2 | * Q**H
+ *    TRANS = 'C':       Q^H * | A1 |     | A1 A2 | * Q^H
  *                             | A2 |
  *  where Q is a complex unitary matrix defined as the product of k
@@ -51,12 +51,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  ConjTranspose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M1
  *         The number of rows of the tile A1. M1 >= 0.
@@ -243,7 +243,7 @@ int CORE_ztsmqr(cham_side_t side, cham_trans_t trans,
             jc = i;
-         * Apply H or H' (NOTE: CORE_zparfb used to be CORE_ztsrfb)
+         * Apply H or H'
             side, trans, ChamDirForward, ChamColumnwise,
diff --git a/coreblas/compute/core_ztsmqr_hetra1.c b/coreblas/compute/core_ztsmqr_hetra1.c
index 825fd30c7..2b9f8f661 100644
--- a/coreblas/compute/core_ztsmqr_hetra1.c
+++ b/coreblas/compute/core_ztsmqr_hetra1.c
@@ -40,12 +40,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  ConjTranspose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] m1
  *         The number of rows of the tile A1. M1 >= 0.
diff --git a/coreblas/compute/core_zttmlq.c b/coreblas/compute/core_zttmlq.c
index d28b8057a..0226f47dd 100644
--- a/coreblas/compute/core_zttmlq.c
+++ b/coreblas/compute/core_zttmlq.c
@@ -36,7 +36,7 @@
  *    TRANS = 'N':         Q * | A1 |       | A1 | * Q
  *                             | A2 |       | A2 |
- *    TRANS = 'C':      Q**H * | A1 |       | A1 | * Q**H
+ *    TRANS = 'C':       Q^H * | A1 |       | A1 | * Q^H
  *                             | A2 |       | A2 |
  *  where Q is a complex unitary matrix defined as the product of k
@@ -49,12 +49,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  ConjTranspose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M1
  *         The number of rows of the tile A1. M1 >= 0.
@@ -244,7 +244,7 @@ int CORE_zttmlq(cham_side_t side, cham_trans_t trans,
-         * Apply H or H' (NOTE: CORE_zparfb used to be CORE_zttrfb)
+         * Apply H or H'
             side, trans, ChamDirForward, ChamRowwise,
diff --git a/coreblas/compute/core_zttmqr.c b/coreblas/compute/core_zttmqr.c
index 896b0c898..e04a22ac5 100644
--- a/coreblas/compute/core_zttmqr.c
+++ b/coreblas/compute/core_zttmqr.c
@@ -35,7 +35,7 @@
  *    TRANS = 'N':         Q * | A1 |       | A1 | * Q
  *                             | A2 |       | A2 |
- *    TRANS = 'C':      Q**H * | A1 |       | A1 | * Q**H
+ *    TRANS = 'C':       Q^H * | A1 |       | A1 | * Q^H
  *                             | A2 |       | A2 |
  *  where Q is a complex unitary matrix defined as the product of k
@@ -48,12 +48,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  ConjTranspose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M1
  *         The number of rows of the tile A1. M1 >= 0.
@@ -235,7 +235,7 @@ int CORE_zttmqr(cham_side_t side, cham_trans_t trans,
-         * Apply H or H' (NOTE: CORE_zparfb used to be CORE_zttrfb)
+         * Apply H or H'
             side, trans, ChamDirForward, ChamColumnwise,
diff --git a/coreblas/compute/core_zunmlq.c b/coreblas/compute/core_zunmlq.c
index 3b5d17c06..6a310b859 100644
--- a/coreblas/compute/core_zunmlq.c
+++ b/coreblas/compute/core_zunmlq.c
@@ -35,7 +35,7 @@
  *                    SIDE = 'L'     SIDE = 'R'
  *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q**H * C       C * Q**H
+ *    TRANS = 'C':      Q^H * C        C * Q^H
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -48,12 +48,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M
  *         The number of rows of the tile C.  M >= 0.
@@ -90,7 +90,7 @@
  * @param[in,out] C
  *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+ *         On exit, C is overwritten by Q*C or Q^H*C or C*Q^H or C*Q.
  * @param[in] LDC
  *         The leading dimension of the array C. LDC >= max(1,M).
diff --git a/coreblas/compute/core_zunmqr.c b/coreblas/compute/core_zunmqr.c
index 48e75b281..712da7e6c 100644
--- a/coreblas/compute/core_zunmqr.c
+++ b/coreblas/compute/core_zunmqr.c
@@ -35,7 +35,7 @@
  *                    SIDE = 'L'     SIDE = 'R'
  *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q**H * C       C * Q**H
+ *    TRANS = 'C':      Q^H * C        C * Q^H
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -48,12 +48,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M
  *         The number of rows of the tile C.  M >= 0.
@@ -91,7 +91,7 @@
  * @param[in,out] C
  *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+ *         On exit, C is overwritten by Q*C or Q^H*C or C*Q^H or C*Q.
  * @param[in] LDC
  *         The leading dimension of the array C. LDC >= max(1,M).
diff --git a/coreblas/eztrace_module/coreblas_eztrace_module b/coreblas/eztrace_module/coreblas_eztrace_module
index 4a8688192..3bca3cbf1 100644
--- a/coreblas/eztrace_module/coreblas_eztrace_module
+++ b/coreblas/eztrace_module/coreblas_eztrace_module
@@ -7,18 +7,6 @@ ID 7770
 void CORE_scasum(int storev, int uplo, int M, int N,
                  void *A, int lda, float *work);
-void CORE_cbrdalg(int uplo, int N, int NB,
-                  void *pA, void *C, void *S,
-                  int i, int j, int m, int grsiz);
-int CORE_cgbelr(int uplo, int N,
-                void **A, void *V, void *TAU,
-                int st, int ed, int eltsize);
-int CORE_cgbrce(int uplo, int N,
-                void **A, void *V, void *TAU,
-                int st, int ed, int eltsize);
-int CORE_cgblrx(int uplo, int N,
-                void **A, void *V, void *TAU,
-                int st, int ed, int eltsize);
 int CORE_cgeadd(int M, int N, void *alpha,
                 void *A, int LDA,
                       void *B, int LDB);
@@ -64,30 +52,6 @@ int  CORE_cgetrf_reclap(int M, int N,
 int  CORE_cgetrf_rectil(void *A, int *IPIV, int *info);
 void CORE_cgetrip(int m, int n, void *A,
                   void *work);
-int CORE_chbelr(int uplo, int N,
-                void **A, void *V, void *TAU,
-                int st, int ed, int eltsize);
-int CORE_chblrx(int uplo, int N,
-                void **A, void *V, void *TAU,
-                int st, int ed, int eltsize);
-int CORE_chbrce(int uplo, int N,
-                void **A, void *V, void *TAU,
-                int st, int ed, int eltsize);
-void CORE_chbtype1cb(int N, int NB,
-                     void *A, int LDA,
-                     void *V, void *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     void *WORK);
-void CORE_chbtype2cb(int N, int NB,
-                     void *A, int LDA,
-                     void *V, void *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     void *WORK);
-void CORE_chbtype3cb(int N, int NB,
-                     void *A, int LDA,
-                     void *V, void *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     void *WORK);
 void CORE_chegst(int itype, int uplo, int N,
                  void *A, int LDA,
                  void *B, int LDB, int *INFO);
@@ -230,13 +194,6 @@ int  CORE_csyssq(int uplo, int N,
                  void *A, int LDA,
                  float *scale, float *sumsq);
 int CORE_csytf2_nopiv(int uplo, int n, void *A, int lda);
-void CORE_cswpab(int i, int n1, int n2,
-                 void *A, void *work);
-int  CORE_cswptr_ontile(void *descA, int i1, int i2, const int *ipiv, int inc,
-                        void *Akk, int ldak);
-void CORE_ctrdalg(int uplo, int N, int NB,
-                  void *pA, void *V, void *TAU,
-                  int i, int j, int m, int grsiz);
 void CORE_ctrmm(int side, int uplo,
                 int transA, int diag,
                 int M, int N,
@@ -352,18 +309,6 @@ int  CORE_cunmqr(int side, int trans,
 void CORE_dasum(int storev, int uplo, int M, int N,
                  const double *A, int lda, double *work);
-void CORE_dbrdalg(int uplo, int N, int NB,
-                  void *pA, double *C, double *S,
-                  int i, int j, int m, int grsiz);
-int CORE_dgbelr(int uplo, int N,
-                void **A, double *V, double *TAU,
-                int st, int ed, int eltsize);
-int CORE_dgbrce(int uplo, int N,
-                void **A, double *V, double *TAU,
-                int st, int ed, int eltsize);
-int CORE_dgblrx(int uplo, int N,
-                void **A, double *V, double *TAU,
-                int st, int ed, int eltsize);
 int CORE_dgeadd(int M, int N, double alpha,
                 const double *A, int LDA,
                       double *B, int LDB);
@@ -409,30 +354,6 @@ int  CORE_dgetrf_reclap(int M, int N,
 int  CORE_dgetrf_rectil(void *A, int *IPIV, int *info);
 void CORE_dgetrip(int m, int n, double *A,
                   double *work);
-int CORE_dhbelr(int uplo, int N,
-                void **A, double *V, double *TAU,
-                int st, int ed, int eltsize);
-int CORE_dhblrx(int uplo, int N,
-                void **A, double *V, double *TAU,
-                int st, int ed, int eltsize);
-int CORE_dhbrce(int uplo, int N,
-                void **A, double *V, double *TAU,
-                int st, int ed, int eltsize);
-void CORE_dhbtype1cb(int N, int NB,
-                     double *A, int LDA,
-                     double *V, double *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     double *WORK);
-void CORE_dhbtype2cb(int N, int NB,
-                     double *A, int LDA,
-                     double *V, double *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     double *WORK);
-void CORE_dhbtype3cb(int N, int NB,
-                     double *A, int LDA,
-                     const double *V, const double *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     double *WORK);
 void CORE_dsygst(int itype, int uplo, int N,
                  double *A, int LDA,
                  double *B, int LDB, int *INFO);
@@ -553,13 +474,6 @@ int  CORE_dssssm(int M1, int N1, int M2, int N2, int K, int IB,
                  const double *L2, int LDL2,
                  const int *IPIV);
 int CORE_dsytf2_nopiv(int uplo, int n, double *A, int lda);
-void CORE_dswpab(int i, int n1, int n2,
-                 double *A, double *work);
-int  CORE_dswptr_ontile(void *descA, int i1, int i2, const int *ipiv, int inc,
-                        const double *Akk, int ldak);
-void CORE_dtrdalg(int uplo, int N, int NB,
-                  void *pA, double *V, double *TAU,
-                  int i, int j, int m, int grsiz);
 void CORE_dtrmm(int side, int uplo,
                 int transA, int diag,
                 int M, int N,
@@ -682,18 +596,6 @@ void CORE_dlag2s(int m, int n,
 void CORE_sasum(int storev, int uplo, int M, int N,
                  const float *A, int lda, float *work);
-void CORE_sbrdalg(int uplo, int N, int NB,
-                  void *pA, float *C, float *S,
-                  int i, int j, int m, int grsiz);
-int CORE_sgbelr(int uplo, int N,
-                void **A, float *V, float *TAU,
-                int st, int ed, int eltsize);
-int CORE_sgbrce(int uplo, int N,
-                void **A, float *V, float *TAU,
-                int st, int ed, int eltsize);
-int CORE_sgblrx(int uplo, int N,
-                void **A, float *V, float *TAU,
-                int st, int ed, int eltsize);
 int CORE_sgeadd(int M, int N, float alpha,
                 const float *A, int LDA,
                       float *B, int LDB);
@@ -739,30 +641,6 @@ int  CORE_sgetrf_reclap(int M, int N,
 int  CORE_sgetrf_rectil(void *A, int *IPIV, int *info);
 void CORE_sgetrip(int m, int n, float *A,
                   float *work);
-int CORE_shbelr(int uplo, int N,
-                void **A, float *V, float *TAU,
-                int st, int ed, int eltsize);
-int CORE_shblrx(int uplo, int N,
-                void **A, float *V, float *TAU,
-                int st, int ed, int eltsize);
-int CORE_shbrce(int uplo, int N,
-                void **A, float *V, float *TAU,
-                int st, int ed, int eltsize);
-void CORE_shbtype1cb(int N, int NB,
-                     float *A, int LDA,
-                     float *V, float *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     float *WORK);
-void CORE_shbtype2cb(int N, int NB,
-                     float *A, int LDA,
-                     float *V, float *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     float *WORK);
-void CORE_shbtype3cb(int N, int NB,
-                     float *A, int LDA,
-                     const float *V, const float *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     float *WORK);
 void CORE_ssygst(int itype, int uplo, int N,
                  float *A, int LDA,
                  float *B, int LDB, int *INFO);
@@ -883,13 +761,6 @@ int  CORE_sssssm(int M1, int N1, int M2, int N2, int K, int IB,
                  const float *L2, int LDL2,
                  const int *IPIV);
 int CORE_ssytf2_nopiv(int uplo, int n, float *A, int lda);
-void CORE_sswpab(int i, int n1, int n2,
-                 float *A, float *work);
-int  CORE_sswptr_ontile(void *descA, int i1, int i2, const int *ipiv, int inc,
-                        const float *Akk, int ldak);
-void CORE_strdalg(int uplo, int N, int NB,
-                  void *pA, float *V, float *TAU,
-                  int i, int j, int m, int grsiz);
 void CORE_strmm(int side, int uplo,
                 int transA, int diag,
                 int M, int N,
@@ -1005,18 +876,6 @@ int  CORE_sormqr(int side, int trans,
 void CORE_dzasum(int storev, int uplo, int M, int N,
                  void *A, int lda, double *work);
-void CORE_zbrdalg(int uplo, int N, int NB,
-                  void *pA, void *C, void *S,
-                  int i, int j, int m, int grsiz);
-int CORE_zgbelr(int uplo, int N,
-                void **A, void *V, void *TAU,
-                int st, int ed, int eltsize);
-int CORE_zgbrce(int uplo, int N,
-                void **A, void *V, void *TAU,
-                int st, int ed, int eltsize);
-int CORE_zgblrx(int uplo, int N,
-                void **A, void *V, void *TAU,
-                int st, int ed, int eltsize);
 int CORE_zgeadd(int M, int N, void *alpha,
                 void *A, int LDA,
                       void *B, int LDB);
@@ -1062,30 +921,6 @@ int  CORE_zgetrf_reclap(int M, int N,
 int  CORE_zgetrf_rectil(void *A, int *IPIV, int *info);
 void CORE_zgetrip(int m, int n, void *A,
                   void *work);
-int CORE_zhbelr(int uplo, int N,
-                void **A, void *V, void *TAU,
-                int st, int ed, int eltsize);
-int CORE_zhblrx(int uplo, int N,
-                void **A, void *V, void *TAU,
-                int st, int ed, int eltsize);
-int CORE_zhbrce(int uplo, int N,
-                void **A, void *V, void *TAU,
-                int st, int ed, int eltsize);
-void CORE_zhbtype1cb(int N, int NB,
-                     void *A, int LDA,
-                     void *V, void *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     void *WORK);
-void CORE_zhbtype2cb(int N, int NB,
-                     void *A, int LDA,
-                     void *V, void *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     void *WORK);
-void CORE_zhbtype3cb(int N, int NB,
-                     void *A, int LDA,
-                     void *V, void *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     void *WORK);
 void CORE_zhegst(int itype, int uplo, int N,
                  void *A, int LDA,
                  void *B, int LDB, int *INFO);
@@ -1222,13 +1057,6 @@ int  CORE_zsyssq(int uplo, int N,
                  void *A, int LDA,
                  double *scale, double *sumsq);
 int CORE_zsytf2_nopiv(int uplo, int n, void *A, int lda);
-void CORE_zswpab(int i, int n1, int n2,
-                 void *A, void *work);
-int  CORE_zswptr_ontile(void *descA, int i1, int i2, const int *ipiv, int inc,
-                        void *Akk, int ldak);
-void CORE_ztrdalg(int uplo, int N, int NB,
-                  void *pA, void *V, void *TAU,
-                  int i, int j, int m, int grsiz);
 void CORE_ztrmm(int side, int uplo,
                 int transA, int diag,
                 int M, int N,
diff --git a/coreblas/include/coreblas/coreblas_z.h b/coreblas/include/coreblas/coreblas_z.h
index 8a755db76..ee434de43 100644
--- a/coreblas/include/coreblas/coreblas_z.h
+++ b/coreblas/include/coreblas/coreblas_z.h
@@ -32,18 +32,6 @@
 void CORE_dzasum(cham_store_t storev, cham_uplo_t uplo, int M, int N,
                  const CHAMELEON_Complex64_t *A, int lda, double *work);
-void CORE_zbrdalg(cham_uplo_t uplo, int N, int NB,
-                  const CHAM_desc_t *pA, CHAMELEON_Complex64_t *C, CHAMELEON_Complex64_t *S,
-                  int i, int j, int m, int grsiz);
-int CORE_zgbelr(cham_uplo_t uplo, int N,
-                CHAM_desc_t *A, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU,
-                int st, int ed, int eltsize);
-int CORE_zgbrce(cham_uplo_t uplo, int N,
-                CHAM_desc_t *A, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU,
-                int st, int ed, int eltsize);
-int CORE_zgblrx(cham_uplo_t uplo, int N,
-                CHAM_desc_t *A, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU,
-                int st, int ed, int eltsize);
 int CORE_zaxpy(int M, CHAMELEON_Complex64_t alpha,
                const CHAMELEON_Complex64_t *A, int incA,
                      CHAMELEON_Complex64_t *B, int incB);
@@ -99,30 +87,6 @@ void CORE_zgetrip(int m, int n, CHAMELEON_Complex64_t *A,
 void CORE_zhe2ge(cham_uplo_t uplo, int M, int N,
                  const CHAMELEON_Complex64_t *A, int LDA,
                  CHAMELEON_Complex64_t *B, int LDB);
-int CORE_zhbelr(cham_uplo_t uplo, int N,
-                CHAM_desc_t *A, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU,
-                int st, int ed, int eltsize);
-int CORE_zhblrx(cham_uplo_t uplo, int N,
-                CHAM_desc_t *A, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU,
-                int st, int ed, int eltsize);
-int CORE_zhbrce(cham_uplo_t uplo, int N,
-                CHAM_desc_t *A, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU,
-                int st, int ed, int eltsize);
-void CORE_zhbtype1cb(int N, int NB,
-                     CHAMELEON_Complex64_t *A, int LDA,
-                     CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     CHAMELEON_Complex64_t *WORK);
-void CORE_zhbtype2cb(int N, int NB,
-                     CHAMELEON_Complex64_t *A, int LDA,
-                     CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     CHAMELEON_Complex64_t *WORK);
-void CORE_zhbtype3cb(int N, int NB,
-                     CHAMELEON_Complex64_t *A, int LDA,
-                     const CHAMELEON_Complex64_t *V, const CHAMELEON_Complex64_t *TAU,
-                     int st, int ed, int sweep, int Vblksiz, int WANTZ,
-                     CHAMELEON_Complex64_t *WORK);
 void CORE_zhegst(int itype, cham_uplo_t uplo, int N,
                  CHAMELEON_Complex64_t *A, int LDA,
                  CHAMELEON_Complex64_t *B, int LDB, int *INFO);
@@ -261,10 +225,6 @@ int  CORE_zsyssq(cham_uplo_t uplo, int N,
                  const CHAMELEON_Complex64_t *A, int LDA,
                  double *scale, double *sumsq);
 int CORE_zsytf2_nopiv(cham_uplo_t uplo, int n, CHAMELEON_Complex64_t *A, int lda);
-void CORE_zswpab(int i, int n1, int n2,
-                 CHAMELEON_Complex64_t *A, CHAMELEON_Complex64_t *work);
-int  CORE_zswptr_ontile(CHAM_desc_t descA, int i1, int i2, const int *ipiv, int inc,
-                        const CHAMELEON_Complex64_t *Akk, int ldak);
 int CORE_ztradd(cham_uplo_t uplo, cham_trans_t trans, int M, int N,
                       CHAMELEON_Complex64_t alpha,
                 const CHAMELEON_Complex64_t *A, int LDA,
@@ -273,9 +233,6 @@ int CORE_ztradd(cham_uplo_t uplo, cham_trans_t trans, int M, int N,
 void CORE_ztrasm(cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag,
                  int M, int N,
                  const CHAMELEON_Complex64_t *A, int lda, double *work);
-void CORE_ztrdalg(cham_uplo_t uplo, int N, int NB,
-                  const CHAM_desc_t *pA, CHAMELEON_Complex64_t *V, CHAMELEON_Complex64_t *TAU,
-                  int i, int j, int m, int grsiz);
 void CORE_ztrmm(cham_side_t side, cham_uplo_t uplo,
                 cham_trans_t transA, cham_diag_t diag,
                 int M, int N,
diff --git a/cudablas/compute/cuda_zparfb.c b/cudablas/compute/cuda_zparfb.c
index 95bbe2115..b4e6f6c19 100644
--- a/cudablas/compute/cuda_zparfb.c
+++ b/cudablas/compute/cuda_zparfb.c
@@ -46,12 +46,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   : No transpose, apply Q;
- *         @arg ChamConjTrans : ConjTranspose, apply Q**H.
+ *         @arg ChamConjTrans : ConjTranspose, apply Q^H.
  * @param[in] direct
  *         Indicates how H is formed from a product of elementary
diff --git a/cudablas/compute/cuda_ztpmlqt.c b/cudablas/compute/cuda_ztpmlqt.c
index a8ff6ce5a..aaf70c231 100644
--- a/cudablas/compute/cuda_ztpmlqt.c
+++ b/cudablas/compute/cuda_ztpmlqt.c
@@ -33,12 +33,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  ConjTranspose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M
  *         The number of rows of the tile B. M >= 0.
@@ -78,7 +78,7 @@
  *         or (LDA,K) if SIDE = ChamRight
  *         On entry, the K-by-N or M-by-K matrix A.
  *         On exit, A is overwritten by the corresponding block of
- *         Q*C or Q**H*C or C*Q or C*Q**H.  See Further Details.
+ *         Q*C or Q^H*C or C*Q or C*Q^H.  See Further Details.
  * @param[in] LDA
  *         The leading dimension of the array A. LDA >= max(1,M).
@@ -88,7 +88,7 @@
  * @param[in,out] B
  *         On entry, the M-by-N tile B.
  *         On exit, B is overwritten by the corresponding block of
- *         Q*C or Q**H*C or C*Q or C*Q**H.  See Further Details.
+ *         Q*C or Q^H*C or C*Q or C*Q^H.  See Further Details.
  * @param[in] LDB
  *         The leading dimension of the tile B. LDB >= max(1,M).
@@ -122,11 +122,11 @@
  *  If trans='N' and side='L', C is on exit replaced with Q * C.
- *  If trans='C' and side='L', C is on exit replaced with Q**H * C.
+ *  If trans='C' and side='L', C is on exit replaced with Q^H * C.
  *  If trans='N' and side='R', C is on exit replaced with C * Q.
- *  If trans='C' and side='R', C is on exit replaced with C * Q**H.
+ *  If trans='C' and side='R', C is on exit replaced with C * Q^H.
diff --git a/cudablas/compute/cuda_ztpmqrt.c b/cudablas/compute/cuda_ztpmqrt.c
index 22319e42d..c9a1fea2a 100644
--- a/cudablas/compute/cuda_ztpmqrt.c
+++ b/cudablas/compute/cuda_ztpmqrt.c
@@ -33,12 +33,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  ConjTranspose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M
  *         The number of rows of the tile B. M >= 0.
@@ -78,7 +78,7 @@
  *         or (LDA,K) if SIDE = ChamRight
  *         On entry, the K-by-N or M-by-K matrix A.
  *         On exit, A is overwritten by the corresponding block of
- *         Q*C or Q**H*C or C*Q or C*Q**H.  See Further Details.
+ *         Q*C or Q^H*C or C*Q or C*Q^H.  See Further Details.
  * @param[in] LDA
  *         The leading dimension of the array A. LDA >= max(1,M).
@@ -88,7 +88,7 @@
  * @param[in,out] B
  *         On entry, the M-by-N tile B.
  *         On exit, B is overwritten by the corresponding block of
- *         Q*C or Q**H*C or C*Q or C*Q**H.  See Further Details.
+ *         Q*C or Q^H*C or C*Q or C*Q^H.  See Further Details.
  * @param[in] LDB
  *         The leading dimension of the tile B. LDB >= max(1,M).
@@ -123,11 +123,11 @@
  *  If trans='N' and side='L', C is on exit replaced with Q * C.
- *  If trans='C' and side='L', C is on exit replaced with Q**H * C.
+ *  If trans='C' and side='L', C is on exit replaced with Q^H * C.
  *  If trans='N' and side='R', C is on exit replaced with C * Q.
- *  If trans='C' and side='R', C is on exit replaced with C * Q**H.
+ *  If trans='C' and side='R', C is on exit replaced with C * Q^H.
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index f756adc45..e184ca536 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -28,7 +28,7 @@
 # List of codelets required by all runtimes
 # -----------------------------------------
-    codelets/codelet_zasum.c
+    codelets/codelet_dzasum.c
     # BLAS 1
diff --git a/runtime/openmp/codelets/codelet_zasum.c b/runtime/openmp/codelets/codelet_dzasum.c
similarity index 94%
rename from runtime/openmp/codelets/codelet_zasum.c
rename to runtime/openmp/codelets/codelet_dzasum.c
index 0ec02b39c..1ce65879b 100644
--- a/runtime/openmp/codelets/codelet_zasum.c
+++ b/runtime/openmp/codelets/codelet_dzasum.c
@@ -1,6 +1,6 @@
- * @file openmp/codelet_zasum.c
+ * @file openmp/codelet_dzasum.c
  * @copyright 2009-2014 The University of Tennessee and The University of
  *                      Tennessee Research Foundation. All rights reserved.
@@ -9,7 +9,7 @@
- * @brief Chameleon zasum OpenMP codelet
+ * @brief Chameleon dzasum OpenMP codelet
  * @version 0.9.2
  * @comment This file has been automatically generated
diff --git a/runtime/openmp/codelets/codelet_zunmlq.c b/runtime/openmp/codelets/codelet_zunmlq.c
index 6de8202b2..92d6e71f8 100644
--- a/runtime/openmp/codelets/codelet_zunmlq.c
+++ b/runtime/openmp/codelets/codelet_zunmlq.c
@@ -35,7 +35,7 @@
  *                    SIDE = 'L'     SIDE = 'R'
  *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q**H * C       C * Q**H
+ *    TRANS = 'C':      Q^H * C        C * Q^H
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -48,12 +48,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M
  *         The number of rows of the tile C.  M >= 0.
@@ -90,7 +90,7 @@
  * @param[in,out] C
  *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+ *         On exit, C is overwritten by Q*C or Q^H*C or C*Q^H or C*Q.
  * @param[in] LDC
  *         The leading dimension of the array C. LDC >= max(1,M).
diff --git a/runtime/openmp/codelets/codelet_zunmqr.c b/runtime/openmp/codelets/codelet_zunmqr.c
index 93190251e..66aa62b5d 100644
--- a/runtime/openmp/codelets/codelet_zunmqr.c
+++ b/runtime/openmp/codelets/codelet_zunmqr.c
@@ -34,7 +34,7 @@
  *                    SIDE = 'L'     SIDE = 'R'
  *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q**H * C       C * Q**H
+ *    TRANS = 'C':      Q^H * C        C * Q^H
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -47,12 +47,12 @@
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M
  *         The number of rows of the tile C.  M >= 0.
@@ -90,7 +90,7 @@
  * @param[in,out] C
  *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+ *         On exit, C is overwritten by Q*C or Q^H*C or C*Q^H or C*Q.
  * @param[in] LDC
  *         The leading dimension of the array C. LDC >= max(1,M).
diff --git a/runtime/parsec/codelets/codelet_zasum.c b/runtime/parsec/codelets/codelet_dzasum.c
similarity index 95%
rename from runtime/parsec/codelets/codelet_zasum.c
rename to runtime/parsec/codelets/codelet_dzasum.c
index ccea81743..e0faa8dd2 100644
--- a/runtime/parsec/codelets/codelet_zasum.c
+++ b/runtime/parsec/codelets/codelet_dzasum.c
@@ -1,6 +1,6 @@
- * @file parsec/codelet_zasum.c
+ * @file parsec/codelet_dzasum.c
  * @copyright 2009-2015 The University of Tennessee and The University of
  *                      Tennessee Research Foundation. All rights reserved.
@@ -9,7 +9,7 @@
- * @brief Chameleon zasum PaRSEC codelet
+ * @brief Chameleon dzasum PaRSEC codelet
  * @version 0.9.2
  * @author Reazul Hoque
@@ -50,7 +50,7 @@ void INSERT_TASK_dzasum(const RUNTIME_option_t *options,
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
-        PARSEC_dtd_taskpool, CORE_dzasum_parsec, options->priority, "zasum",
+        PARSEC_dtd_taskpool, CORE_dzasum_parsec, options->priority, "dzasum",
         sizeof(int),    &storev,                           VALUE,
         sizeof(int),    &uplo,                             VALUE,
         sizeof(int),           &M,                                VALUE,
diff --git a/runtime/quark/codelets/codelet_zasum.c b/runtime/quark/codelets/codelet_dzasum.c
similarity index 96%
rename from runtime/quark/codelets/codelet_zasum.c
rename to runtime/quark/codelets/codelet_dzasum.c
index 6dd5cce03..8c49a6f76 100644
--- a/runtime/quark/codelets/codelet_zasum.c
+++ b/runtime/quark/codelets/codelet_dzasum.c
@@ -1,6 +1,6 @@
- * @file quark/codelet_zasum.c
+ * @file quark/codelet_dzasum.c
  * @copyright 2009-2014 The University of Tennessee and The University of
  *                      Tennessee Research Foundation. All rights reserved.
@@ -9,7 +9,7 @@
- * @brief Chameleon zasum Quark codelet
+ * @brief Chameleon dzasum Quark codelet
  * @version 0.9.2
  * @comment This file has been automatically generated
diff --git a/runtime/quark/codelets/codelet_zunmlq.c b/runtime/quark/codelets/codelet_zunmlq.c
index 76141454b..5b8687571 100644
--- a/runtime/quark/codelets/codelet_zunmlq.c
+++ b/runtime/quark/codelets/codelet_zunmlq.c
@@ -59,7 +59,7 @@ void CORE_zunmlq_quark(Quark *quark)
  *                    SIDE = 'L'     SIDE = 'R'
  *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q**H * C       C * Q**H
+ *    TRANS = 'C':      Q^H * C        C * Q^H
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -72,12 +72,12 @@ void CORE_zunmlq_quark(Quark *quark)
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M
  *         The number of rows of the tile C.  M >= 0.
@@ -114,7 +114,7 @@ void CORE_zunmlq_quark(Quark *quark)
  * @param[in,out] C
  *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+ *         On exit, C is overwritten by Q*C or Q^H*C or C*Q^H or C*Q.
  * @param[in] LDC
  *         The leading dimension of the array C. LDC >= max(1,M).
diff --git a/runtime/quark/codelets/codelet_zunmqr.c b/runtime/quark/codelets/codelet_zunmqr.c
index 75968addd..f03746016 100644
--- a/runtime/quark/codelets/codelet_zunmqr.c
+++ b/runtime/quark/codelets/codelet_zunmqr.c
@@ -58,7 +58,7 @@ void CORE_zunmqr_quark(Quark *quark)
  *                    SIDE = 'L'     SIDE = 'R'
  *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q**H * C       C * Q**H
+ *    TRANS = 'C':      Q^H * C        C * Q^H
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -71,12 +71,12 @@ void CORE_zunmqr_quark(Quark *quark)
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M
  *         The number of rows of the tile C.  M >= 0.
@@ -114,7 +114,7 @@ void CORE_zunmqr_quark(Quark *quark)
  * @param[in,out] C
  *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+ *         On exit, C is overwritten by Q*C or Q^H*C or C*Q^H or C*Q.
  * @param[in] LDC
  *         The leading dimension of the array C. LDC >= max(1,M).
diff --git a/runtime/quark/include/core_blas_dag.h b/runtime/quark/include/core_blas_dag.h
index f5ba7073d..83bdbd531 100644
--- a/runtime/quark/include/core_blas_dag.h
+++ b/runtime/quark/include/core_blas_dag.h
@@ -78,6 +78,8 @@
 #define DAG_CORE_TSTRF      DAG_SET_PROPERTIES( "TSTRF"     , "red"     )
 #define DAG_CORE_UNMLQ      DAG_SET_PROPERTIES( "UNMLQ"     , "cyan"    )
 #define DAG_CORE_UNMQR      DAG_SET_PROPERTIES( "UNMQR"     , "cyan"    )
+#define DAG_CORE_ORMLQ      DAG_SET_PROPERTIES( "ORMLQ"     , "cyan"    )
+#define DAG_CORE_ORMQR      DAG_SET_PROPERTIES( "ORMQR"     , "cyan"    )
diff --git a/runtime/starpu/codelets/codelet_zasum.c b/runtime/starpu/codelets/codelet_dzasum.c
similarity index 88%
rename from runtime/starpu/codelets/codelet_zasum.c
rename to runtime/starpu/codelets/codelet_dzasum.c
index 1cbd3e6b2..0e94fc672 100644
--- a/runtime/starpu/codelets/codelet_zasum.c
+++ b/runtime/starpu/codelets/codelet_dzasum.c
@@ -1,6 +1,6 @@
- * @file starpu/codelet_zasum.c
+ * @file starpu/codelet_dzasum.c
  * @copyright 2009-2014 The University of Tennessee and The University of
  *                      Tennessee Research Foundation. All rights reserved.
@@ -9,7 +9,7 @@
- * @brief Chameleon zasum StarPU codelet
+ * @brief Chameleon dzasum StarPU codelet
  * @version 0.9.2
  * @comment This file has been automatically generated
@@ -43,15 +43,15 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
  * Codelet definition
-CODELETS_CPU(zasum, 2, cl_dzasum_cpu_func)
+CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func)
 void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int M, int N,
                          const CHAM_desc_t *A, int Am, int An, int lda,
                          const CHAM_desc_t *B, int Bm, int Bn )
-    struct starpu_codelet *codelet = &cl_zasum;
-    void (*callback)(void*) = options->profiling ? cl_zasum_callback : NULL;
+    struct starpu_codelet *codelet = &cl_dzasum;
+    void (*callback)(void*) = options->profiling ? cl_dzasum_callback : NULL;
@@ -70,7 +70,7 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
-        STARPU_NAME, "zasum",
+        STARPU_NAME, "dzasum",
diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index 0c31aa653..35aea3122 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -22,7 +22,7 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
-CHAMELEON_CL_CB(zasum,         starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      M*N)
+CHAMELEON_CL_CB(dzasum,         starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      M*N)
 CHAMELEON_CL_CB(zaxpy,         starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[1]), 0,                                      M)
 CHAMELEON_CL_CB(zgeadd,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      M*N)
 CHAMELEON_CL_CB(zlascal,       starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      M*N)
diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c
index bab65bfcc..be36f957d 100644
--- a/runtime/starpu/codelets/codelet_zunmlq.c
+++ b/runtime/starpu/codelets/codelet_zunmlq.c
@@ -104,7 +104,7 @@ CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC)
  *                    SIDE = 'L'     SIDE = 'R'
  *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q**H * C       C * Q**H
+ *    TRANS = 'C':      Q^H * C        C * Q^H
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -117,12 +117,12 @@ CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC)
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q**H.
+ *         @arg ChamConjTrans :  ConjTranspose, apply Q^H.
  * @param[in] M
  *         The number of rows of the tile C.  M >= 0.
@@ -159,7 +159,7 @@ CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC)
  * @param[in,out] C
  *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+ *         On exit, C is overwritten by Q*C or Q^H*C or C*Q^H or C*Q.
  * @param[in] LDC
  *         The leading dimension of the array C. LDC >= max(1,M).
diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c
index ed8cbccd5..8ff98bc79 100644
--- a/runtime/starpu/codelets/codelet_zunmqr.c
+++ b/runtime/starpu/codelets/codelet_zunmqr.c
@@ -103,7 +103,7 @@ CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC)
  *                    SIDE = 'L'     SIDE = 'R'
  *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q**H * C       C * Q**H
+ *    TRANS = 'C':      Q^H * C       C * Q^H
  *  where Q is a complex unitary matrix defined as the product of k
  *  elementary reflectors
@@ -116,12 +116,12 @@ CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC)
  * @param[in] side
- *         @arg ChamLeft  : apply Q or Q**H from the Left;
- *         @arg ChamRight : apply Q or Q**H from the Right.
+ *         @arg ChamLeft  : apply Q or Q^H from the Left;
+ *         @arg ChamRight : apply Q or Q^H from the Right.
  * @param[in] trans
  *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q**H.
+ *         @arg ChamConjTrans :  Conjugate transpose, apply Q^H.
  * @param[in] M
  *         The number of rows of the tile C.  M >= 0.
@@ -159,7 +159,7 @@ CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC)
  * @param[in,out] C
  *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q**T*C or C*Q**T or C*Q.
+ *         On exit, C is overwritten by Q*C or Q^H*C or C*Q^H or C*Q.
  * @param[in] LDC
  *         The leading dimension of the array C. LDC >= max(1,M).
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index d9c6e915e..4936d329c 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -36,89 +36,89 @@
  * BLAS 1 functions
  * BLAS 3 functions
  * LAPACK functions
  * Auxiliary functions
  * MIXED PRECISION functions
  * DZ functions
  * CPU only functions
 #if defined(PRECISION_z) || defined(PRECISION_c)
 #endif /* _runtime_codelet_z_h_ */
diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h
index a68724449..4fb8b9cf8 100644
--- a/runtime/starpu/include/runtime_codelets.h
+++ b/runtime/starpu/include/runtime_codelets.h
@@ -112,11 +112,6 @@
-#define SCODELETS_HEADER(name)                CODELETS_HEADER(s##name)
-#define DCODELETS_HEADER(name)                CODELETS_HEADER(d##name)
-#define CCODELETS_HEADER(name)                CODELETS_HEADER(c##name)
-#define ZCODELETS_HEADER(name)                CODELETS_HEADER(z##name)
 #endif /* _runtime_codelets_h_ */
diff --git a/testing/lin/clagsy.f b/testing/lin/clagsy.f
index c5fea1b7d..0522d0600 100644
--- a/testing/lin/clagsy.f
+++ b/testing/lin/clagsy.f
@@ -55,7 +55,7 @@
 *  CLAGSY generates a complex symmetric matrix A, by pre- and post-
 *  multiplying a real diagonal matrix D with a random unitary matrix:
-*  A = U*D*U**T. The semi-bandwidth may then be reduced to k by
+*  A = U*D*U^T. The semi-bandwidth may then be reduced to k by
 *  additional unitary transformations.
 *  Arguments
diff --git a/testing/lin/clarhs.f b/testing/lin/clarhs.f
index 46f4d68c2..22165f3a3 100644
--- a/testing/lin/clarhs.f
+++ b/testing/lin/clarhs.f
@@ -58,7 +58,7 @@
 *  CLARHS chooses a set of NRHS random solution vectors and sets
 *  up the right hand sides for the linear system
 *     op( A ) * X = B,
-*  where op( A ) may be A, A**T (transpose of A), or A**H (conjugate
+*  where op( A ) may be A, A^T (transpose of A), or A^H (conjugate
 *  transpose of A).
 *  Arguments
@@ -102,8 +102,8 @@
 *          Used only if A is nonsymmetric; specifies the operation
 *          applied to the matrix A.
 *          = 'N':  B := A    * X
-*          = 'T':  B := A**T * X
-*          = 'C':  B := A**H * X
+*          = 'T':  B := A^T  * X
+*          = 'C':  B := A^H  * X
 *  M       (input) INTEGER
 *          The number of rows of the matrix A.  M >= 0.
diff --git a/testing/lin/clatrs.f b/testing/lin/clatrs.f
index 87da087b8..38cc5a2a2 100644
--- a/testing/lin/clatrs.f
+++ b/testing/lin/clatrs.f
@@ -57,10 +57,10 @@
 *  CLATRS solves one of the triangular systems
-*     A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
+*     A * x = s*b,  A^T * x = s*b,  or  A^H * x = s*b,
 *  with scaling to prevent overflow.  Here A is an upper or lower
-*  triangular matrix, A**T denotes the transpose of A, A**H denotes the
+*  triangular matrix, A^T denotes the transpose of A, A^H denotes the
 *  conjugate transpose of A, x and b are n-element vectors, and s is a
 *  scaling factor, usually less than or equal to 1, chosen so that the
 *  components of x will be less than the overflow threshold.  If the
@@ -79,8 +79,8 @@
 *  TRANS   (input) CHARACTER*1
 *          Specifies the operation applied to A.
 *          = 'N':  Solve A * x = s*b     (No transpose)
-*          = 'T':  Solve A**T * x = s*b  (Transpose)
-*          = 'C':  Solve A**H * x = s*b  (Conjugate transpose)
+*          = 'T':  Solve A^T * x = s*b   (Transpose)
+*          = 'C':  Solve A^H * x = s*b   (Conjugate transpose)
 *  DIAG    (input) CHARACTER*1
 *          Specifies whether or not the matrix A is unit triangular.
@@ -115,7 +115,7 @@
 *  SCALE   (output) REAL
 *          The scaling factor s for the triangular system
-*             A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b.
+*             A * x = s*b,  A^T * x = s*b,  or  A^H * x = s*b.
 *          If SCALE = 0, the matrix A is singular or badly scaled, and
 *          the vector x is an exact or approximate solution to A*x = 0.
@@ -181,8 +181,8 @@
 *  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
 *  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
-*  Similarly, a row-wise scheme is used to solve A**T *x = b  or
-*  A**H *x = b.  The basic algorithm for A upper triangular is
+*  Similarly, a row-wise scheme is used to solve A^T *x = b  or
+*  A^H *x = b.  The basic algorithm for A upper triangular is
 *       for j = 1, ..., n
 *            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
@@ -412,7 +412,7 @@
-*        Compute the growth in A**T * x = b  or  A**H * x = b.
+*        Compute the growth in A^T * x = b  or  A^H * x = b.
          IF( UPPER ) THEN
             JFIRST = 1
@@ -632,7 +632,7 @@
          ELSE IF( LSAME( TRANS, 'T' ) ) THEN
-*           Solve A**T * x = b
+*           Solve A^T * x = b
             DO 150 J = JFIRST, JLAST, JINC
@@ -744,7 +744,7 @@
 *                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
-*                       scale = 0 and compute a solution to A**T *x = 0.
+*                       scale = 0 and compute a solution to A^T *x = 0.
                         DO 140 I = 1, N
                            X( I ) = ZERO
@@ -766,7 +766,7 @@
-*           Solve A**H * x = b
+*           Solve A^H * x = b
             DO 190 J = JFIRST, JLAST, JINC
@@ -880,7 +880,7 @@
 *                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
-*                       scale = 0 and compute a solution to A**H *x = 0.
+*                       scale = 0 and compute a solution to A^H *x = 0.
                         DO 180 I = 1, N
                            X( I ) = ZERO
diff --git a/testing/lin/cpocon.f b/testing/lin/cpocon.f
index a5469bb8e..3fd5f26c4 100644
--- a/testing/lin/cpocon.f
+++ b/testing/lin/cpocon.f
@@ -59,7 +59,7 @@
 *  CPOCON estimates the reciprocal of the condition number (in the
 *  1-norm) of a complex Hermitian positive definite matrix using the
-*  Cholesky factorization A = U**H*U or A = L*L**H computed by CPOTRF.
+*  Cholesky factorization A = U^H*U or A = L*L^H computed by CPOTRF.
 *  An estimate is obtained for norm(inv(A)), and the reciprocal of the
 *  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
@@ -76,7 +76,7 @@
 *  A       (input) COMPLEX array, dimension (LDA,N)
 *          The triangular factor U or L from the Cholesky factorization
-*          A = U**H*U or A = L*L**H, as computed by CPOTRF.
+*          A = U^H*U or A = L*L^H, as computed by CPOTRF.
 *  LDA     (input) INTEGER
 *          The leading dimension of the array A.  LDA >= max(1,N).
diff --git a/testing/lin/cporfs.f b/testing/lin/cporfs.f
index 14708b5d3..616a71ce7 100644
--- a/testing/lin/cporfs.f
+++ b/testing/lin/cporfs.f
@@ -92,7 +92,7 @@
 *  AF      (input) COMPLEX array, dimension (LDAF,N)
 *          The triangular factor U or L from the Cholesky factorization
-*          A = U**H*U or A = L*L**H, as computed by CPOTRF.
+*          A = U^H*U or A = L*L^H, as computed by CPOTRF.
 *  LDAF    (input) INTEGER
 *          The leading dimension of the array AF.  LDAF >= max(1,N).
diff --git a/testing/lin/cposvx.f b/testing/lin/cposvx.f
index 950a1b514..d502bd56f 100644
--- a/testing/lin/cposvx.f
+++ b/testing/lin/cposvx.f
@@ -59,7 +59,7 @@
 *  Purpose
 *  =======
-*  CPOSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to
+*  CPOSVX uses the Cholesky factorization A = U^H*U or A = L*L^H to
 *  compute the solution to a complex system of linear equations
 *     A * X = B,
 *  where A is an N-by-N Hermitian positive definite matrix and X and B
@@ -82,8 +82,8 @@
 *  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
 *     factor the matrix A (after equilibration if FACT = 'E') as
-*        A = U**H* U,  if UPLO = 'U', or
-*        A = L * L**H,  if UPLO = 'L',
+*        A = U^H* U,  if UPLO = 'U', or
+*        A = L * L^H,  if UPLO = 'L',
 *     where U is an upper triangular matrix and L is a lower triangular
 *     matrix.
@@ -154,18 +154,18 @@
 *  AF      (input or output) COMPLEX array, dimension (LDAF,N)
 *          If FACT = 'F', then AF is an input argument and on entry
 *          contains the triangular factor U or L from the Cholesky
-*          factorization A = U**H*U or A = L*L**H, in the same storage
+*          factorization A = U^H*U or A = L*L^H, in the same storage
 *          format as A.  If EQUED .ne. 'N', then AF is the factored form
 *          of the equilibrated matrix diag(S)*A*diag(S).
 *          If FACT = 'N', then AF is an output argument and on exit
 *          returns the triangular factor U or L from the Cholesky
-*          factorization A = U**H*U or A = L*L**H of the original
+*          factorization A = U^H*U or A = L*L^H of the original
 *          matrix A.
 *          If FACT = 'E', then AF is an output argument and on exit
 *          returns the triangular factor U or L from the Cholesky
-*          factorization A = U**H*U or A = L*L**H of the equilibrated
+*          factorization A = U^H*U or A = L*L^H of the equilibrated
 *          matrix A (see the description of A for the form of the
 *          equilibrated matrix).
diff --git a/testing/lin/cpotri.f b/testing/lin/cpotri.f
index 839559633..dea6b9d01 100644
--- a/testing/lin/cpotri.f
+++ b/testing/lin/cpotri.f
@@ -53,7 +53,7 @@
 *  =======
 *  CPOTRI computes the inverse of a complex Hermitian positive definite
-*  matrix A using the Cholesky factorization A = U**H*U or A = L*L**H
+*  matrix A using the Cholesky factorization A = U^H*U or A = L*L^H
 *  computed by CPOTRF.
 *  Arguments
@@ -68,7 +68,7 @@
 *  A       (input/output) COMPLEX array, dimension (LDA,N)
 *          On entry, the triangular factor U or L from the Cholesky
-*          factorization A = U**H*U or A = L*L**H, as computed by
+*          factorization A = U^H*U or A = L*L^H, as computed by
 *          CPOTRF.
 *          On exit, the upper or lower triangle of the (Hermitian)
 *          inverse of A, overwriting the input factor U or L.
diff --git a/testing/lin/dpocon.f b/testing/lin/dpocon.f
index 43c957d38..1a4c1b67a 100644
--- a/testing/lin/dpocon.f
+++ b/testing/lin/dpocon.f
@@ -59,7 +59,7 @@
 *  DPOCON estimates the reciprocal of the condition number (in the
 *  1-norm) of a real symmetric positive definite matrix using the
-*  Cholesky factorization A = U**T*U or A = L*L**T computed by DPOTRF.
+*  Cholesky factorization A = U^T*U or A = L*L^T computed by DPOTRF.
 *  An estimate is obtained for norm(inv(A)), and the reciprocal of the
 *  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
@@ -76,7 +76,7 @@
 *  A       (input) DOUBLE PRECISION array, dimension (LDA,N)
 *          The triangular factor U or L from the Cholesky factorization
-*          A = U**T*U or A = L*L**T, as computed by DPOTRF.
+*          A = U^T*U or A = L*L^T, as computed by DPOTRF.
 *  LDA     (input) INTEGER
 *          The leading dimension of the array A.  LDA >= max(1,N).
diff --git a/testing/lin/dporfs.f b/testing/lin/dporfs.f
index 3a1496638..c93d5793a 100644
--- a/testing/lin/dporfs.f
+++ b/testing/lin/dporfs.f
@@ -92,7 +92,7 @@
 *  AF      (input) DOUBLE PRECISION array, dimension (LDAF,N)
 *          The triangular factor U or L from the Cholesky factorization
-*          A = U**T*U or A = L*L**T, as computed by DPOTRF.
+*          A = U^T*U or A = L*L^T, as computed by DPOTRF.
 *  LDAF    (input) INTEGER
 *          The leading dimension of the array AF.  LDAF >= max(1,N).
diff --git a/testing/lin/dposvx.f b/testing/lin/dposvx.f
index aeca6aee0..79d723a27 100644
--- a/testing/lin/dposvx.f
+++ b/testing/lin/dposvx.f
@@ -61,7 +61,7 @@
 *  Purpose
 *  =======
-*  DPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to
+*  DPOSVX uses the Cholesky factorization A = U^T*U or A = L*L^T to
 *  compute the solution to a real system of linear equations
 *     A * X = B,
 *  where A is an N-by-N symmetric positive definite matrix and X and B
@@ -84,8 +84,8 @@
 *  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
 *     factor the matrix A (after equilibration if FACT = 'E') as
-*        A = U**T* U,  if UPLO = 'U', or
-*        A = L * L**T,  if UPLO = 'L',
+*        A = U^T* U,  if UPLO = 'U', or
+*        A = L * L^T,  if UPLO = 'L',
 *     where U is an upper triangular matrix and L is a lower triangular
 *     matrix.
@@ -156,18 +156,18 @@
 *  AF      (input or output) DOUBLE PRECISION array, dimension (LDAF,N)
 *          If FACT = 'F', then AF is an input argument and on entry
 *          contains the triangular factor U or L from the Cholesky
-*          factorization A = U**T*U or A = L*L**T, in the same storage
+*          factorization A = U^T*U or A = L*L^T, in the same storage
 *          format as A.  If EQUED .ne. 'N', then AF is the factored form
 *          of the equilibrated matrix diag(S)*A*diag(S).
 *          If FACT = 'N', then AF is an output argument and on exit
 *          returns the triangular factor U or L from the Cholesky
-*          factorization A = U**T*U or A = L*L**T of the original
+*          factorization A = U^T*U or A = L*L^T of the original
 *          matrix A.
 *          If FACT = 'E', then AF is an output argument and on exit
 *          returns the triangular factor U or L from the Cholesky
-*          factorization A = U**T*U or A = L*L**T of the equilibrated
+*          factorization A = U^T*U or A = L*L^T of the equilibrated
 *          matrix A (see the description of A for the form of the
 *          equilibrated matrix).
diff --git a/testing/lin/dpotri.f b/testing/lin/dpotri.f
index f8585b348..2a5f4c2dd 100644
--- a/testing/lin/dpotri.f
+++ b/testing/lin/dpotri.f
@@ -53,7 +53,7 @@
 *  =======
 *  DPOTRI computes the inverse of a real symmetric positive definite
-*  matrix A using the Cholesky factorization A = U**T*U or A = L*L**T
+*  matrix A using the Cholesky factorization A = U^T*U or A = L*L^T
 *  computed by DPOTRF.
 *  Arguments
@@ -68,7 +68,7 @@
 *  A       (input/output) DOUBLE PRECISION array, dimension (LDA,N)
 *          On entry, the triangular factor U or L from the Cholesky
-*          factorization A = U**T*U or A = L*L**T, as computed by
+*          factorization A = U^T*U or A = L*L^T, as computed by
 *          DPOTRF.
 *          On exit, the upper or lower triangle of the (symmetric)
 *          inverse of A, overwriting the input factor U or L.
diff --git a/testing/lin/spocon.f b/testing/lin/spocon.f
index 02392607f..380896480 100644
--- a/testing/lin/spocon.f
+++ b/testing/lin/spocon.f
@@ -59,7 +59,7 @@
 *  SPOCON estimates the reciprocal of the condition number (in the 
 *  1-norm) of a real symmetric positive definite matrix using the
-*  Cholesky factorization A = U**T*U or A = L*L**T computed by SPOTRF.
+*  Cholesky factorization A = U^T*U or A = L*L^T computed by SPOTRF.
 *  An estimate is obtained for norm(inv(A)), and the reciprocal of the
 *  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
@@ -76,7 +76,7 @@
 *  A       (input) REAL array, dimension (LDA,N)
 *          The triangular factor U or L from the Cholesky factorization
-*          A = U**T*U or A = L*L**T, as computed by SPOTRF.
+*          A = U^T*U or A = L*L^T, as computed by SPOTRF.
 *  LDA     (input) INTEGER
 *          The leading dimension of the array A.  LDA >= max(1,N).
diff --git a/testing/lin/sporfs.f b/testing/lin/sporfs.f
index 8dcdea760..e633b0978 100644
--- a/testing/lin/sporfs.f
+++ b/testing/lin/sporfs.f
@@ -92,7 +92,7 @@
 *  AF      (input) REAL array, dimension (LDAF,N)
 *          The triangular factor U or L from the Cholesky factorization
-*          A = U**T*U or A = L*L**T, as computed by SPOTRF.
+*          A = U^T*U or A = L*L^T, as computed by SPOTRF.
 *  LDAF    (input) INTEGER
 *          The leading dimension of the array AF.  LDAF >= max(1,N).
diff --git a/testing/lin/sposvx.f b/testing/lin/sposvx.f
index b8a94475a..8a8f53564 100644
--- a/testing/lin/sposvx.f
+++ b/testing/lin/sposvx.f
@@ -61,7 +61,7 @@
 *  Purpose
 *  =======
-*  SPOSVX uses the Cholesky factorization A = U**T*U or A = L*L**T to
+*  SPOSVX uses the Cholesky factorization A = U^T*U or A = L*L^T to
 *  compute the solution to a real system of linear equations
 *     A * X = B,
 *  where A is an N-by-N symmetric positive definite matrix and X and B
@@ -84,8 +84,8 @@
 *  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
 *     factor the matrix A (after equilibration if FACT = 'E') as
-*        A = U**T* U,  if UPLO = 'U', or
-*        A = L * L**T,  if UPLO = 'L',
+*        A = U^T* U,  if UPLO = 'U', or
+*        A = L * L^T,  if UPLO = 'L',
 *     where U is an upper triangular matrix and L is a lower triangular
 *     matrix.
@@ -156,18 +156,18 @@
 *  AF      (input or output) REAL array, dimension (LDAF,N)
 *          If FACT = 'F', then AF is an input argument and on entry
 *          contains the triangular factor U or L from the Cholesky
-*          factorization A = U**T*U or A = L*L**T, in the same storage
+*          factorization A = U^T*U or A = L*L^T, in the same storage
 *          format as A.  If EQUED .ne. 'N', then AF is the factored form
 *          of the equilibrated matrix diag(S)*A*diag(S).
 *          If FACT = 'N', then AF is an output argument and on exit
 *          returns the triangular factor U or L from the Cholesky
-*          factorization A = U**T*U or A = L*L**T of the original
+*          factorization A = U^T*U or A = L*L^T of the original
 *          matrix A.
 *          If FACT = 'E', then AF is an output argument and on exit
 *          returns the triangular factor U or L from the Cholesky
-*          factorization A = U**T*U or A = L*L**T of the equilibrated
+*          factorization A = U^T*U or A = L*L^T of the equilibrated
 *          matrix A (see the description of A for the form of the
 *          equilibrated matrix).
diff --git a/testing/lin/spotri.f b/testing/lin/spotri.f
index 13885e2fd..d52f05699 100644
--- a/testing/lin/spotri.f
+++ b/testing/lin/spotri.f
@@ -53,7 +53,7 @@
 *  =======
 *  SPOTRI computes the inverse of a real symmetric positive definite
-*  matrix A using the Cholesky factorization A = U**T*U or A = L*L**T
+*  matrix A using the Cholesky factorization A = U^T*U or A = L*L^T
 *  computed by SPOTRF.
 *  Arguments
@@ -68,7 +68,7 @@
 *  A       (input/output) REAL array, dimension (LDA,N)
 *          On entry, the triangular factor U or L from the Cholesky
-*          factorization A = U**T*U or A = L*L**T, as computed by
+*          factorization A = U^T*U or A = L*L^T, as computed by
 *          SPOTRF.
 *          On exit, the upper or lower triangle of the (symmetric)
 *          inverse of A, overwriting the input factor U or L.
diff --git a/testing/lin/zlagsy.f b/testing/lin/zlagsy.f
index d2a05500d..a9366c90c 100644
--- a/testing/lin/zlagsy.f
+++ b/testing/lin/zlagsy.f
@@ -55,7 +55,7 @@
 *  ZLAGSY generates a complex symmetric matrix A, by pre- and post-
 *  multiplying a real diagonal matrix D with a random unitary matrix:
-*  A = U*D*U**T. The semi-bandwidth may then be reduced to k by
+*  A = U*D*U^T. The semi-bandwidth may then be reduced to k by
 *  additional unitary transformations.
 *  Arguments
diff --git a/testing/lin/zlarhs.f b/testing/lin/zlarhs.f
index 1da073177..333feeb71 100644
--- a/testing/lin/zlarhs.f
+++ b/testing/lin/zlarhs.f
@@ -58,7 +58,7 @@
 *  ZLARHS chooses a set of NRHS random solution vectors and sets
 *  up the right hand sides for the linear system
 *     op( A ) * X = B,
-*  where op( A ) may be A, A**T (transpose of A), or A**H (conjugate
+*  where op( A ) may be A, A^T (transpose of A), or A^H (conjugate
 *  transpose of A).
 *  Arguments
@@ -102,8 +102,8 @@
 *          Used only if A is nonsymmetric; specifies the operation
 *          applied to the matrix A.
 *          = 'N':  B := A    * X
-*          = 'T':  B := A**T * X
-*          = 'C':  B := A**H * X
+*          = 'T':  B := A^T  * X
+*          = 'C':  B := A^H  * X
 *  M       (input) INTEGER
 *          The number of rows of the matrix A.  M >= 0.
diff --git a/testing/lin/zlatrs.f b/testing/lin/zlatrs.f
index c4271a045..ba7f497ef 100644
--- a/testing/lin/zlatrs.f
+++ b/testing/lin/zlatrs.f
@@ -57,10 +57,10 @@
 *  ZLATRS solves one of the triangular systems
-*     A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b,
+*     A * x = s*b,  A^T * x = s*b,  or  A^H * x = s*b,
 *  with scaling to prevent overflow.  Here A is an upper or lower
-*  triangular matrix, A**T denotes the transpose of A, A**H denotes the
+*  triangular matrix, A^T denotes the transpose of A, A^H denotes the
 *  conjugate transpose of A, x and b are n-element vectors, and s is a
 *  scaling factor, usually less than or equal to 1, chosen so that the
 *  components of x will be less than the overflow threshold.  If the
@@ -79,8 +79,8 @@
 *  TRANS   (input) CHARACTER*1
 *          Specifies the operation applied to A.
 *          = 'N':  Solve A * x = s*b     (No transpose)
-*          = 'T':  Solve A**T * x = s*b  (Transpose)
-*          = 'C':  Solve A**H * x = s*b  (Conjugate transpose)
+*          = 'T':  Solve A^T * x = s*b   (Transpose)
+*          = 'C':  Solve A^H * x = s*b   (Conjugate transpose)
 *  DIAG    (input) CHARACTER*1
 *          Specifies whether or not the matrix A is unit triangular.
@@ -115,7 +115,7 @@
 *          The scaling factor s for the triangular system
-*             A * x = s*b,  A**T * x = s*b,  or  A**H * x = s*b.
+*             A * x = s*b,  A^T * x = s*b,  or  A^H * x = s*b.
 *          If SCALE = 0, the matrix A is singular or badly scaled, and
 *          the vector x is an exact or approximate solution to A*x = 0.
@@ -181,8 +181,8 @@
 *  prevent overflow, but if the bound overflows, x is set to 0, x(j) to
 *  1, and scale to 0, and a non-trivial solution to A*x = 0 is found.
-*  Similarly, a row-wise scheme is used to solve A**T *x = b  or
-*  A**H *x = b.  The basic algorithm for A upper triangular is
+*  Similarly, a row-wise scheme is used to solve A^T *x = b  or
+*  A^H *x = b.  The basic algorithm for A upper triangular is
 *       for j = 1, ..., n
 *            x(j) := ( b(j) - A[1:j-1,j]' * x[1:j-1] ) / A(j,j)
@@ -412,7 +412,7 @@
-*        Compute the growth in A**T * x = b  or  A**H * x = b.
+*        Compute the growth in A^T * x = b  or  A^H * x = b.
          IF( UPPER ) THEN
             JFIRST = 1
@@ -632,7 +632,7 @@
          ELSE IF( LSAME( TRANS, 'T' ) ) THEN
-*           Solve A**T * x = b
+*           Solve A^T * x = b
             DO 170 J = JFIRST, JLAST, JINC
@@ -744,7 +744,7 @@
 *                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
-*                       scale = 0 and compute a solution to A**T *x = 0.
+*                       scale = 0 and compute a solution to A^T *x = 0.
                      DO 150 I = 1, N
                         X( I ) = ZERO
@@ -766,7 +766,7 @@
-*           Solve A**H * x = b
+*           Solve A^H * x = b
             DO 220 J = JFIRST, JLAST, JINC
@@ -880,7 +880,7 @@
 *                       A(j,j) = 0:  Set x(1:n) = 0, x(j) = 1, and
-*                       scale = 0 and compute a solution to A**H *x = 0.
+*                       scale = 0 and compute a solution to A^H *x = 0.
                      DO 200 I = 1, N
                         X( I ) = ZERO
diff --git a/testing/lin/zpocon.f b/testing/lin/zpocon.f
index 4ead889ed..b3e91f057 100644
--- a/testing/lin/zpocon.f
+++ b/testing/lin/zpocon.f
@@ -59,7 +59,7 @@
 *  ZPOCON estimates the reciprocal of the condition number (in the
 *  1-norm) of a complex Hermitian positive definite matrix using the
-*  Cholesky factorization A = U**H*U or A = L*L**H computed by ZPOTRF.
+*  Cholesky factorization A = U^H*U or A = L*L^H computed by ZPOTRF.
 *  An estimate is obtained for norm(inv(A)), and the reciprocal of the
 *  condition number is computed as RCOND = 1 / (ANORM * norm(inv(A))).
@@ -76,7 +76,7 @@
 *  A       (input) COMPLEX*16 array, dimension (LDA,N)
 *          The triangular factor U or L from the Cholesky factorization
-*          A = U**H*U or A = L*L**H, as computed by ZPOTRF.
+*          A = U^H*U or A = L*L^H, as computed by ZPOTRF.
 *  LDA     (input) INTEGER
 *          The leading dimension of the array A.  LDA >= max(1,N).
diff --git a/testing/lin/zporfs.f b/testing/lin/zporfs.f
index 4503ef94a..696739a6c 100644
--- a/testing/lin/zporfs.f
+++ b/testing/lin/zporfs.f
@@ -92,7 +92,7 @@
 *  AF      (input) COMPLEX*16 array, dimension (LDAF,N)
 *          The triangular factor U or L from the Cholesky factorization
-*          A = U**H*U or A = L*L**H, as computed by ZPOTRF.
+*          A = U^H*U or A = L*L^H, as computed by ZPOTRF.
 *  LDAF    (input) INTEGER
 *          The leading dimension of the array AF.  LDAF >= max(1,N).
diff --git a/testing/lin/zposvx.f b/testing/lin/zposvx.f
index 5fe686b41..4d8d322fe 100644
--- a/testing/lin/zposvx.f
+++ b/testing/lin/zposvx.f
@@ -59,7 +59,7 @@
 *  Purpose
 *  =======
-*  ZPOSVX uses the Cholesky factorization A = U**H*U or A = L*L**H to
+*  ZPOSVX uses the Cholesky factorization A = U^H*U or A = L*L^H to
 *  compute the solution to a complex system of linear equations
 *     A * X = B,
 *  where A is an N-by-N Hermitian positive definite matrix and X and B
@@ -82,8 +82,8 @@
 *  2. If FACT = 'N' or 'E', the Cholesky decomposition is used to
 *     factor the matrix A (after equilibration if FACT = 'E') as
-*        A = U**H* U,  if UPLO = 'U', or
-*        A = L * L**H,  if UPLO = 'L',
+*        A = U^H* U,  if UPLO = 'U', or
+*        A = L * L^H,  if UPLO = 'L',
 *     where U is an upper triangular matrix and L is a lower triangular
 *     matrix.
@@ -154,18 +154,18 @@
 *  AF      (input or output) COMPLEX*16 array, dimension (LDAF,N)
 *          If FACT = 'F', then AF is an input argument and on entry
 *          contains the triangular factor U or L from the Cholesky
-*          factorization A = U**H*U or A = L*L**H, in the same storage
+*          factorization A = U^H*U or A = L*L^H, in the same storage
 *          format as A.  If EQUED .ne. 'N', then AF is the factored form
 *          of the equilibrated matrix diag(S)*A*diag(S).
 *          If FACT = 'N', then AF is an output argument and on exit
 *          returns the triangular factor U or L from the Cholesky
-*          factorization A = U**H*U or A = L*L**H of the original
+*          factorization A = U^H*U or A = L*L^H of the original
 *          matrix A.
 *          If FACT = 'E', then AF is an output argument and on exit
 *          returns the triangular factor U or L from the Cholesky
-*          factorization A = U**H*U or A = L*L**H of the equilibrated
+*          factorization A = U^H*U or A = L*L^H of the equilibrated
 *          matrix A (see the description of A for the form of the
 *          equilibrated matrix).
diff --git a/testing/lin/zpotri.f b/testing/lin/zpotri.f
index bf86de0ee..ed9fd8dd6 100644
--- a/testing/lin/zpotri.f
+++ b/testing/lin/zpotri.f
@@ -53,7 +53,7 @@
 *  =======
 *  ZPOTRI computes the inverse of a complex Hermitian positive definite
-*  matrix A using the Cholesky factorization A = U**H*U or A = L*L**H
+*  matrix A using the Cholesky factorization A = U^H*U or A = L*L^H
 *  computed by ZPOTRF.
 *  Arguments
@@ -68,7 +68,7 @@
 *  A       (input/output) COMPLEX*16 array, dimension (LDA,N)
 *          On entry, the triangular factor U or L from the Cholesky
-*          factorization A = U**H*U or A = L*L**H, as computed by
+*          factorization A = U^H*U or A = L*L^H, as computed by
 *          ZPOTRF.
 *          On exit, the upper or lower triangle of the (Hermitian)
 *          inverse of A, overwriting the input factor U or L.
diff --git a/timing/timing_zauxiliary.c b/timing/timing_zauxiliary.c
index f804164b6..007d57c85 100644
--- a/timing/timing_zauxiliary.c
+++ b/timing/timing_zauxiliary.c
@@ -314,8 +314,8 @@ double z_check_solution(int M, int N, int NRHS, CHAMELEON_Complex64_t *A, int LD
  *  *  Check the accuracy of the computed inverse
  *   */
-int zcheck_inverse(int N, CHAMELEON_Complex64_t *A1, CHAMELEON_Complex64_t *A2, int LDA,
-                        cham_uplo_t uplo, double *rnorm, double *anorm, double *ainvnorm )
+int z_check_inverse( int N, CHAMELEON_Complex64_t *A1, CHAMELEON_Complex64_t *A2, int LDA,
+                     cham_uplo_t uplo, double *rnorm, double *anorm, double *ainvnorm )
     int info_inverse;
     int i, j;
diff --git a/timing/timing_zauxiliary.h b/timing/timing_zauxiliary.h
index 2c9957f52..6fbb0ff15 100644
--- a/timing/timing_zauxiliary.h
+++ b/timing/timing_zauxiliary.h
@@ -39,8 +39,8 @@ double z_check_solution(int M, int N, int NRHS,
                       CHAMELEON_Complex64_t *B1, CHAMELEON_Complex64_t *B2, int LDB,
                       double *anorm, double *bnorm, double *xnorm);
-int zcheck_inverse(int N, CHAMELEON_Complex64_t *A1, CHAMELEON_Complex64_t *A2,
-                         int LDA, cham_uplo_t uplo, double *rnorm, double *anorm, double *ainvnorm);
+int z_check_inverse( int N, CHAMELEON_Complex64_t *A1, CHAMELEON_Complex64_t *A2,
+                     int LDA, cham_uplo_t uplo, double *rnorm, double *anorm, double *ainvnorm );
 #endif /* _timing_zauxiliary_h_ */