diff --git a/.gitignore b/.gitignore
index 8f6bc547daa6b82071fe009078707683d3c83aa4..a6fc53760369da3fe6ead5acf96181bb418e850b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -464,6 +464,7 @@ MigrationBackup/
 FodyWeavers.xsd
 
 # VS Code files for those working on multiple tools
+.vscode
 .vscode/*
 !.vscode/settings.json
 !.vscode/tasks.json
diff --git a/ChangeLog b/ChangeLog
index 237f203273ceef734df52a947d35ff7cd52f8a33..e615c618376b5283cd311809fd3c5721e7d994ea 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,8 @@
 chameleon-1.3.0
 ------------------------------------------------------------------------
+ - mixed-precision: introduce descripto with precision adapted to local norms
+        - Add CHAMELEON_[dz]gered... functions to reduce the precision of the tiles based on a requested accuracy
+        - Add CHAMELEON_[dz]gerst... functions to restore the original numerical precision of the tiles in a descriptor
  - types: add support for half precision arithmetic into the data descriptors
  - cuda: add half precision conversion kernels, and variants of the gemm kernels (hgemm, and gemmex)
  - descriptors: Add the possibility to pass arguments to the rankof
diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py
index df41dd109b68835ae2f07759d0bb81b758bfe075..a833ae81636299440af4f81b27efe9260497d4a9 100644
--- a/cmake_modules/local_subs.py
+++ b/cmake_modules/local_subs.py
@@ -11,7 +11,7 @@
  @author Florent Pruvost
  @author Nathalie Furmento
  @author Alycia Lisito
- @date 2023-07-04
+ @date 2023-07-06
 
 """
 _extra_blas = [
@@ -39,6 +39,7 @@ _extra_blas = [
     ('',                     'slatm1',               'dlatm1',               'slatm1',               'dlatm1'              ),
     ('',                     'sgenm2',               'dgenm2',               'cgenm2',               'zgenm2'              ),
     ('',                     'slag2c_fake',          'dlag2z_fake',          'slag2c',               'dlag2z'              ),
+    ('',                     'slag2d',               'slag2d',               'clag2z',               'clag2z'              ),
     ('',                     'slag2h',               'dlag2h',               'slag2h',               'dlag2h'              ),
     ('',                     'hlag2s',               'hlag2d',               'hlag2s',               'hlag2d'              ),
     ('',                     'slag2h',               'dlag2h',               'clag2x',               'zlag2x'              ),
@@ -48,6 +49,8 @@ _extra_blas = [
     ('',                     'sgesum',               'dgesum',               'cgesum',               'zgesum'              ),
     ('',                     'sgersum',              'dgersum',              'cgersum',              'zgersum'             ),
     ('',                     'sprint',               'dprint',               'cprint',               'zprint'              ),
+    ('',                     'sgered',               'dgered',               'cgered',               'zgered'              ),
+    ('',                     'sgerst',               'dgerst',               'cgerst',               'zgerst'              ),
 ]
 
 _extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ]
@@ -114,6 +117,7 @@ subs = {
         ('CHAMELEON_p',          'CHAMELEON_s',          'CHAMELEON_d',          'CHAMELEON_c',          'CHAMELEON_z'         ),
         ('RUNTIME_P',            'RUNTIME_s',            'RUNTIME_d',            'RUNTIME_c',            'RUNTIME_z'           ),
         ('chameleon_p',          'chameleon_s',          'chameleon_d',          'chameleon_c',          'chameleon_z'         ),
+        ('codelet_p',            'codelet_ds',           'codelet_ds',           'codelet_zc',          r'codelet_zc\b'          ),
         ('codelet_p',            'codelet_s',            'codelet_d',            'codelet_c',            'codelet_z'           ),
         ('runtime_p',            'runtime_s',            'runtime_d',            'runtime_c',            'runtime_z'           ),
         ('testing_p',            'testing_s',            'testing_d',            'testing_c',            'testing_z'           ),
diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 14eec76587d3ae4bc865e23dd2725625bc0694e9..1c7b5a0b2e623d1091819c72004998b1cd673ac1 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -17,7 +17,7 @@
 #     Univ. of California Berkeley,
 #     Univ. of Colorado Denver.
 #
-# @version 1.2.0
+# @version 1.3.0
 #  @author Cedric Castagnede
 #  @author Emmanuel Agullo
 #  @author Mathieu Faverge
@@ -27,7 +27,7 @@
 #  @author Alycia Lisito
 #  @author Loris Lucido
 #  @author Matthieu Kuhn
-#  @date 2023-01-30
+#  @date 2023-07-06
 #
 ###
 
@@ -192,7 +192,11 @@ set(ZSRC
     # MIXED PRECISION
     ##################
     pzlag2c.c
+    pzgered.c
+    pzgerst.c
     ###
+    zgered.c
+    zgerst.c
     #zcgels.c
     #zcgesv.c
     #zcposv.c
diff --git a/compute/pzgered.c b/compute/pzgered.c
new file mode 100644
index 0000000000000000000000000000000000000000..f934c494df6a576e4d2fe52bc2a2091ff9b4827a
--- /dev/null
+++ b/compute/pzgered.c
@@ -0,0 +1,260 @@
+/**
+ *
+ * @file pzgered.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zlange parallel algorithm
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ * @precisions normal z -> z d
+ *
+ */
+//ALLOC_WS :  A->mb
+//ALLOC_WS :  A->nb
+//WS_ADD :  A->mb + A->nb
+#include "control/common.h"
+#include <coreblas/lapacke.h>
+
+#define A( m, n )        A,     (m), (n)
+#define W( desc, m, n ) (desc), (m), (n)
+
+static inline void
+chameleon_pzgered_frb( cham_uplo_t uplo,
+                        CHAM_desc_t *A, CHAM_desc_t *Wnorm, CHAM_desc_t *Welt,
+                        RUNTIME_option_t *options )
+{
+    double alpha = 1.0;
+    double beta  = 0.0;
+
+    int m, n;
+    int minMNT = chameleon_min( A->mt, A->nt );
+    int minMN  = chameleon_min( A->m,  A->n  );
+    int MT = (uplo == ChamUpper) ? minMNT : A->mt;
+    int NT = (uplo == ChamLower) ? minMNT : A->nt;
+    int M  = (uplo == ChamUpper) ? minMN  : A->m;
+    int N  = (uplo == ChamLower) ? minMN  : A->n;
+    int P  = Welt->p;
+    int Q  = Welt->q;
+
+    /* Initialize workspaces for tile norms */
+    for(m = 0; m < Wnorm->mt; m++) {
+        int nmin = ( uplo == ChamUpper ) ? m                      : 0;
+        int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
+
+        for(n = nmin; n < nmax; n++) {
+            INSERT_TASK_dlaset(
+                options,
+                ChamUpperLower, Wnorm->mb, Wnorm->nb,
+                alpha, beta,
+                W( Wnorm, m, n ) );
+        }
+    }
+
+    /* Initialize workspaces */
+    for(m = 0; m < Welt->mt; m++) {
+        for(n = 0; n < Welt->nt; n++) {
+            INSERT_TASK_dlaset(
+                options,
+                ChamUpperLower, Welt->mb, Welt->nb,
+                alpha, beta,
+                W( Welt, m, n ) );
+        }
+    }
+
+    /**
+     * Step 1:
+     *  For j in [1,Q], Welt(m, j) = reduce( A(m, j+k*Q) )
+     */
+    for(m = 0; m < MT; m++) {
+        int nmin = ( uplo == ChamUpper ) ? m                      : 0;
+        int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
+
+        int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
+
+        for(n = nmin; n < nmax; n++) {
+            int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
+
+            if ( (n == m) && (uplo != ChamUpperLower) ) {
+                INSERT_TASK_ztrssq(
+                    options,
+                    uplo, ChamNonUnit, tempmm, tempnn,
+                    A(m, n), W( Wnorm, m, n) );
+            }
+            else {
+                INSERT_TASK_zgessq(
+                    options,
+                    ChamEltwise,
+                    tempmm, tempnn,
+                    A(m, n), W( Wnorm, m, n) );
+            }
+
+            /* Compress the info per line */
+            INSERT_TASK_dplssq(
+                options, ChamEltwise, 1, 1, W( Wnorm, m, n), W( Welt, m, n%Q) );
+
+            /* Compute the final norm of the tile */
+            INSERT_TASK_dplssq2(
+                options, 1, W( Wnorm, m, n ) );
+        }
+
+        /**
+         * Step 2:
+         *  For each j, W(m, j) = reduce( Welt(m, 0..Q-1) )
+         */
+        for(n = 1; n < Q; n++) {
+            INSERT_TASK_dplssq(
+                options, ChamEltwise, 1, 1, W( Welt, m, n), W( Welt, m, 0) );
+        }
+    }
+
+    /**
+     * Step 3:
+     *  For m in 0..P-1, Welt(m, n) = max( Welt(m..mt[P], n ) )
+     */
+    for(m = P; m < MT; m++) {
+        INSERT_TASK_dplssq(
+            options, ChamEltwise, 1, 1, W( Welt, m, 0), W( Welt, m%P, 0) );
+    }
+
+    /**
+     * Step 4:
+     *  For each i, Welt(i, n) = max( Welt(0..P-1, n) )
+     */
+    for(m = 1; m < P; m++) {
+        INSERT_TASK_dplssq(
+            options, ChamEltwise, 1, 1, W( Welt, m, 0), W( Welt, 0, 0) );
+    }
+
+    INSERT_TASK_dplssq2(
+        options, 1, W( Welt, 0, 0) );
+
+    /**
+     * Broadcast the result
+     */
+    for(m = 0; m < A->p; m++) {
+        for(n = 0; n < A->q; n++) {
+            if ( (m != 0) || (n != 0) ) {
+                INSERT_TASK_dlacpy(
+                    options,
+                    ChamUpperLower, 1, 1,
+                    W( Welt, 0, 0 ), W( Welt, m, n ) );
+            }
+        }
+    }
+}
+
+/**
+ *
+ */
+void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A,
+                         RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_option_t options;
+    CHAM_desc_t Wcol;
+    CHAM_desc_t Welt;
+    double gnorm, lnorm, threshold, eps;
+
+    int workmt, worknt;
+    int m, n;
+
+    chamctxt = chameleon_context_self();
+    if ( sequence->status != CHAMELEON_SUCCESS ) {
+        return;
+    }
+    RUNTIME_options_init(&options, chamctxt, sequence, request);
+
+    workmt = chameleon_max( A->mt, A->p );
+    worknt = chameleon_max( A->nt, A->q );
+
+    RUNTIME_options_ws_alloc( &options, 1, 0 );
+
+    /* Matrix to store the norm of each element */
+    chameleon_desc_init( &Wcol, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
+                         A->mt * 2, A->nt, 0, 0, A->mt * 2, A->nt, A->p, A->q,
+                         NULL, NULL, A->get_rankof_init, A->get_rankof_init_arg );
+
+    /* Matrix to compute the global frobenius norm */
+    chameleon_desc_init( &Welt, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
+                         workmt*2, worknt, 0, 0, workmt*2, worknt, A->p, A->q,
+                         NULL, NULL, NULL, NULL );
+
+    chameleon_pzgered_frb( uplo, A, &Wcol, &Welt, &options );
+
+    CHAMELEON_Desc_Flush( &Wcol, sequence );
+    CHAMELEON_Desc_Flush( &Welt, sequence );
+    CHAMELEON_Desc_Flush( A,     sequence );
+
+    RUNTIME_sequence_wait( chamctxt, sequence );
+
+    gnorm = *((double *)Welt.get_blkaddr( &Welt, A->myrank / A->q, A->myrank % A->q ));
+    chameleon_desc_destroy( &Welt );
+
+    /**
+     * Reduce the precision of the tiles if possible
+     */
+    if ( prec < 0. ) {
+#if !defined(CHAMELEON_SIMULATION)
+        double eps  = LAPACKE_dlamch_work('e');
+#else
+#if defined(PRECISION_z) || defined(PRECISION_d)
+        double eps  = 1.e-15;
+#else
+        double eps  = 1.e-7;
+#endif
+#endif
+    }
+    else {
+        eps = prec;
+    }
+    threshold = (eps * gnorm) / (double)(chameleon_min(A->mt, A->nt));
+
+#if defined(CHAMELEON_DEBUG_GERED)
+    fprintf( stderr,
+             "[%2d] The norm of A is:           %e\n"
+             "[%2d] The requested precision is: %e\n"
+             "[%2d] The computed threshold is:  %e\n",
+             A->myrank, gnorm,
+             A->myrank, eps,
+             A->myrank, threshold );
+#endif
+    for(m = 0; m < A->mt; m++) {
+        int tempmm = ( m == (A->mt-1) ) ? A->m - m * A->mb : A->mb;
+        int nmin   = ( uplo == ChamUpper ) ? m                         : 0;
+        int nmax   = ( uplo == ChamLower ) ? chameleon_min(m+1, A->nt) : A->nt;
+
+        for(n = nmin; n < nmax; n++) {
+            CHAM_tile_t *tile = A->get_blktile( A, m, n );
+            if ( tile->rank == A->myrank ) {
+                int tempnn = ( n == (A->nt-1) ) ? A->n - n * A->nb : A->nb;
+
+                /* Get the frobenius norm of the tile A( m, n ) */
+                lnorm = ((double*)((Wcol.get_blktile( &Wcol, m, n ))->mat))[0];
+
+                /*
+                 * u_{high} = 1e-16 (later should be application accuraccy)
+                 * u_{low} = 1e-8
+                 * ||A_{i,j}||_F  < u_{high} * || A ||_F / (nt * u_{low})
+                 * ||A_{i,j}||_F  < threshold / u_{low}
+                 */
+                INSERT_TASK_zgered( &options, threshold, lnorm,
+                                     tempmm, tempnn, A( m, n ) );
+            }
+        }
+    }
+
+    CHAMELEON_Desc_Flush( A, sequence );
+    RUNTIME_sequence_wait( chamctxt, sequence );
+
+    chameleon_desc_destroy( &Wcol );
+    RUNTIME_options_ws_free(&options);
+    RUNTIME_options_finalize(&options, chamctxt);
+}
diff --git a/compute/pzgerst.c b/compute/pzgerst.c
new file mode 100644
index 0000000000000000000000000000000000000000..86d01e1688b668878d5d1a432b53705df4d2f84a
--- /dev/null
+++ b/compute/pzgerst.c
@@ -0,0 +1,58 @@
+/**
+ *
+ * @file pzgerst.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgerst parallel algorithm
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ * @precisions normal z -> d
+ *
+ */
+#include "control/common.h"
+
+#define A(m,n) A,  m,  n
+#define B(m,n) B,  m,  n
+
+void chameleon_pzgerst( cham_uplo_t         uplo,
+                        CHAM_desc_t        *A,
+                        RUNTIME_sequence_t *sequence,
+                        RUNTIME_request_t  *request )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_option_t options;
+    int m, n;
+
+    chamctxt = chameleon_context_self();
+    if (sequence->status != CHAMELEON_SUCCESS) {
+        return;
+    }
+    RUNTIME_options_init(&options, chamctxt, sequence, request);
+
+    for(m = 0; m < A->mt; m++) {
+        int tempmm = ( m == (A->mt-1) ) ? A->m - m * A->mb : A->mb;
+        int nmin   = ( uplo == ChamUpper ) ? m                         : 0;
+        int nmax   = ( uplo == ChamLower ) ? chameleon_min(m+1, A->nt) : A->nt;
+
+        for(n = nmin; n < nmax; n++) {
+            CHAM_tile_t *tile = A->get_blktile( A, m, n );
+
+            if (( tile->rank == A->myrank ) &&
+                ( tile->flttype != ChamComplexDouble ) )
+            {
+                int tempnn = ( n == (A->nt-1) ) ? A->n - n * A->nb : A->nb;
+
+                INSERT_TASK_zgerst( &options,
+                                     tempmm, tempnn, A( m, n ) );
+            }
+        }
+    }
+
+    RUNTIME_options_finalize(&options, chamctxt);
+}
diff --git a/compute/zgered.c b/compute/zgered.c
new file mode 100644
index 0000000000000000000000000000000000000000..7c95e7bd10b68c129545c69034ffecc964abc2ce
--- /dev/null
+++ b/compute/zgered.c
@@ -0,0 +1,172 @@
+/**
+ *
+ * @file zgered.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgered wrappers
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ * @precisions normal z -> z d
+ *
+ */
+#include "control/common.h"
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ * @brief Computes the Cholesky factorization of a symmetric positive definite
+ * or Hermitian positive definite matrix with mixed precision.
+ *
+ * This is the synchronous version of CHAMELEON_zgeredinit_Tile_Async().  It
+ * operates on matrices stored by tiles with tiles of potentially different
+ * precisions.  All matrices are passed through descriptors.  All dimensions are
+ * taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          = ChamUpper: Upper triangle of A is stored;
+ *          = ChamLower: Lower triangle of A is stored.
+ *
+ * @param[in] A
+ *          On entry, the symmetric positive definite (or Hermitian) matrix A.
+ *          If uplo = ChamUpper, the leading N-by-N upper triangular part of A
+ *          contains the upper triangular part of the matrix A, and the strictly lower triangular
+ *          part of A is not referenced.
+ *          If UPLO = 'L', the leading N-by-N lower triangular part of A contains the lower
+ *          triangular part of the matrix A, and the strictly upper triangular part of A is not
+ *          referenced.
+ *          On exit, if return value = 0, the factor U or L from the Cholesky factorization
+ *          A = U^H*U or A = L*L^H.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, the leading minor of order i of A is not positive definite, so the
+ *               factorization could not be completed, and the solution has not been computed.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgered
+ * @sa CHAMELEON_zgered_Tile_Async
+ * @sa CHAMELEON_cpotrfmp_Tile
+ * @sa CHAMELEON_dpotrfmp_Tile
+ * @sa CHAMELEON_spotrfmp_Tile
+ * @sa CHAMELEON_zpotrs_Tile
+ *
+ */
+int CHAMELEON_zgered_Tile( cham_uplo_t uplo, double precision, CHAM_desc_t *A )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    int status;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgeredinit_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    CHAMELEON_zgered_Tile_Async( uplo, precision, A, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile_Async
+ *
+ * @brief Computes the Cholesky factorization of a symmetric positive definite
+ * or Hermitian positive definite matrix with mixed precision.
+ *
+ * This is the non-blocking equivalent of CHAMELEON_zgered_Tile().  It
+ * operates on matrices stored by tiles with tiles of potentially different
+ * precisions.  All matrices are passed through descriptors.  All dimensions are
+ * taken from the descriptors. It may return before the computation is
+ * finished. This function allows for pipelining operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgered
+ * @sa CHAMELEON_zgered_Tile
+ * @sa CHAMELEON_cpotrfmp_Tile_Async
+ * @sa CHAMELEON_dpotrfmp_Tile_Async
+ * @sa CHAMELEON_spotrfmp_Tile_Async
+ * @sa CHAMELEON_zpotrs_Tile_Async
+ *
+ */
+int CHAMELEON_zgered_Tile_Async( cham_uplo_t uplo, double precision, CHAM_desc_t *A,
+                                 RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t *chamctxt;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgered_Tile_Async", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if (sequence == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgered_Tile_Async", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgered_Tile_Async", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if (sequence->status == CHAMELEON_SUCCESS) {
+        request->status = CHAMELEON_SUCCESS;
+    }
+    else {
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
+    }
+
+    /* Check descriptors for correctness */
+    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_zgered_Tile_Async", "invalid descriptor");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    /* Check input arguments */
+    if (A->nb != A->mb) {
+        chameleon_error("CHAMELEON_zgered_Tile_Async", "only square tiles supported");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+
+    /*
+     * Quick return
+     */
+    if ( chameleon_max( A->m, A->n ) == 0 ) {
+        return CHAMELEON_SUCCESS;
+    }
+
+    chameleon_pzgered( uplo, precision, A, sequence, request );
+
+    return CHAMELEON_SUCCESS;
+}
diff --git a/compute/zgerst.c b/compute/zgerst.c
new file mode 100644
index 0000000000000000000000000000000000000000..8d283a6f8a15371f4d9e15079834135bb1180eaa
--- /dev/null
+++ b/compute/zgerst.c
@@ -0,0 +1,148 @@
+/**
+ *
+ * @file zgerst.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon auxiliary routines to restore the original precision of a matrix.
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @author Yuxi Hong
+ * @date 2023-07-06
+ * @precisions normal z -> d
+ *
+ */
+#include "control/common.h"
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ * @brief Restore the original precision of a given matrix that may have been
+ * used in reduced precision during some computations. See
+ * CHAMELEON_zgered_Tile() to introduce mixed-precision tiles into the matrix.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Specifies the shape of the matrix A:
+ *          = ChamUpper: A is upper triangular;
+ *          = ChamLower: A is lower triangular;
+ *          = ChamUpperLower: A is general.
+ *
+ * @param[in] A
+ *          Descriptor of the CHAMELEON matrix to restore.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgered_Tile
+ * @sa CHAMELEON_zgered_Tile_Async
+ * @sa CHAMELEON_zgerst_Tile
+ * @sa CHAMELEON_cgerst_Tile
+ * @sa CHAMELEON_dgerst_Tile
+ * @sa CHAMELEON_sgerst_Tile
+ *
+ */
+int CHAMELEON_zgerst_Tile( cham_uplo_t  uplo,
+                            CHAM_desc_t *A )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    int status;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgerst_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    CHAMELEON_zgerst_Tile_Async( uplo, A, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile_Async
+ *
+ * @brief Restore the original precision of a given matrix that may have been
+ * used in reduced precision during some computations. See
+ * CHAMELEON_zgered_Tile() to introduce mixed-precision tiles into the matrix.
+ *
+ * This is the non-blocking equivalent of CHAMELEON_zgerst_Tile(). It
+ * operates on matrices stored by tiles with tiles of potentially different
+ * precisions.  All matrices are passed through descriptors.  All dimensions are
+ * taken from the descriptors. It may return before the computation is
+ * finished. This function allows for pipelining operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgerst_Tile
+ * @sa CHAMELEON_zgered_Tile
+ * @sa CHAMELEON_zgered_Tile_Async
+ *
+ */
+int CHAMELEON_zgerst_Tile_Async( cham_uplo_t         uplo,
+                                  CHAM_desc_t        *A,
+                                  RUNTIME_sequence_t *sequence,
+                                  RUNTIME_request_t  *request )
+{
+    CHAM_context_t *chamctxt;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgerst_Tile_Async", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if (sequence == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgerst_Tile_Async", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgerst_Tile_Async", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if (sequence->status == CHAMELEON_SUCCESS) {
+        request->status = CHAMELEON_SUCCESS;
+    }
+    else {
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
+    }
+
+    /* Check descriptors for correctness */
+    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_zgerst_Tile_Async", "invalid descriptor");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+
+    chameleon_pzgerst( uplo, A, sequence, request );
+
+    return CHAMELEON_SUCCESS;
+}
diff --git a/control/compute_z.h b/control/compute_z.h
index 8bec9da5fa4bab356cde2bdb61b7717ffd086fde..06eae17b508c012918bbd011bad9cbb25a7bb7d4 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -22,7 +22,7 @@
  * @author Alycia Lisito
  * @author Matthieu Kuhn
  * @author Lionel Eyraud-Dubois
- * @date 2023-07-05
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -76,6 +76,12 @@ int chameleon_zshift(CHAM_context_t *chamctxt, int m, int n, CHAMELEON_Complex64
 /**
  *  Declarations of parallel functions (dynamic scheduling) - alphabetical order
  */
+#if defined(PRECISION_z) || defined(PRECISION_d)
+void chameleon_pzgered( cham_uplo_t uplo, double prec, CHAM_desc_t *A,
+                        RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+void chameleon_pzgerst( cham_uplo_t uplo, CHAM_desc_t *A,
+                        RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+#endif
 int chameleon_pzgebrd( int genD, cham_job_t jobu, cham_job_t jobvt,
                        CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D,
                        CHAMELEON_Complex64_t *U, int LDU, CHAMELEON_Complex64_t *VT, int LDVT,
diff --git a/control/descriptor.c b/control/descriptor.c
index 1436ad25d7c195bfa173b8f5ed2e70cf4c3ad2a2..e52f276366038554761d3a9fe2f7616789addc1f 100644
--- a/control/descriptor.c
+++ b/control/descriptor.c
@@ -19,7 +19,7 @@
  * @author Raphael Boucherie
  * @author Samuel Thibault
  * @author Lionel Eyraud-Dubois
- * @date 2023-07-05
+ * @date 2023-07-06
  *
  ***
  *
@@ -500,12 +500,6 @@ int CHAMELEON_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp
 {
     blkrankof_fct_t get_rankof = NULL;
 
-    /* if (getenv("CHAMELEON_1Dx1D_DISTRIBUTION")){ */
-    /*     printf("[CHAMELEON] : Using 1Dx1D distribubtion\n"); */
-    /*     get_rankof = chameleon_getrankof_custom; */
-    /*     load_dist(&custom_dist_a, m, n, mb, nb); */
-    /* } */
-
     return CHAMELEON_Desc_Create_User( descptr, mat, dtyp, mb, nb, bsiz,
                                        lm, ln, i, j, m, n, p, q,
                                        NULL, NULL, get_rankof, NULL );
diff --git a/gpucublas/include/CMakeLists.txt b/gpucublas/include/CMakeLists.txt
index b208d377e5b85e2549153728182caa53c9ffa2ef..2899bcf14ddcefaeacb1fcf7a48afedad36851cb 100644
--- a/gpucublas/include/CMakeLists.txt
+++ b/gpucublas/include/CMakeLists.txt
@@ -17,10 +17,10 @@
 #     Univ. of California Berkeley,
 #     Univ. of Colorado Denver.
 #
-# @version 1.2.0
+# @version 1.3.0
 #  @author Florent Pruvost
 #  @author Mathieu Faverge
-#  @date 2022-02-22
+#  @date 2023-07-06
 #
 ###
 
@@ -29,6 +29,7 @@
 set(GPUCUBLAS_HDRS_GENERATED "")
 set(ZHDR
     gpucublas/gpucublas_z.h
+    gpucublas/gpucublas_zc.h
 )
 precisions_rules_py(
   GPUCUBLAS_HDRS_GENERATED "${ZHDR}"
diff --git a/gpucublas/include/gpucublas.h b/gpucublas/include/gpucublas.h
index 2305b4f9be07ee8f06d2ce6154fd1d55c0ece938..e44a7e094278010192a979e2b831f57a5ee5a849 100644
--- a/gpucublas/include/gpucublas.h
+++ b/gpucublas/include/gpucublas.h
@@ -61,6 +61,8 @@ BEGIN_C_DECLS
 #include "gpucublas/gpucublas_d.h"
 #include "gpucublas/gpucublas_c.h"
 #include "gpucublas/gpucublas_s.h"
+#include "gpucublas/gpucublas_zc.h"
+#include "gpucublas/gpucublas_ds.h"
 
 int CUDA_hgemm( cham_trans_t transa, cham_trans_t transb,
                 int m, int n, int k,
diff --git a/gpucublas/include/gpucublas/gpucublas_z.h b/gpucublas/include/gpucublas/gpucublas_z.h
index 0773e003598a9a72597355dc71d5f785ec48af70..cf655f41fa47511b06a4c60d11e6397a8b6d9514 100644
--- a/gpucublas/include/gpucublas/gpucublas_z.h
+++ b/gpucublas/include/gpucublas/gpucublas_z.h
@@ -11,10 +11,10 @@
  *
  * @brief Chameleon GPU CHAMELEON_Complex64_t kernels header
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Florent Pruvost
  * @author Mathieu Faverge
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -24,6 +24,8 @@
 /**
  *  Declarations of cuda kernels - alphabetical order
  */
+int CUDA_dlag2h( int m, int n, const double *A, int lda, CHAMELEON_Real16_t *B, int ldb, cublasHandle_t handle );
+int CUDA_hlag2d( int m, int n, const CHAMELEON_Real16_t *A, int lda, double *B, int ldb, cublasHandle_t handle );
 int CUDA_zgeadd( cham_trans_t trans, int m, int n, const cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, const cuDoubleComplex *beta, cuDoubleComplex *B, int ldb, cublasHandle_t handle );
 int CUDA_zgemerge( cham_side_t side, cham_diag_t diag, int M, int N, const cuDoubleComplex *A, int LDA, cuDoubleComplex *B, int LDB, cublasHandle_t handle );
 int CUDA_zgemm(  cham_trans_t transa, cham_trans_t transb, int m, int n, int k, const cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, const cuDoubleComplex *beta, cuDoubleComplex *C, int ldc, cublasHandle_t handle );
diff --git a/gpucublas/include/gpucublas/gpucublas_zc.h b/gpucublas/include/gpucublas/gpucublas_zc.h
new file mode 100644
index 0000000000000000000000000000000000000000..48f7073da82b211b806d9ccb83961900b7fb6535
--- /dev/null
+++ b/gpucublas/include/gpucublas/gpucublas_zc.h
@@ -0,0 +1,24 @@
+/**
+ *
+ * @file gpucublas_zc.h
+ *
+ * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon GPU Mixed-precision kernels header
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ * @precisions mixed zc -> zc ds
+ *
+ */
+#ifndef _gpucublas_zc_h_
+#define _gpucublas_zc_h_
+
+int CUDA_clag2z( int m, int n, const cuFloatComplex *A, int lda, cuDoubleComplex *B, int ldb, cublasHandle_t handle );
+int CUDA_zlag2c( int m, int n, const cuDoubleComplex *A, int lda, cuFloatComplex *B, int ldb, cublasHandle_t handle );
+
+#endif /* _gpucublas_zc_h_ */
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index fa5af0a740a3af86a8ce25c0e9cc74f72eaf4296..fa5f069e6057cb0f79eb47a7f0e2bbc38777a14b 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon CHAMELEON_complex64_t wrappers
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Jakub Kurzak
@@ -23,7 +23,7 @@
  * @author Florent Pruvost
  * @author Alycia Lisito
  * @author Matthieu Kuhn
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -168,6 +168,10 @@ int CHAMELEON_zplrnk_Tile(int K, CHAM_desc_t *C, unsigned long long int seedA, u
 int CHAMELEON_zpoinv_Tile(cham_uplo_t uplo, CHAM_desc_t *A);
 int CHAMELEON_zposv_Tile(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B);
 int CHAMELEON_zpotrf_Tile(cham_uplo_t uplo, CHAM_desc_t *A);
+#if defined(PRECISION_z) || defined(PRECISION_d)
+int CHAMELEON_zgered_Tile( cham_uplo_t uplo, double prec, CHAM_desc_t *A );
+int CHAMELEON_zgerst_Tile( cham_uplo_t uplo, CHAM_desc_t *A );
+#endif
 int CHAMELEON_zsytrf_Tile(cham_uplo_t uplo, CHAM_desc_t *A);
 int CHAMELEON_zpotri_Tile(cham_uplo_t uplo, CHAM_desc_t *A);
 int CHAMELEON_zpotrimm_Tile(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *C);
@@ -245,6 +249,10 @@ int CHAMELEON_zplrnk_Tile_Async(int K, CHAM_desc_t *C, unsigned long long int se
 int CHAMELEON_zpoinv_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zposv_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zpotrf_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+#if defined(PRECISION_z) || defined(PRECISION_d)
+int CHAMELEON_zgered_Tile_Async(cham_uplo_t uplo, double prec, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+int CHAMELEON_zgerst_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
+#endif
 int CHAMELEON_zsytrf_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zpotri_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zpotrimm_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *C, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
diff --git a/include/chameleon/config.h.in b/include/chameleon/config.h.in
index 34b3f3febe2558bd3eca466e7b9fcdd21d8dbeb1..3b5ceb7fa3e00d5571aca777ac1854d2a33a28c3 100644
--- a/include/chameleon/config.h.in
+++ b/include/chameleon/config.h.in
@@ -57,6 +57,14 @@
 #cmakedefine CHAMELEON_USE_CUBLAS
 #cmakedefine CHAMELEON_USE_HIP
 
+/* Arithmetics support */
+#cmakedefine CHAMELEON_PREC_S
+#cmakedefine CHAMELEON_PREC_D
+#cmakedefine CHAMELEON_PREC_C
+#cmakedefine CHAMELEON_PREC_Z
+#cmakedefine CHAMELEON_PREC_DS
+#cmakedefine CHAMELEON_PREC_ZC
+
 /* Hmat-oss */
 #cmakedefine CHAMELEON_USE_HMAT
 
diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h
index b7281f13566b0c9a4ed274d7a9be762a5c0f3cc9..bc7a59e6f0b36ae28602218d6579c1f47ddbf142 100644
--- a/include/chameleon/tasks.h
+++ b/include/chameleon/tasks.h
@@ -11,11 +11,11 @@
  *
  * @brief Chameleon elementary tasks main header
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Cedric Augonnet
  * @author Florent Pruvost
- * @date 2022-02-22
+ * @date 2023-07-06
  *
  */
 #ifndef _chameleon_tasks_h_
@@ -100,6 +100,27 @@ void INSERT_TASK_map( const RUNTIME_option_t *options,
                       cham_access_t accessA, cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
                       cham_unary_operator_t op_fct, void *op_args );
 
+void INSERT_TASK_gemm( const RUNTIME_option_t *options,
+                       cham_trans_t transA, cham_trans_t transB,
+                       int m, int n, int k, int nb,
+                       double alpha, const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn,
+                       double beta, const CHAM_desc_t *C, int Cm, int Cn );
+
+void INSERT_TASK_gemmex( const RUNTIME_option_t *options,
+                         cham_trans_t transA, cham_trans_t transB,
+                         int m, int n, int k, int nb,
+                         double alpha, const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         double beta, const CHAM_desc_t *C, int Cm, int Cn );
+
+void INSERT_TASK_hgemm( const RUNTIME_option_t *options,
+                        cham_trans_t transA, cham_trans_t transB,
+                        int m, int n, int k, int nb,
+                        CHAMELEON_Real16_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                  const CHAM_desc_t *B, int Bm, int Bn,
+                        CHAMELEON_Real16_t beta,  const CHAM_desc_t *C, int Cm, int Cn );
+
 #include "chameleon/tasks_z.h"
 #include "chameleon/tasks_d.h"
 #include "chameleon/tasks_c.h"
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index db562075436d82c7ac06759eb0489f376b59d3bf..b58895aa4346b597fbd7dece2606df7a6de35fd3 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon CHAMELEON_Complex64_t elementary tasks header
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Jakub Kurzak
@@ -24,7 +24,7 @@
  * @author Alycia Lisito
  * @author Romain Peressoni
  * @author Matthieu Kuhn
- * @date 2023-02-21
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -78,6 +78,12 @@ void INSERT_TASK_zgeqrt( const RUNTIME_option_t *options,
                          int m, int n, int ib, int nb,
                          const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *T, int Tm, int Tn );
+void INSERT_TASK_zgered( const RUNTIME_option_t *options,
+                         double threshold, double Anorm, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An );
+void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
+                         int m, int n,
+                         const CHAM_desc_t *A, int Am, int An );
 void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
                          int m, int n, int k, int ib, int nb,
                          int *IPIV,
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index 5d8113b9b85792e30f0d0aaaf6c22d7504a1b128..fc1aac3355eb6fda653647270b5103280117c0d9 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -114,6 +114,11 @@ set(CODELETS_ZSRC
     # Reduction methods
     ##################
     codelets/codelet_zgersum.c
+    ##################
+    # Precision modification kernels
+    ##################
+    codelets/codelet_zgered.c
+    codelets/codelet_zgerst.c
     )
 
 set(CODELETS_SRC
diff --git a/runtime/openmp/codelets/codelet_zgered.c b/runtime/openmp/codelets/codelet_zgered.c
new file mode 100644
index 0000000000000000000000000000000000000000..4ccc67ba8741495d294ba45c66d4db111118e5bb
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zgered.c
@@ -0,0 +1,34 @@
+/**
+ *
+ * @file openmp/codelet_zgered.c
+ *
+ * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgered OpenMP codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ * @precisions normal z -> d
+ *
+ */
+#include "chameleon_openmp.h"
+
+void INSERT_TASK_zgered( const RUNTIME_option_t *options,
+                         double threshold, double Anorm, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An )
+{
+    fprintf( stderr, "WARNING: gered kernel is not available with OpenMP\n" );
+
+    (void)options;
+    (void)threshold;
+    (void)Anorm;
+    (void)m;
+    (void)n;
+    (void)A;
+    (void)Am;
+    (void)An;
+}
diff --git a/runtime/openmp/codelets/codelet_zgerst.c b/runtime/openmp/codelets/codelet_zgerst.c
new file mode 100644
index 0000000000000000000000000000000000000000..97a9d65718d192c9be9b73b35b4c69ae19aae71c
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zgerst.c
@@ -0,0 +1,32 @@
+/**
+ *
+ * @file openmp/codelet_zgerst.c
+ *
+ * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgerst OpenMP codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ * @precisions normal z -> d
+ *
+ */
+#include "chameleon_openmp.h"
+
+void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
+                         int m, int n,
+                         const CHAM_desc_t *A, int Am, int An )
+{
+    fprintf( stderr, "WARNING: gerst kernel is not available with OpenMP\n" );
+
+    (void)options;
+    (void)m;
+    (void)n;
+    (void)A;
+    (void)Am;
+    (void)An;
+}
diff --git a/runtime/openmp/codelets/codelet_zgersum.c b/runtime/openmp/codelets/codelet_zgersum.c
index 3ce42080a47a1122d83fe35aa921592ae275f54b..83c479a3983932742b5a96d231c661c1daa502f9 100644
--- a/runtime/openmp/codelets/codelet_zgersum.c
+++ b/runtime/openmp/codelets/codelet_zgersum.c
@@ -1,6 +1,6 @@
 /**
  *
- * @file starpu/codelet_zgersum.c
+ * @file openmp/codelet_zgersum.c
  *
  * @copyright 2009-2014 The University of Tennessee and The University of
  *                      Tennessee Research Foundation. All rights reserved.
diff --git a/runtime/openmp/control/runtime_async.c b/runtime/openmp/control/runtime_async.c
index 64cccaa4b8ebade256a98b3e294b602ed7849520..764612d4b8c55ca0fd1cfbb8ff7f65fbb7e64815 100644
--- a/runtime/openmp/control/runtime_async.c
+++ b/runtime/openmp/control/runtime_async.c
@@ -9,7 +9,7 @@
  *
  ***
  *
- * @brief Chameleon StarPU asynchronous routines
+ * @brief Chameleon OpenMP asynchronous routines
  *
  * @version 1.2.0
  * @author Mathieu Faverge
diff --git a/runtime/openmp/control/runtime_context.c b/runtime/openmp/control/runtime_context.c
index 05743743c3082330d891ed491d74ec7045f0449f..5a1a1f917a059ec3d327ef4a13e6e0734922d043 100644
--- a/runtime/openmp/control/runtime_context.c
+++ b/runtime/openmp/control/runtime_context.c
@@ -9,7 +9,7 @@
  *
  ***
  *
- * @brief Chameleon StarPU context routines
+ * @brief Chameleon OpenMP context routines
  *
  * @version 1.2.0
  * @author Cedric Augonnet
diff --git a/runtime/openmp/control/runtime_control.c b/runtime/openmp/control/runtime_control.c
index 08830268f1f4677951bf1d0291265ce63282bd5a..8aa704f33fbbc5ae7a7a25145415cc55455ac221 100644
--- a/runtime/openmp/control/runtime_control.c
+++ b/runtime/openmp/control/runtime_control.c
@@ -9,7 +9,7 @@
  *
  ***
  *
- * @brief Chameleon StarPU control routines
+ * @brief Chameleon OpenMP control routines
  *
  * @version 1.2.0
  * @author Mathieu Faverge
diff --git a/runtime/openmp/control/runtime_options.c b/runtime/openmp/control/runtime_options.c
index 3c10e5801060670ea28b31631ef270cc9028516d..3f52b69331786bc9ddfd59979c641e3b2b9002a1 100644
--- a/runtime/openmp/control/runtime_options.c
+++ b/runtime/openmp/control/runtime_options.c
@@ -9,7 +9,7 @@
  *
  ***
  *
- * @brief Chameleon StarPU options routines
+ * @brief Chameleon OpenMP options routines
  *
  * @version 1.2.0
  * @author Cedric Augonnet
diff --git a/runtime/openmp/control/runtime_profiling.c b/runtime/openmp/control/runtime_profiling.c
index e1c15de64fa7bdb6845ccaa2984f2fe5ee43a204..f632fbe9cb7b00a79f95345169bb55a9becec8ee 100644
--- a/runtime/openmp/control/runtime_profiling.c
+++ b/runtime/openmp/control/runtime_profiling.c
@@ -9,7 +9,7 @@
  *
  ***
  *
- * @brief Chameleon StarPU profiling routines
+ * @brief Chameleon OpenMP profiling routines
  *
  * @version 1.2.0
  * @author Cedric Augonnet
diff --git a/runtime/parsec/codelets/codelet_zgered.c b/runtime/parsec/codelets/codelet_zgered.c
new file mode 100644
index 0000000000000000000000000000000000000000..b4fc056018fa7e8a7fe623a60dc409a15a245251
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_zgered.c
@@ -0,0 +1,34 @@
+/**
+ *
+ * @file parsec/codelet_zgered.c
+ *
+ * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgered PaRSEC codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ * @precisions normal z -> d
+ *
+ */
+#include "chameleon_parsec.h"
+
+void INSERT_TASK_zgered( const RUNTIME_option_t *options,
+                         double threshold, double Anorm, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An )
+{
+    fprintf( stderr, "WARNING: gered kernel is not available with PaRSEC\n" );
+
+    (void)options;
+    (void)threshold;
+    (void)Anorm;
+    (void)m;
+    (void)n;
+    (void)A;
+    (void)Am;
+    (void)An;
+}
diff --git a/runtime/parsec/codelets/codelet_zgerst.c b/runtime/parsec/codelets/codelet_zgerst.c
new file mode 100644
index 0000000000000000000000000000000000000000..134dc20c7cb88eec4215639c17f186e1d5eb80c4
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_zgerst.c
@@ -0,0 +1,32 @@
+/**
+ *
+ * @file parsec/codelet_zgerst.c
+ *
+ * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgerst PaRSEC codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ * @precisions normal z -> d
+ *
+ */
+#include "chameleon_parsec.h"
+
+void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
+                         int m, int n,
+                         const CHAM_desc_t *A, int Am, int An )
+{
+    fprintf( stderr, "WARNING: gerst kernel is not available with PaRSEC\n" );
+
+    (void)options;
+    (void)m;
+    (void)n;
+    (void)A;
+    (void)Am;
+    (void)An;
+}
diff --git a/runtime/parsec/codelets/codelet_zgersum.c b/runtime/parsec/codelets/codelet_zgersum.c
index 4ab4c6208da83bf9d9923c73faaddc3368c05dd9..0d68ff0601d051b7534038346964c12e334cce39 100644
--- a/runtime/parsec/codelets/codelet_zgersum.c
+++ b/runtime/parsec/codelets/codelet_zgersum.c
@@ -1,6 +1,6 @@
 /**
  *
- * @file starpu/codelet_zgersum.c
+ * @file parsec/codelet_zgersum.c
  *
  * @copyright 2009-2014 The University of Tennessee and The University of
  *                      Tennessee Research Foundation. All rights reserved.
diff --git a/runtime/quark/codelets/codelet_zgered.c b/runtime/quark/codelets/codelet_zgered.c
new file mode 100644
index 0000000000000000000000000000000000000000..d1574eaee84b6178474ffe1dca36d57119ae6628
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zgered.c
@@ -0,0 +1,34 @@
+/**
+ *
+ * @file quark/codelet_zgered.c
+ *
+ * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgered Quark codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ * @precisions normal z -> d
+ *
+ */
+#include "chameleon_quark.h"
+
+void INSERT_TASK_zgered( const RUNTIME_option_t *options,
+                         double threshold, double Anorm, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An )
+{
+    fprintf( stderr, "WARNING: gered kernel is not available with Quark\n" );
+
+    (void)options;
+    (void)threshold;
+    (void)Anorm;
+    (void)m;
+    (void)n;
+    (void)A;
+    (void)Am;
+    (void)An;
+}
diff --git a/runtime/quark/codelets/codelet_zgerst.c b/runtime/quark/codelets/codelet_zgerst.c
new file mode 100644
index 0000000000000000000000000000000000000000..f717011dfca2091bb14e6a903902dff04bbed940
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zgerst.c
@@ -0,0 +1,32 @@
+/**
+ *
+ * @file quark/codelet_zgerst.c
+ *
+ * @copyright 2023-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgerst Quark codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ * @precisions normal z -> d
+ *
+ */
+#include "chameleon_quark.h"
+
+void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
+                         int m, int n,
+                         const CHAM_desc_t *A, int Am, int An )
+{
+    fprintf( stderr, "WARNING: gerst kernel is not available with Quark\n" );
+
+    (void)options;
+    (void)m;
+    (void)n;
+    (void)A;
+    (void)Am;
+    (void)An;
+}
diff --git a/runtime/quark/codelets/codelet_zgersum.c b/runtime/quark/codelets/codelet_zgersum.c
index 60534e23c920d43ba2c4f9493206c53239309914..0d2525112406a72de037ac859fb333c5a97a4fd1 100644
--- a/runtime/quark/codelets/codelet_zgersum.c
+++ b/runtime/quark/codelets/codelet_zgersum.c
@@ -1,6 +1,6 @@
 /**
  *
- * @file starpu/codelet_zgersum.c
+ * @file quark/codelet_zgersum.c
  *
  * @copyright 2009-2014 The University of Tennessee and The University of
  *                      Tennessee Research Foundation. All rights reserved.
diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt
index f7c00783b3e6ec08c10edf41ef9530857810c4de..30ea76045131884c610a0f4ee430393f732f6360 100644
--- a/runtime/starpu/CMakeLists.txt
+++ b/runtime/starpu/CMakeLists.txt
@@ -17,7 +17,7 @@
 #     Univ. of California Berkeley,
 #     Univ. of Colorado Denver.
 #
-# @version 1.2.0
+# @version 1.3.0
 #  @author Cedric Castagnede
 #  @author Emmanuel Agullo
 #  @author Mathieu Faverge
@@ -26,7 +26,7 @@
 #  @author Matthieu Kuhn
 #  @author Loris Lucido
 #  @author Terry Cojean
-#  @date 2023-01-30
+#  @date 2023-07-06
 #
 ###
 cmake_minimum_required(VERSION 3.1)
@@ -242,6 +242,7 @@ set(RUNTIME_SRCS_GENERATED "")
 set(ZSRC
   codelets/codelet_zcallback.c
   codelets/codelet_zccallback.c
+  codelets/codelet_dlag2h.c
   ${CODELETS_ZSRC}
   )
 
@@ -249,6 +250,13 @@ precisions_rules_py(RUNTIME_SRCS_GENERATED "${ZSRC}"
   PRECISIONS "${CHAMELEON_PRECISION}"
   TARGETDIR "codelets")
 
+set(CODELETS_SRC
+  codelets/codelet_convert.c
+  codelets/codelet_hgemm.c
+  codelets/codelet_gemm.c
+  ${CODELETS_SRC}
+  )
+
 set(RUNTIME_SRCS
   ${RUNTIME_COMMON}
   ${RUNTIME_SRCS_GENERATED}
diff --git a/runtime/starpu/codelets/codelet_convert.c b/runtime/starpu/codelets/codelet_convert.c
new file mode 100644
index 0000000000000000000000000000000000000000..0b07c069a1d509dc2fbbc09bd97f611154aa8ee0
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_convert.c
@@ -0,0 +1,133 @@
+/**
+ *
+ * @file starpu/codelet_convert.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgemm StarPU codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelets.h"
+#define PRECISION_z
+#include "runtime_codelet_z.h"
+#undef PRECISION_z
+#define PRECISION_d
+#include "runtime_codelet_d.h"
+#undef PRECISION_d
+#define PRECISION_c
+#include "runtime_codelet_c.h"
+#undef PRECISION_c
+#define PRECISION_s
+#include "runtime_codelet_s.h"
+#undef PRECISION_s
+#define PRECISION_zc
+#include "runtime_codelet_zc.h"
+#undef PRECISION_zc
+#define PRECISION_ds
+#include "runtime_codelet_ds.h"
+#undef PRECISION_ds
+
+void
+insert_task_convert( const RUNTIME_option_t *options,
+                     int m, int n,
+                     cham_flttype_t       fromtype,
+                     starpu_data_handle_t fromtile,
+                     cham_flttype_t       totype,
+                     starpu_data_handle_t totile )
+{
+    struct starpu_codelet *codelet = NULL;
+    void (*callback)(void*) = NULL;
+
+    int conversion = ChamConvert( fromtype, totype );
+
+    switch( conversion ) {
+#if defined(CHAMELEON_PREC_ZC)
+    case ChamConvertComplexDoubleToSingle:
+        codelet = &cl_zlag2c;
+        callback = cl_zlag2c_callback;
+        break;
+
+    case ChamConvertComplexSingleToDouble:
+        codelet = &cl_clag2z;
+        callback = cl_clag2z_callback;
+        break;
+#endif
+
+#if defined(CHAMELEON_PREC_DS)
+    case ChamConvertRealDoubleToSingle:
+        codelet = &cl_dlag2s;
+        callback = cl_dlag2s_callback;
+        break;
+
+    case ChamConvertRealSingleToDouble:
+        codelet = &cl_slag2d;
+        callback = cl_slag2d_callback;
+        break;
+#endif
+
+#if defined(CHAMELEON_PREC_D) && defined(CHAMELON_USE_CUDA)
+    case ChamConvertRealDoubleToHalf:
+        codelet = &cl_dlag2h;
+        callback = cl_dlag2h_callback;
+        break;
+
+    case ChamConvertRealHalfToDouble:
+        codelet = &cl_hlag2d;
+        callback = cl_hlag2d_callback;
+        break;
+#endif
+
+#if defined(CHAMELEON_PREC_S) && defined(CHAMELON_USE_CUDA)
+    case ChamConvertRealSingleToHalf:
+        codelet = &cl_slag2h;
+        callback = cl_slag2h_callback;
+        break;
+
+    case ChamConvertRealHalfToSingle:
+        codelet = &cl_hlag2s;
+        callback = cl_hlag2s_callback;
+        break;
+#endif
+
+    case ChamConvertComplexDoubleToDouble:
+        return;
+    case ChamConvertComplexSingleToSingle:
+        return;
+    case ChamConvertComplexHalfToHalf:
+        return;
+    case ChamConvertRealDoubleToDouble:
+        return;
+    case ChamConvertRealSingleToSingle:
+        return;
+    case ChamConvertRealHalfToHalf:
+        return;
+
+    default:
+        assert(0);
+        fprintf( stderr, "INSERT_TASK_convert: Unknown conversion type\n" );
+        return;
+    }
+
+    rt_starpu_insert_task(
+        codelet,
+        STARPU_VALUE,    &m,                 sizeof(int),
+        STARPU_VALUE,    &n,                 sizeof(int),
+        STARPU_R,         fromtile,
+        STARPU_W,         totile,
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  options->profiling ? callback : NULL,
+        STARPU_EXECUTE_ON_WORKER, options->workerid,
+        0);
+
+    return;
+}
diff --git a/runtime/starpu/codelets/codelet_dlag2h.c b/runtime/starpu/codelets/codelet_dlag2h.c
new file mode 100644
index 0000000000000000000000000000000000000000..b910559db38c20a147e23173c884ea5364d923e1
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_dlag2h.c
@@ -0,0 +1,161 @@
+/**
+ *
+ * @file starpu/codelet_dlag2h.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon dlag2h StarPU codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @author Lucas Barros de Assis
+ * @author Florent Pruvost
+ * @author Samuel Thibault
+ * @date 2023-07-06
+ * @precisions normal d -> d s
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelet_d.h"
+
+#if !defined(CHAMELEON_SIMULATION)
+#if defined(CHAMELEON_USE_CUDA)
+static void
+cl_dlag2h_cuda_func( void *descr[], void *cl_arg )
+{
+    cublasHandle_t handle = starpu_cublas_get_local_handle();
+    CHAM_tile_t   *tileA;
+    CHAM_tile_t   *tileB;
+    int            m, n;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+
+    starpu_codelet_unpack_args( cl_arg, &m, &n );
+
+    int rc = CUDA_dlag2h(
+        m, n,
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        handle );
+
+    if ( rc != 0 ) {
+        fprintf( stderr, "core_dlag2h failed with info(%d)\n", rc );
+    }
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS( dlag2h, NULL, cl_dlag2h_cuda_func, STARPU_CUDA_ASYNC )
+
+/**
+ *
+ * @ingroup INSERT_TASK_Complex64_t
+ *
+ */
+void INSERT_TASK_dlag2h( const RUNTIME_option_t *options,
+                         int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
+{
+    (void)nb;
+    struct starpu_codelet *codelet = &cl_dlag2h;
+    void (*callback)(void*) = options->profiling ? cl_dlag2h_callback : NULL;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_W(B, Bm, Bn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    rt_starpu_insert_task(
+        codelet,
+        STARPU_VALUE,    &m,                 sizeof(int),
+        STARPU_VALUE,    &n,                 sizeof(int),
+        STARPU_R,         RTBLKADDR(A, ChamRealDouble, Am, An),
+        STARPU_W,         RTBLKADDR(B, ChamRealHalf, Bm, Bn),
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback,
+        STARPU_EXECUTE_ON_WORKER, options->workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "dlag2h",
+#endif
+        0);
+}
+
+#if !defined(CHAMELEON_SIMULATION)
+#if defined(CHAMELEON_USE_CUDA)
+static void
+cl_hlag2d_cuda_func( void *descr[], void *cl_arg )
+{
+    cublasHandle_t handle = starpu_cublas_get_local_handle();
+    CHAM_tile_t   *tileA;
+    CHAM_tile_t   *tileB;
+    int            m, n;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+
+    starpu_codelet_unpack_args( cl_arg, &m, &n );
+
+    int rc = CUDA_hlag2d(
+        m, n,
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        handle );
+
+    if ( rc != 0 ) {
+        fprintf( stderr, "core_hlag2d failed with info(%d)\n", rc );
+    }
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS( hlag2d, NULL, cl_hlag2d_cuda_func, STARPU_CUDA_ASYNC )
+
+void INSERT_TASK_hlag2d( const RUNTIME_option_t *options,
+                         int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
+{
+    (void)nb;
+    struct starpu_codelet *codelet = &cl_hlag2d;
+    void (*callback)(void*) = options->profiling ? cl_hlag2d_callback : NULL;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R( A, Am, An );
+    CHAMELEON_ACCESS_W( B, Bm, Bn );
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    rt_starpu_insert_task(
+        codelet,
+        STARPU_VALUE,    &m,                 sizeof(int),
+        STARPU_VALUE,    &n,                 sizeof(int),
+        STARPU_R,         RTBLKADDR(A, ChamComplexFloat, Am, An),
+        STARPU_W,         RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback,
+        STARPU_EXECUTE_ON_WORKER, options->workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "hlag2d",
+#endif
+        0);
+}
diff --git a/runtime/starpu/codelets/codelet_dlag2z.c b/runtime/starpu/codelets/codelet_dlag2z.c
index 19516c88cdff7174efdd18f64fe9d9f3125ad581..34e0b16c0e29207f514110cb9333876e40524319 100644
--- a/runtime/starpu/codelets/codelet_dlag2z.c
+++ b/runtime/starpu/codelets/codelet_dlag2z.c
@@ -11,9 +11,9 @@
  *
  * @brief Chameleon dlag2z StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c
  *
  */
@@ -65,8 +65,8 @@ void INSERT_TASK_dlag2z( const RUNTIME_option_t *options,
         STARPU_VALUE,    &uplo,              sizeof(uplo),
         STARPU_VALUE,    &m,                 sizeof(int),
         STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_R,         RTBLKADDR(A, double, Am, An),
-        STARPU_W,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,         RTBLKADDR(A, ChamRealDouble, Am, An),
+        STARPU_W,         RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_dzasum.c b/runtime/starpu/codelets/codelet_dzasum.c
index a085f919427e9cc470a9ff9eb3758bb2ebfc3d60..1fc2cf3090bf046b8ffdda4dd021619ce2d08090 100644
--- a/runtime/starpu/codelets/codelet_dzasum.c
+++ b/runtime/starpu/codelets/codelet_dzasum.c
@@ -11,14 +11,14 @@
  *
  * @brief Chameleon dzasum StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -67,8 +67,8 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
         STARPU_VALUE,    &uplo,                sizeof(cham_uplo_t),
         STARPU_VALUE,    &M,                   sizeof(int),
         STARPU_VALUE,    &N,                   sizeof(int),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,        RTBLKADDR(B, double, Bm, Bn),
+        STARPU_R,         RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW,        RTBLKADDR(B, ChamRealDouble, Bm, Bn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
          STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_gemm.c b/runtime/starpu/codelets/codelet_gemm.c
new file mode 100644
index 0000000000000000000000000000000000000000..69cd67a294d2fcdf116e28326072702527a8d699
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_gemm.c
@@ -0,0 +1,207 @@
+/**
+ *
+ * @file starpu/codelet_gemm.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon gemm StarPU codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelets.h"
+#define PRECISION_z
+#include "runtime_codelet_z.h"
+#undef PRECISION_z
+#define PRECISION_d
+#include "runtime_codelet_d.h"
+#undef PRECISION_d
+#define PRECISION_c
+#include "runtime_codelet_c.h"
+#undef PRECISION_c
+#define PRECISION_s
+#include "runtime_codelet_s.h"
+#undef PRECISION_s
+
+void
+INSERT_TASK_gemm( const RUNTIME_option_t *options,
+                  cham_trans_t transA, cham_trans_t transB,
+                  int m, int n, int k, int nb,
+                  double alpha, const CHAM_desc_t *A, int Am, int An,
+                                const CHAM_desc_t *B, int Bm, int Bn,
+                  double beta,  const CHAM_desc_t *C, int Cm, int Cn )
+{
+    struct starpu_codelet *codelet = NULL;
+    void (*callback)(void*) = NULL;
+
+    /* if ( alpha == 0. ) { */
+    /*     INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, */
+    /*                          beta, C, Cm, Cn ); */
+    /*     return; */
+    /* } */
+
+    void          *clargs = NULL;
+    int            accessC;
+    int            exec = 0;
+    size_t         argssize = 0;
+    char          *cl_name = "Xgemm";
+    CHAM_tile_t   *tileC;
+    cham_flttype_t Cflttype;
+
+    /* Handle cache */
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    exec = __chameleon_need_exec;
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    /* Reduce the C access if needed */
+    accessC = ( beta == 0. ) ? STARPU_W : (STARPU_RW | ((beta == 1.) ? STARPU_COMMUTE : 0));
+
+    tileC = C->get_blktile( C, Cm, Cn );
+    Cflttype = tileC->flttype;
+
+    switch( Cflttype ) {
+#if defined(CHAMELEON_PREC_Z)
+    case ChamComplexDouble:
+        codelet  = &cl_zgemm;
+        callback = cl_zgemm_callback;
+        if ( exec ) {
+            struct cl_zgemm_args_s *cl_zargs;
+            cl_zargs = malloc( sizeof( struct cl_zgemm_args_s ) );
+            cl_zargs->transA = transA;
+            cl_zargs->transB = transB;
+            cl_zargs->m      = m;
+            cl_zargs->n      = n;
+            cl_zargs->k      = k;
+            cl_zargs->alpha  = alpha;
+            cl_zargs->beta   = beta;
+            clargs = (void*)cl_zargs;
+            argssize = sizeof( struct cl_zgemm_args_s );
+        }
+        break;
+#endif
+#if defined(CHAMELEON_PREC_C)
+    case ChamComplexSingle:
+        codelet  = &cl_cgemm;
+        callback = cl_cgemm_callback;
+        if ( exec ) {
+            struct cl_cgemm_args_s *cl_cargs;
+            cl_cargs = malloc( sizeof( struct cl_cgemm_args_s ) );
+            cl_cargs->transA = transA;
+            cl_cargs->transB = transB;
+            cl_cargs->m      = m;
+            cl_cargs->n      = n;
+            cl_cargs->k      = k;
+            cl_cargs->alpha  = alpha;
+            cl_cargs->beta   = beta;
+            clargs = (void*)cl_cargs;
+            argssize = sizeof( struct cl_cgemm_args_s );
+        }
+        break;
+#endif
+#if defined(CHAMELEON_PREC_D)
+    case ChamRealDouble:
+        codelet  = &cl_dgemm;
+        callback = cl_dgemm_callback;
+        if ( exec ) {
+            struct cl_dgemm_args_s *cl_dargs;
+            cl_dargs = malloc( sizeof( struct cl_dgemm_args_s ) );
+            cl_dargs->transA = transA;
+            cl_dargs->transB = transB;
+            cl_dargs->m      = m;
+            cl_dargs->n      = n;
+            cl_dargs->k      = k;
+            cl_dargs->alpha  = alpha;
+            cl_dargs->beta   = beta;
+            clargs = (void*)cl_dargs;
+            argssize = sizeof( struct cl_dgemm_args_s );
+        }
+        break;
+#endif
+#if defined(CHAMELEON_PREC_S)
+    case ChamRealSingle:
+        codelet  = &cl_sgemm;
+        callback = cl_sgemm_callback;
+        if ( exec ) {
+            struct cl_sgemm_args_s *cl_sargs;
+            cl_sargs = malloc( sizeof( struct cl_sgemm_args_s ) );
+            cl_sargs->transA = transA;
+            cl_sargs->transB = transB;
+            cl_sargs->m      = m;
+            cl_sargs->n      = n;
+            cl_sargs->k      = k;
+            cl_sargs->alpha  = alpha;
+            cl_sargs->beta   = beta;
+            clargs = (void*)cl_sargs;
+            argssize = sizeof( struct cl_sgemm_args_s );
+        }
+        break;
+#endif
+#if (defined(CHAMELEON_PREC_D) || defined(CHAMELEON_PREC_S)) && defined(CHAMELEON_USE_CUDA)
+    case ChamRealHalf:
+        codelet  = &cl_hgemm;
+        callback = cl_hgemm_callback;
+        if ( exec ) {
+            struct cl_hgemm_args_s *cl_hargs;
+            cl_hargs = malloc( sizeof( struct cl_hgemm_args_s ) );
+            cl_hargs->transA = transA;
+            cl_hargs->transB = transB;
+            cl_hargs->m      = m;
+            cl_hargs->n      = n;
+            cl_hargs->k      = k;
+            cl_hargs->alpha  = alpha;
+            cl_hargs->beta   = beta;
+            clargs = (void*)cl_hargs;
+            argssize = sizeof( struct cl_hgemm_args_s );
+        }
+        break;
+#endif
+    default:
+        fprintf( stderr, "INSERT_TASK_gemm: Unknown datatype %d (Mixed=%3s, Type=%d, Size=%d\n",
+                 Cflttype, cham_is_mixed(Cflttype) ? "Yes" : "No",
+                 cham_get_ftype(Cflttype), cham_get_arith(Cflttype) );
+        return;
+    }
+
+    /* Refine name */
+    cl_name = chameleon_codelet_name( cl_name, 3,
+                                      A->get_blktile( A, Am, An ),
+                                      B->get_blktile( B, Bm, Bn ),
+                                      C->get_blktile( C, Cm, Cn ) );
+
+    /* Callback for profiling information */
+    callback = options->profiling ? callback : NULL;
+
+    /* Insert the task */
+    rt_starpu_insert_task(
+        codelet,
+        /* Task codelet arguments */
+        STARPU_CL_ARGS, clargs, argssize,
+
+        /* Task handles */
+        STARPU_R, RUNTIME_data_getaddr_withconversion( options, STARPU_R, Cflttype, A, Am, An ),
+        STARPU_R, RUNTIME_data_getaddr_withconversion( options, STARPU_R, Cflttype, B, Bm, Bn ),
+        accessC,  RUNTIME_data_getaddr_withconversion( options, accessC,  Cflttype, C, Cm, Cn ),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
+        STARPU_EXECUTE_ON_WORKER, options->workerid,
+        STARPU_POSSIBLY_PARALLEL, options->parallel,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME,              cl_name,
+#endif
+        0 );
+
+    return;
+}
diff --git a/runtime/starpu/codelets/codelet_gemmex.c b/runtime/starpu/codelets/codelet_gemmex.c
new file mode 100644
index 0000000000000000000000000000000000000000..efc33dab33d5fef8c8186aebf0c53e401f9ef2ba
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_gemmex.c
@@ -0,0 +1,278 @@
+/**
+ *
+ * @file starpu/codelet_gemmex.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon gemmex StarPU codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelets.h"
+
+CHAMELEON_CL_CB( gemmex, cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), cti_handle_get_n(task->handles[0]), 2. *M*N*K) /* If A^t, computation is wrong */
+
+struct cl_gemmex_args_s {
+    cham_trans_t transA;
+    cham_trans_t transB;
+    int m;
+    int n;
+    int k;
+    double alpha;
+    double beta;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+#if defined(CHAMELEON_USE_CUDA)
+static void
+cl_gemmex_cuda_func( void *descr[], void *cl_arg )
+{
+    struct cl_gemmex_args_s *clargs = (struct cl_gemmex_args_s *)cl_arg;
+    cublasHandle_t          handle = starpu_cublas_get_local_handle();
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileC;
+    void *ptrAlpha, *ptrBeta;
+
+    switch( tileC->flttype ) {
+    case ChamRealHalf:
+    {
+        CHAMELEON_Real16_t halpha = clargs->alpha;
+        CHAMELEON_Real16_t hbeta  = clargs->beta;
+        ptrAlpha = &halpha;
+        ptrBeta  = &hbeta;
+    }
+    break;
+    case ChamRealFloat:
+    {
+        float salpha = clargs->alpha;
+        float sbeta  = clargs->beta;
+        ptrAlpha = &salpha;
+        ptrBeta  = &sbeta;
+    }
+    break;
+    case ChamRealDouble:
+    {
+        double dalpha = clargs->alpha;
+        double dbeta  = clargs->beta;
+        ptrAlpha = &dalpha;
+        ptrBeta  = &dbeta;
+    }
+    break;
+    case ChamComplexFloat:
+    {
+        CHAMELEON_Complex32_t calpha = clargs->alpha;
+        CHAMELEON_Complex32_t cbeta  = clargs->beta;
+        ptrAlpha = &calpha;
+        ptrBeta  = &cbeta;
+    }
+    break;
+    case ChamComplexDouble:
+    {
+        CHAMELEON_Complex64_t zalpha = clargs->alpha;
+        CHAMELEON_Complex64_t zbeta  = clargs->beta;
+        ptrAlpha = &zalpha;
+        ptrBeta  = &zbeta;
+    }
+    break;
+    default:
+        fprintf( stderr, "cl_gemmex: Unknown C datatype\n" );
+        return;
+    }
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileC->format & CHAMELEON_TILE_FULLRANK );
+
+    CUDA_gemmex(
+        clargs->transA, clargs->transB,
+        clargs->m, clargs->n, clargs->k,
+        ptrAlpha,
+        tileA->mat, tileA->ld, tileA->flttype,
+        tileB->mat, tileB->ld, tileB->flttype,
+        ptrBeta,
+        tileC->mat, tileC->ld, tileC->flttype,
+        handle );
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS( gemmex, NULL, cl_gemmex_cuda_func, STARPU_CUDA_ASYNC )
+
+void INSERT_TASK_gemmex_Astat( const RUNTIME_option_t *options,
+                               cham_trans_t transA, cham_trans_t transB,
+                               int m, int n, int k, int nb,
+                               double alpha, const CHAM_desc_t *A, int Am, int An,
+                                             const CHAM_desc_t *B, int Bm, int Bn,
+                               double beta,  const CHAM_desc_t *C, int Cm, int Cn )
+{
+    /* if ( alpha == 0. ) { */
+    /*     INSERT_TASK_hlascal( options, ChamUpperLower, m, n, nb, */
+    /*                          beta, C, Cm, Cn ); */
+    /*     return; */
+    /* } */
+
+    struct cl_gemmex_args_s  *clargs = NULL;
+    void (*callback)(void*);
+    int                      accessC;
+    int                      exec    = 0;
+    char                    *cl_name = "gemmex_Astat";
+
+    /* Handle cache */
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+     /* Check A as write, since it will be the owner of the computation */
+    CHAMELEON_ACCESS_W(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+     /* Check C as read, since it will be used in a reduction */
+    CHAMELEON_ACCESS_R(C, Cm, Cn);
+    exec = __chameleon_need_exec;
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    if ( exec ) {
+        clargs = malloc( sizeof( struct cl_gemmex_args_s ) );
+        clargs->transA = transA;
+        clargs->transB = transB;
+        clargs->m      = m;
+        clargs->n      = n;
+        clargs->k      = k;
+        clargs->alpha  = alpha;
+        clargs->beta   = beta;
+    }
+
+    /* Callback for profiling information */
+    callback = options->profiling ? cl_gemmex_callback : NULL;
+
+    /* Reduce the C access if needed */
+    if ( beta == 0. ) {
+        accessC = STARPU_W;
+    }
+#if defined(HAVE_STARPU_MPI_REDUX)
+    else if ( beta == 1. ) {
+        accessC = STARPU_MPI_REDUX;
+    }
+#endif
+    else {
+        accessC = STARPU_RW;
+    }
+
+    /* Refine name */
+    cl_name = chameleon_codelet_name( cl_name, 3,
+                                      A->get_blktile( A, Am, An ),
+                                      B->get_blktile( B, Bm, Bn ),
+                                      C->get_blktile( C, Cm, Cn ) );
+
+    /* Insert the task */
+    rt_starpu_insert_task(
+        &cl_gemmex,
+        /* Task codelet arguments */
+        STARPU_CL_ARGS, clargs, sizeof(struct cl_gemmex_args_s),
+
+        /* Task handles */
+        STARPU_R, RTBLKADDR(A, ChamRealHalf, Am, An),
+        STARPU_R, RTBLKADDR(B, ChamRealHalf, Bm, Bn),
+        accessC,  RTBLKADDR(C, ChamRealHalf, Cm, Cn),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
+        STARPU_EXECUTE_ON_NODE,   A->get_rankof(A, Am, An),
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME,              cl_name,
+#endif
+        0 );
+}
+
+void INSERT_TASK_gemmex( const RUNTIME_option_t *options,
+                         cham_trans_t transA, cham_trans_t transB,
+                         int m, int n, int k, int nb,
+                         double alpha, const CHAM_desc_t *A, int Am, int An,
+                                       const CHAM_desc_t *B, int Bm, int Bn,
+                         double beta,  const CHAM_desc_t *C, int Cm, int Cn )
+{
+    /* if ( alpha == 0. ) { */
+    /*     INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, */
+    /*                          beta, C, Cm, Cn ); */
+    /*     return; */
+    /* } */
+
+    struct cl_gemmex_args_s  *clargs = NULL;
+    void (*callback)(void*);
+    int                      accessC;
+    int                      exec = 0;
+    char                    *cl_name = "gemmex";
+
+    if ( !(options->withcuda) ) {
+        /* Fallback to cpu version */
+        INSERT_TASK_gemm( options, transA, transB, m, n, k, nb,
+                          alpha, A, Am, An, B, Bm, Bn, beta, C, Cm, Cn );
+        return;
+    }
+
+    /* Handle cache */
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    exec = __chameleon_need_exec;
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    if ( exec ) {
+        clargs = malloc( sizeof( struct cl_gemmex_args_s ) );
+        clargs->transA = transA;
+        clargs->transB = transB;
+        clargs->m      = m;
+        clargs->n      = n;
+        clargs->k      = k;
+        clargs->alpha  = alpha;
+        clargs->beta   = beta;
+    }
+
+    /* Callback for profiling information */
+    callback = options->profiling ? cl_gemmex_callback : NULL;
+
+    /* Reduce the C access if needed */
+    accessC = ( beta == 0. ) ? STARPU_W : (STARPU_RW | ((beta == 1.) ? STARPU_COMMUTE : 0));
+
+    /* Refine name */
+    cl_name = chameleon_codelet_name( cl_name, 3,
+                                      A->get_blktile( A, Am, An ),
+                                      B->get_blktile( B, Bm, Bn ),
+                                      C->get_blktile( C, Cm, Cn ) );
+
+    /* Insert the task */
+    rt_starpu_insert_task(
+        &cl_gemmex,
+        /* Task codelet arguments */
+        STARPU_CL_ARGS, clargs, sizeof(struct cl_gemmex_args_s),
+
+        /* Task handles */
+        STARPU_R, RTBLKADDR(A, ChamRealHalf, Am, An),
+        STARPU_R, RTBLKADDR(B, ChamRealHalf, Bm, Bn),
+        accessC,  RTBLKADDR(C, ChamRealHalf, Cm, Cn),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
+        STARPU_EXECUTE_ON_WORKER, options->workerid,
+        STARPU_POSSIBLY_PARALLEL, options->parallel,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME,              cl_name,
+#endif
+        0 );
+}
diff --git a/runtime/starpu/codelets/codelet_hgemm.c b/runtime/starpu/codelets/codelet_hgemm.c
new file mode 100644
index 0000000000000000000000000000000000000000..a595bf990cd336e50aee8c541954ef5591c5ed8b
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_hgemm.c
@@ -0,0 +1,250 @@
+/**
+ *
+ * @file starpu/codelet_hgemm.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon hgemm StarPU codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelets.h"
+
+CHAMELEON_CL_CB( hgemm, cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), cti_handle_get_n(task->handles[0]), 2. *M*N*K) /* If A^t, computation is wrong */
+
+#if !defined(CHAMELEON_SIMULATION)
+#if defined(CHAMELEON_USE_CUDA)
+static void
+cl_hgemm_cuda_func( void *descr[], void *cl_arg )
+{
+    struct cl_hgemm_args_s *clargs = (struct cl_hgemm_args_s *)cl_arg;
+    cublasHandle_t          handle = starpu_cublas_get_local_handle();
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileC->format & CHAMELEON_TILE_FULLRANK );
+
+    CUDA_hgemm(
+        clargs->transA, clargs->transB,
+        clargs->m, clargs->n, clargs->k,
+        (CHAMELEON_Real16_t*)&(clargs->alpha),
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        (CHAMELEON_Real16_t*)&(clargs->beta),
+        tileC->mat, tileC->ld,
+        handle );
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
+
+#if defined(CHAMELEON_USE_HIP)
+static void
+cl_hgemm_hip_func( void *descr[], void *cl_arg )
+{
+    struct cl_hgemm_args_s *clargs = (struct cl_hgemm_args_s *)cl_arg;
+    hipblasHandle_t         handle = starpu_hipblas_get_local_handle();
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileC->format & CHAMELEON_TILE_FULLRANK );
+
+    HIP_hgemm(
+        clargs->transA, clargs->transB,
+        clargs->m, clargs->n, clargs->k,
+        (CHAMELEON_Real16_t*)&(clargs->alpha),
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        (CHAMELEON_Real16_t*)&(clargs->beta),
+        tileC->mat, tileC->ld,
+        handle );
+
+    return;
+}
+#endif /* defined(CHAMELEON_USE_HIP) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+#if defined(CHAMELEON_USE_HIP)
+CODELETS_GPU( hgemm, NULL, cl_hgemm_hip_func, STARPU_HIP_ASYNC )
+#else
+CODELETS( hgemm, NULL, cl_hgemm_cuda_func, STARPU_CUDA_ASYNC )
+#endif
+
+void INSERT_TASK_hgemm_Astat( const RUNTIME_option_t *options,
+                              cham_trans_t transA, cham_trans_t transB,
+                              int m, int n, int k, int nb,
+                              CHAMELEON_Real16_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                        const CHAM_desc_t *B, int Bm, int Bn,
+                              CHAMELEON_Real16_t beta,  const CHAM_desc_t *C, int Cm, int Cn )
+{
+    /* if ( alpha == 0. ) { */
+    /*     INSERT_TASK_hlascal( options, ChamUpperLower, m, n, nb, */
+    /*                          beta, C, Cm, Cn ); */
+    /*     return; */
+    /* } */
+
+    struct cl_hgemm_args_s  *clargs = NULL;
+    void (*callback)(void*);
+    int                      accessC;
+    int                      exec    = 0;
+    char                    *cl_name = "hgemm_Astat";
+
+    /* Handle cache */
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+     /* Check A as write, since it will be the owner of the computation */
+    CHAMELEON_ACCESS_W(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+     /* Check C as read, since it will be used in a reduction */
+    CHAMELEON_ACCESS_R(C, Cm, Cn);
+    exec = __chameleon_need_exec;
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    if ( exec ) {
+        clargs = malloc( sizeof( struct cl_hgemm_args_s ) );
+        clargs->transA = transA;
+        clargs->transB = transB;
+        clargs->m      = m;
+        clargs->n      = n;
+        clargs->k      = k;
+        clargs->alpha  = alpha;
+        clargs->beta   = beta;
+    }
+
+    /* Callback for profiling information */
+    callback = options->profiling ? cl_hgemm_callback : NULL;
+
+    /* Reduce the C access if needed */
+    if ( beta == 0. ) {
+        accessC = STARPU_W;
+    }
+#if defined(HAVE_STARPU_MPI_REDUX)
+    else if ( beta == 1. ) {
+        accessC = STARPU_MPI_REDUX;
+    }
+#endif
+    else {
+        accessC = STARPU_RW;
+    }
+
+    /* Refine name */
+    cl_name = chameleon_codelet_name( cl_name, 3,
+                                      A->get_blktile( A, Am, An ),
+                                      B->get_blktile( B, Bm, Bn ),
+                                      C->get_blktile( C, Cm, Cn ) );
+
+    /* Insert the task */
+    rt_starpu_insert_task(
+        &cl_hgemm,
+        /* Task codelet arguments */
+        STARPU_CL_ARGS, clargs, sizeof(struct cl_hgemm_args_s),
+
+        /* Task handles */
+        STARPU_R, RTBLKADDR(A, ChamRealHalf, Am, An),
+        STARPU_R, RTBLKADDR(B, ChamRealHalf, Bm, Bn),
+        accessC,  RTBLKADDR(C, ChamRealHalf, Cm, Cn),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
+        STARPU_EXECUTE_ON_NODE,   A->get_rankof(A, Am, An),
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME,              cl_name,
+#endif
+        0 );
+}
+
+void INSERT_TASK_hgemm( const RUNTIME_option_t *options,
+                        cham_trans_t transA, cham_trans_t transB,
+                        int m, int n, int k, int nb,
+                        CHAMELEON_Real16_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                  const CHAM_desc_t *B, int Bm, int Bn,
+                        CHAMELEON_Real16_t beta,  const CHAM_desc_t *C, int Cm, int Cn )
+{
+    /* if ( alpha == 0. ) { */
+    /*     INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, */
+    /*                          beta, C, Cm, Cn ); */
+    /*     return; */
+    /* } */
+
+    struct cl_hgemm_args_s  *clargs = NULL;
+    void (*callback)(void*);
+    int                      accessC;
+    int                      exec = 0;
+    char                    *cl_name = "hgemm";
+
+    /* Handle cache */
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    exec = __chameleon_need_exec;
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    if ( exec ) {
+        clargs = malloc( sizeof( struct cl_hgemm_args_s ) );
+        clargs->transA = transA;
+        clargs->transB = transB;
+        clargs->m      = m;
+        clargs->n      = n;
+        clargs->k      = k;
+        clargs->alpha  = alpha;
+        clargs->beta   = beta;
+    }
+
+    /* Callback for profiling information */
+    callback = options->profiling ? cl_hgemm_callback : NULL;
+
+    /* Reduce the C access if needed */
+    accessC = ( beta == 0. ) ? STARPU_W : (STARPU_RW | ((beta == 1.) ? STARPU_COMMUTE : 0));
+
+    /* Refine name */
+    cl_name = chameleon_codelet_name( cl_name, 3,
+                                      A->get_blktile( A, Am, An ),
+                                      B->get_blktile( B, Bm, Bn ),
+                                      C->get_blktile( C, Cm, Cn ) );
+
+    /* Insert the task */
+    rt_starpu_insert_task(
+        &cl_hgemm,
+        /* Task codelet arguments */
+        STARPU_CL_ARGS, clargs, sizeof(struct cl_hgemm_args_s),
+
+        /* Task handles */
+        STARPU_R, RTBLKADDR(A, ChamRealHalf, Am, An),
+        STARPU_R, RTBLKADDR(B, ChamRealHalf, Bm, Bn),
+        accessC,  RTBLKADDR(C, ChamRealHalf, Cm, Cn),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
+        STARPU_EXECUTE_ON_WORKER, options->workerid,
+        STARPU_POSSIBLY_PARALLEL, options->parallel,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME,              cl_name,
+#endif
+        0 );
+}
diff --git a/runtime/starpu/codelets/codelet_map.c b/runtime/starpu/codelets/codelet_map.c
index 97de57e26efa988a6e425573d637d13115f8f2d3..a55c56ef8fc79e84db3a04a9fa332e889443192b 100644
--- a/runtime/starpu/codelets/codelet_map.c
+++ b/runtime/starpu/codelets/codelet_map.c
@@ -9,10 +9,10 @@
  *
  * @brief Chameleon map StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Florent Pruvost
- * @date 2022-02-22
+ * @date 2023-07-06
  *
  */
 #include "chameleon_starpu.h"
@@ -49,25 +49,29 @@ void INSERT_TASK_map( const RUNTIME_option_t *options,
 
     struct starpu_codelet *codelet = &cl_map;
     void (*callback)(void*) = options->profiling ? cl_map_callback : NULL;
+    char                  *cl_name = "map";
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_RW(A, Am, An);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    cl_name = chameleon_codelet_name( cl_name, 1,
+                                      A->get_blktile( A, Am, An ) );
+
     rt_starpu_insert_task(
         codelet,
         STARPU_VALUE,    &A,                      sizeof(CHAM_desc_t*),
         STARPU_VALUE,    &uplo,                   sizeof(cham_uplo_t),
         STARPU_VALUE,    &Am,                     sizeof(int),
         STARPU_VALUE,    &An,                     sizeof(int),
-        cham_to_starpu_access(accessA), RTBLKADDR(A, void, Am, An),
+        cham_to_starpu_access(accessA), RTBLKADDR(A, ChamByte, Am, An),
         STARPU_VALUE,    &op_fct,                 sizeof(cham_unary_operator_t),
         STARPU_VALUE,    &op_args,                sizeof(void*),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "map",
+        STARPU_NAME, cl_name,
 #endif
         0);
 }
diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c
index 2c1170dacd7cc1a569627028ccb0989bfcd9e1ce..be1fbbbf422325c84068513392c11bb7352c424c 100644
--- a/runtime/starpu/codelets/codelet_zaxpy.c
+++ b/runtime/starpu/codelets/codelet_zaxpy.c
@@ -11,11 +11,11 @@
  *
  * @brief Chameleon zaxpy StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Florent Pruvost
  * @author Mathieu Faverge
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -65,9 +65,9 @@ void INSERT_TASK_zaxpy( const RUNTIME_option_t *options,
             codelet,
             STARPU_VALUE,    &M,                           sizeof(int),
             STARPU_VALUE,    &alpha,                       sizeof(CHAMELEON_Complex64_t),
-            STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+            STARPU_R,        RTBLKADDR(A, ChamComplexDouble, Am, An),
             STARPU_VALUE,    &incA,                        sizeof(int),
-            STARPU_RW,       RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+            STARPU_RW,       RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
             STARPU_VALUE,    &incB,                        sizeof(int),
             STARPU_PRIORITY, options->priority,
             STARPU_CALLBACK, callback,
diff --git a/runtime/starpu/codelets/codelet_zbuild.c b/runtime/starpu/codelets/codelet_zbuild.c
index ad92712dab565e6969e21f09669b3271a1c24d19..3ce93dea06a18299c2ac4fa0d7c471b8b975d297 100644
--- a/runtime/starpu/codelets/codelet_zbuild.c
+++ b/runtime/starpu/codelets/codelet_zbuild.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zbuild StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Piotr Luszczek
@@ -23,7 +23,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -79,7 +79,7 @@ CODELETS_CPU(zbuild, cl_zbuild_cpu_func)
         STARPU_VALUE,    &row_max,                      sizeof(int),
         STARPU_VALUE,    &col_min,                      sizeof(int),
         STARPU_VALUE,    &col_max,                      sizeof(int),
-        STARPU_W,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_W,         RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_VALUE,    &user_data,                    sizeof(void*),
         STARPU_VALUE,    &user_build_callback,          sizeof(void*),
         STARPU_PRIORITY,  options->priority,
diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index 8f05509f70faf0d971c15ecb42aa064738611631..7d1bf87f6c669c18c06d72727d45713be1b86189 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -11,12 +11,12 @@
  *
  * @brief Chameleon zcallback StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  *  @author Mathieu Faverge
  *  @author Cedric Augonnet
  *  @author Florent Pruvost
  *  @author Alycia Lisito
- *  @date 2022-02-22
+ *  @date 2023-07-06
  *  @precisions normal z -> c d s
  *
  */
@@ -84,3 +84,7 @@ CHAMELEON_CL_CB(ztsmqr_hetra1, cti_handle_get_m(task->handles[0]), cti_handle_ge
 CHAMELEON_CL_CB(ztstrf,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]),         M* M*M)
 CHAMELEON_CL_CB(zunmlq,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]),     2. *M* M*M)
 CHAMELEON_CL_CB(zunmqr,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]),     2. *M* M*M)
+#if defined(PRECISION_d) || defined(PRECISION_s)
+CHAMELEON_CL_CB(dlag2h,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
+CHAMELEON_CL_CB(hlag2d,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
+#endif
diff --git a/runtime/starpu/codelets/codelet_zcesca.c b/runtime/starpu/codelets/codelet_zcesca.c
index e2bdf69b2b841c28dcd58bcf1b848f9a167dff41..74cb9c7c6834723ae1a536b933b450c8b6439344 100644
--- a/runtime/starpu/codelets/codelet_zcesca.c
+++ b/runtime/starpu/codelets/codelet_zcesca.c
@@ -9,9 +9,9 @@
  *
  * @brief Chameleon zcesca StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Florent Pruvost
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -96,12 +96,12 @@ void INSERT_TASK_zcesca( const RUNTIME_option_t *options,
     rt_starpu_insert_task(
         codelet,
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zcesca_args_s),
-        STARPU_R,        RTBLKADDR(Gi, CHAMELEON_Complex64_t, Gim, Gin),
-        STARPU_R,        RTBLKADDR(Gj, CHAMELEON_Complex64_t, Gjm, Gjn),
-        STARPU_R,        RTBLKADDR(G, CHAMELEON_Complex64_t, Gm, Gn),
-        STARPU_R,        RTBLKADDR(Di, double, Dim, Din),
-        STARPU_R,        RTBLKADDR(Dj, double, Djm, Djn),
-        STARPU_RW,       RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_R,        RTBLKADDR(Gi, ChamComplexDouble, Gim, Gin),
+        STARPU_R,        RTBLKADDR(Gj, ChamComplexDouble, Gjm, Gjn),
+        STARPU_R,        RTBLKADDR(G, ChamComplexDouble, Gm, Gn),
+        STARPU_R,        RTBLKADDR(Di, ChamRealDouble, Dim, Din),
+        STARPU_R,        RTBLKADDR(Dj, ChamRealDouble, Djm, Djn),
+        STARPU_RW,       RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c
index d39ace8e1a1b0f1784184fdba9de4d7c45b46d0f..60daa3b177a7ec86bcbba840c0e64dd33e72b39b 100644
--- a/runtime/starpu/codelets/codelet_zgeadd.c
+++ b/runtime/starpu/codelets/codelet_zgeadd.c
@@ -11,14 +11,14 @@
  *
  * @brief Chameleon zgeadd StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -104,9 +104,9 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
         STARPU_VALUE,    &m,                  sizeof(int),
         STARPU_VALUE,    &n,                  sizeof(int),
         STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_R,         RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_VALUE,    &beta,               sizeof(CHAMELEON_Complex64_t),
-        accessB,          RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        accessB,          RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c
index 2bacf5dfc1683a0731bbb097e552660d5cd8d380..846482361badc28556fb6da43492b38754354ece 100644
--- a/runtime/starpu/codelets/codelet_zgelqt.c
+++ b/runtime/starpu/codelets/codelet_zgelqt.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zgelqt StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
@@ -22,7 +22,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -80,8 +80,8 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
         STARPU_VALUE,    &m,                 sizeof(int),
         STARPU_VALUE,    &n,                 sizeof(int),
         STARPU_VALUE,    &ib,                sizeof(int),
-        STARPU_RW,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_W,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
+        STARPU_RW,        RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_W,         RTBLKADDR(T, ChamComplexDouble, Tm, Tn),
         /* max( nb * (ib+1), ib * (ib+nb) ) */
         STARPU_SCRATCH,   options->ws_worker,
         /* /\* ib*n + 3*ib*ib + max(m,n) *\/ */
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index cf9b4b168f1abdfe222b1cd3be504b7dfb6bf38b..4b47627258ba618751e82a33945f3fbeed27c8bc 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -31,16 +31,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-struct cl_zgemm_args_s {
-    cham_trans_t transA;
-    cham_trans_t transB;
-    int m;
-    int n;
-    int k;
-    CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t beta;
-};
-
 #if !defined(CHAMELEON_SIMULATION)
 static void
 cl_zgemm_cpu_func( void *descr[], void *cl_arg )
@@ -201,9 +191,9 @@ void INSERT_TASK_zgemm_Astat( const RUNTIME_option_t *options,
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zgemm_args_s),
 
         /* Task handles */
-        STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        accessC,  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_R, RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R, RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
+        accessC,  RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
@@ -272,9 +262,9 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zgemm_args_s),
 
         /* Task handles */
-        STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        accessC,  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_R, RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R, RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
+        accessC,  RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zgemv.c b/runtime/starpu/codelets/codelet_zgemv.c
index 4d0f2dd0920df83898c9f6f0222a49ddc567af49..a2529d99995bf272d4e960916c61c82fea26b762 100644
--- a/runtime/starpu/codelets/codelet_zgemv.c
+++ b/runtime/starpu/codelets/codelet_zgemv.c
@@ -11,9 +11,9 @@
  *
  * @brief Chameleon zgemv StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -109,11 +109,11 @@ void INSERT_TASK_zgemv( const RUNTIME_option_t *options,
         STARPU_VALUE,    &m,                 sizeof(int),
         STARPU_VALUE,    &n,                 sizeof(int),
         STARPU_VALUE,    &alpha,             sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R,         RTBLKADDR(X, CHAMELEON_Complex64_t, Xm, Xn),
+        STARPU_R,         RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R,         RTBLKADDR(X, ChamComplexDouble, Xm, Xn),
         STARPU_VALUE,    &incX,              sizeof(int),
         STARPU_VALUE,    &beta,              sizeof(CHAMELEON_Complex64_t),
-        accessY,          RTBLKADDR(Y, CHAMELEON_Complex64_t, Ym, Yn),
+        accessY,          RTBLKADDR(Y, ChamComplexDouble, Ym, Yn),
         STARPU_VALUE,    &incY,              sizeof(int),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c
index f77f09c5da068b333fc564c94c6287685e936cd9..0ee282e4a02c5bb1dc47e82c474c88a445144114 100644
--- a/runtime/starpu/codelets/codelet_zgeqrt.c
+++ b/runtime/starpu/codelets/codelet_zgeqrt.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zgeqrt StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
@@ -22,7 +22,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -81,8 +81,8 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
         STARPU_VALUE,    &m,                 sizeof(int),
         STARPU_VALUE,    &n,                 sizeof(int),
         STARPU_VALUE,    &ib,                sizeof(int),
-        STARPU_RW,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_W,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
+        STARPU_RW,        RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_W,         RTBLKADDR(T, ChamComplexDouble, Tm, Tn),
         /* max( nb * (ib+1), ib * (ib+nb) ) */
         STARPU_SCRATCH,   options->ws_worker,
         /* ib * (m+3*ib) + max(m,n) */
diff --git a/runtime/starpu/codelets/codelet_zgered.c b/runtime/starpu/codelets/codelet_zgered.c
new file mode 100644
index 0000000000000000000000000000000000000000..394ef74c24ac9023a1b594b348ba21479df48d3a
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zgered.c
@@ -0,0 +1,142 @@
+/**
+ *
+ * @file starpu/codelet_zgered.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgered StarPU codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ * @precisions normal z -> d
+ *
+ */
+#include "chameleon_starpu.h"
+#include <coreblas/lapacke.h>
+#include "runtime_codelet_zc.h"
+#include "runtime_codelet_z.h"
+
+//#define CHAMELEON_DEBUG_GERED
+
+void INSERT_TASK_zgered( const RUNTIME_option_t *options,
+                         double threshold, double Anorm, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An )
+{
+    CHAM_tile_t          *tileA;
+    double                u_low;
+    int64_t               mm, nn;
+#if defined(CHAMELEON_USE_MPI)
+    int                   tag;
+#endif
+    starpu_data_handle_t *handleAin;
+    starpu_data_handle_t  handleAout;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    /* Get the Input handle */
+    mm = Am + (A->i / A->mb);
+    nn = An + (A->j / A->nb);
+    handleAin = A->schedopt;
+    handleAin += ((int64_t)A->lmt) * nn + mm;
+
+    assert( *handleAin != NULL );
+
+    /*
+     * Lets convert the tile precision based on the following criteria:
+     *
+     * ||A_{i,j}||_F  < u_{high} * || A ||_F / (nt * u_{low})
+     * ||A_{i,j}||_F  < u_{high} * || A ||_F / nt *  1/ u_{low}
+     * ||A_{i,j}||_F  < threshold / u_{low}
+     */
+
+    tileA = A->get_blktile( A, Am, An );
+#if defined(CHAMELEON_USE_MPI)
+    tag = starpu_mpi_data_get_tag( *handleAin );
+#endif /* defined(CHAMELEON_USE_MPI) */
+
+#if defined(CHAMELEON_USE_CUDA) && (CUDA_VERSION >= 7500)
+#if defined(PRECISION_d)
+    if ( options->withcuda ) {
+        /*
+         * Check for half precision
+         */
+        u_low = 1.e-4;
+        if ( Anorm < (threshold / u_low) ) {
+#if defined(CHAMELEON_DEBUG_GERED)
+            fprintf( stderr,
+                     "[%2d] Convert the tile ( %d, %d ) to half precision\n",
+                     A->myrank, Am, An );
+#endif
+            starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexHalf );
+
+            rt_starpu_insert_task(
+                &cl_dlag2h,
+                STARPU_VALUE,    &m,                 sizeof(int),
+                STARPU_VALUE,    &n,                 sizeof(int),
+                STARPU_R,        *handleAin,
+                STARPU_W,         handleAout,
+                STARPU_PRIORITY,  options->priority,
+                STARPU_EXECUTE_ON_WORKER, options->workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+                STARPU_NAME, "dlag2h",
+#endif
+                0);
+
+            starpu_data_unregister_submit( *handleAin );
+            *handleAin = handleAout;
+            tileA->flttype = ChamComplexHalf;
+#if defined(CHAMELEON_USE_MPI)
+            starpu_mpi_data_register( handleAout, tag, tileA->rank );
+#endif
+            return;
+        }
+    }
+#endif
+#endif
+
+    /*
+     * Check for single precision
+     */
+#if !defined(CHAMELEON_SIMULATION)
+    u_low = LAPACKE_slamch_work('e');
+#else
+    u_low = 1e-8;
+#endif
+    if ( Anorm < (threshold / u_low) ) {
+#if defined(CHAMELEON_DEBUG_GERED)
+        fprintf( stderr,
+                 "[%2d] Convert the tile ( %d, %d ) to single precision\n",
+                 A->myrank, Am, An );
+#endif
+        starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexFloat );
+
+        rt_starpu_insert_task(
+            &cl_zlag2c,
+            STARPU_VALUE,    &m,                 sizeof(int),
+            STARPU_VALUE,    &n,                 sizeof(int),
+            STARPU_R,        *handleAin,
+            STARPU_W,         handleAout,
+            STARPU_PRIORITY,  options->priority,
+            STARPU_EXECUTE_ON_WORKER, options->workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+            STARPU_NAME, "zlag2c",
+#endif
+            0);
+
+        starpu_data_unregister_submit( *handleAin );
+        *handleAin = handleAout;
+        tileA->flttype = ChamComplexFloat;
+#if defined(CHAMELEON_USE_MPI)
+        starpu_mpi_data_register( *handleAin, tag, tileA->rank );
+#endif
+        return;
+    }
+}
diff --git a/runtime/starpu/codelets/codelet_zgerst.c b/runtime/starpu/codelets/codelet_zgerst.c
new file mode 100644
index 0000000000000000000000000000000000000000..68490f011eef8227bfa656bafdf60ab4c0e70e79
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zgerst.c
@@ -0,0 +1,118 @@
+/**
+ *
+ * @file starpu/codelet_zgerst.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgerst StarPU codelet
+ *
+ * @version 1.3.0
+ * @author Mathieu Faverge
+ * @date 2023-07-06
+ * @precisions normal z -> d
+ *
+ */
+#include "chameleon_starpu.h"
+#include <coreblas/lapacke.h>
+#include "runtime_codelet_zc.h"
+#include "runtime_codelet_z.h"
+
+//#define CHAMELEON_DEBUG_GERST
+
+void INSERT_TASK_zgerst( const RUNTIME_option_t *options,
+                         int m, int n,
+                         const CHAM_desc_t *A, int Am, int An )
+{
+    CHAM_tile_t          *tileA;
+    int64_t               mm, nn;
+#if defined(CHAMELEON_USE_MPI)
+    int                   tag;
+#endif
+    starpu_data_handle_t *handleAin;
+    starpu_data_handle_t  handleAout;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    tileA = A->get_blktile( A, Am, An );
+    if ( tileA->flttype == ChamComplexDouble ) {
+        return;
+    }
+
+    /* Get the Input handle */
+    mm = Am + (A->i / A->mb);
+    nn = An + (A->j / A->nb);
+    handleAin = A->schedopt;
+    handleAin += ((int64_t)A->lmt) * nn + mm;
+
+    assert( *handleAin != NULL );
+
+#if defined(CHAMELEON_USE_MPI)
+    tag = starpu_mpi_data_get_tag( *handleAin );
+#endif /* defined(CHAMELEON_USE_MPI) */
+
+    starpu_cham_tile_register( &handleAout, -1, tileA, ChamComplexDouble );
+
+    switch( tileA->flttype ) {
+#if defined(CHAMELEON_USE_CUDA) && (CUDA_VERSION >= 7500)
+#if defined(PRECISION_d)
+    /*
+     * Restore from half precision
+     */
+    case ChamComplexHalf:
+#if defined(CHAMELEON_DEBUG_GERST)
+        fprintf( stderr,
+                 "[%2d] Convert back the tile ( %d, %d ) from half precision\n",
+                 A->myrank, Am, An );
+#endif
+        rt_starpu_insert_task(
+            &cl_hlag2d,
+            STARPU_VALUE,    &m,                 sizeof(int),
+            STARPU_VALUE,    &n,                 sizeof(int),
+            STARPU_R,        *handleAin,
+            STARPU_W,         handleAout,
+            STARPU_PRIORITY,  options->priority,
+            STARPU_EXECUTE_ON_WORKER, options->workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+            STARPU_NAME, "hlag2d",
+#endif
+            0);
+        break;
+#endif
+#endif
+
+    case ChamComplexFloat:
+#if defined(CHAMELEON_DEBUG_GERST)
+        fprintf( stderr,
+                 "[%2d] Convert back the tile ( %d, %d ) from half precision\n",
+                 A->myrank, Am, An );
+#endif
+        rt_starpu_insert_task(
+            &cl_clag2z,
+            STARPU_VALUE,    &m,                 sizeof(int),
+            STARPU_VALUE,    &n,                 sizeof(int),
+            STARPU_R,        *handleAin,
+            STARPU_W,         handleAout,
+            STARPU_PRIORITY,  options->priority,
+            STARPU_EXECUTE_ON_WORKER, options->workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+            STARPU_NAME, "clag2z",
+#endif
+            0);
+        break;
+
+    default:
+        fprintf( stderr, "ERROR: Unknonw input datatype" );
+    }
+
+    starpu_data_unregister_submit( *handleAin );
+    *handleAin = handleAout;
+    tileA->flttype = ChamComplexDouble;
+#if defined(CHAMELEON_USE_MPI)
+    starpu_mpi_data_register( handleAout, tag, tileA->rank );
+#endif
+}
diff --git a/runtime/starpu/codelets/codelet_zgersum.c b/runtime/starpu/codelets/codelet_zgersum.c
index 5a53839a635243b0fb8904cd7768d852a984b70a..7c218ab0075ef5c56ceda76360c9797fe765546d 100644
--- a/runtime/starpu/codelets/codelet_zgersum.c
+++ b/runtime/starpu/codelets/codelet_zgersum.c
@@ -11,11 +11,11 @@
  *
  * @brief Chameleon zgersum StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Romain Peressoni
  * @author Mathieu Faverge
  * @author Antoine Jego
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -117,7 +117,7 @@ void
 RUNTIME_zgersum_set_methods( const CHAM_desc_t *A, int Am, int An )
 {
 #if defined(HAVE_STARPU_MPI_REDUX)
-    starpu_data_set_reduction_methods( RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+    starpu_data_set_reduction_methods( RTBLKADDR(A, ChamComplexDouble, Am, An),
                                        &cl_zgersum_redux,
                                        &cl_zgersum_init );
 #endif
@@ -129,7 +129,7 @@ RUNTIME_zgersum_submit_tree( const RUNTIME_option_t *options,
 {
 #if defined(HAVE_STARPU_MPI_REDUX) && defined(CHAMELEON_USE_MPI)
     starpu_mpi_redux_data_prio_tree( MPI_COMM_WORLD,
-                                     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+                                     RTBLKADDR(A, ChamComplexDouble, Am, An),
                                      options->priority + 1,
                                      2 /* Binary tree */ );
 #else
diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c
index edd99dc349250967a54a951f0e83eddda11bcb9c..2afae321494854f744c6ad1fcf9161db8701b402 100644
--- a/runtime/starpu/codelets/codelet_zgessm.c
+++ b/runtime/starpu/codelets/codelet_zgessm.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zgessm StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
@@ -22,7 +22,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -77,9 +77,9 @@ void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
         STARPU_VALUE,     &k,                        sizeof(int),
         STARPU_VALUE,    &ib,                        sizeof(int),
         STARPU_VALUE,          &IPIV,                      sizeof(int*),
-        STARPU_R,             RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln),
-        STARPU_R,             RTBLKADDR(D, CHAMELEON_Complex64_t, Dm, Dn),
-        STARPU_RW,            RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_R,             RTBLKADDR(L, ChamComplexDouble, Lm, Ln),
+        STARPU_R,             RTBLKADDR(D, ChamComplexDouble, Dm, Dn),
+        STARPU_RW,            RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zgessq.c b/runtime/starpu/codelets/codelet_zgessq.c
index 8972fea258861e817c83486682a8d844c3925bc6..e7de6b666101fbb6d240e85ead4bcb849f9dbe24 100644
--- a/runtime/starpu/codelets/codelet_zgessq.c
+++ b/runtime/starpu/codelets/codelet_zgessq.c
@@ -11,14 +11,14 @@
  *
  * @brief Chameleon zgessq StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -65,8 +65,8 @@ void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
         STARPU_VALUE,    &storev,                     sizeof(cham_store_t),
         STARPU_VALUE,    &m,                          sizeof(int),
         STARPU_VALUE,    &n,                          sizeof(int),
-        STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,       RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn),
+        STARPU_R,        RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW,       RTBLKADDR(SCALESUMSQ, ChamRealDouble, SCALESUMSQm, SCALESUMSQn),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zgesum.c b/runtime/starpu/codelets/codelet_zgesum.c
index d8b64642637f1a8c4e13026495a132344ee3bd4a..d1947b4a1ecb6666008e63c9b82f558c1bb60e8e 100644
--- a/runtime/starpu/codelets/codelet_zgesum.c
+++ b/runtime/starpu/codelets/codelet_zgesum.c
@@ -9,9 +9,9 @@
  *
  * @brief Chameleon zgesum StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Florent Pruvost
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -69,8 +69,8 @@ void INSERT_TASK_zgesum( const RUNTIME_option_t *options,
     rt_starpu_insert_task(
         codelet,
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zgesum_args_s),
-        STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,       RTBLKADDR(SUMS, CHAMELEON_Complex64_t, SUMSm, SUMSn),
+        STARPU_R,        RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW,       RTBLKADDR(SUMS, ChamComplexDouble, SUMSm, SUMSn),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zgetrf.c b/runtime/starpu/codelets/codelet_zgetrf.c
index 6f667ac70c1869cbe24196553bf7fa8247ff7d6e..568265ee27721d0c34d80012e860514772a414b4 100644
--- a/runtime/starpu/codelets/codelet_zgetrf.c
+++ b/runtime/starpu/codelets/codelet_zgetrf.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zgetrf StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
@@ -20,7 +20,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -74,7 +74,7 @@ void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
         codelet,
         STARPU_VALUE,             &m,                        sizeof(int),
         STARPU_VALUE,             &n,                        sizeof(int),
-        STARPU_RW,                     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_RW,                     RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_VALUE,                  &IPIV,                      sizeof(int*),
         STARPU_VALUE,    &check_info,                sizeof(cham_bool_t),
         STARPU_VALUE,         &iinfo,                        sizeof(int),
diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
index 5ea78b61145999a5ce02803361e5448c9aa9f562..03398f6447c34fb15f85dba005d2a92cb9d1aa9a 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zgetrf_incpiv StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
@@ -22,7 +22,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -83,8 +83,8 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
         STARPU_VALUE,    &m,                 sizeof(int),
         STARPU_VALUE,    &n,                 sizeof(int),
         STARPU_VALUE,    &ib,                sizeof(int),
-        STARPU_RW,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_W,         RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln),
+        STARPU_RW,        RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_W,         RTBLKADDR(L, ChamComplexDouble, Lm, Ln),
         STARPU_VALUE,    &IPIV,              sizeof(int*),
         STARPU_VALUE,    &check_info,        sizeof(cham_bool_t),
         STARPU_VALUE,    &iinfo,             sizeof(int),
diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
index 9c40effb01e380ebba58794a1ba06dc1673965a3..4488882e796ff7d9cb87a07f57e9b40c3f49036c 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zgetrf_nopiv StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Omar Zenati
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
@@ -19,7 +19,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -75,7 +75,7 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
         STARPU_VALUE,    &m,                         sizeof(int),
         STARPU_VALUE,    &n,                         sizeof(int),
         STARPU_VALUE,    &ib,                        sizeof(int),
-        STARPU_RW,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_RW,        RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_VALUE,    &iinfo,                     sizeof(int),
         STARPU_VALUE,    &(options->sequence),       sizeof(RUNTIME_sequence_t*),
         STARPU_VALUE,    &(options->request),        sizeof(RUNTIME_request_t*),
diff --git a/runtime/starpu/codelets/codelet_zgram.c b/runtime/starpu/codelets/codelet_zgram.c
index 83437b670113205d7d88dac910895997695440e2..883093966a5355c00a2fc9f0de9d35bd4b0c823c 100644
--- a/runtime/starpu/codelets/codelet_zgram.c
+++ b/runtime/starpu/codelets/codelet_zgram.c
@@ -9,11 +9,11 @@
  *
  * @brief Chameleon zgram StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Florent Pruvost
  * @author Lucas Barros de Assis
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -70,10 +70,10 @@ void INSERT_TASK_zgram( const RUNTIME_option_t *options,
         STARPU_VALUE,    &n,                         sizeof(int),
         STARPU_VALUE,    &mt,                        sizeof(int),
         STARPU_VALUE,    &nt,                        sizeof(int),
-        STARPU_R,        RTBLKADDR(Di, double, Dim, Din),
-        STARPU_R,        RTBLKADDR(Dj, double, Djm, Djn),
-        STARPU_R,        RTBLKADDR(D, double, Dm, Dn),
-        STARPU_RW,       RTBLKADDR(A, double, Am, An),
+        STARPU_R,        RTBLKADDR(Di, ChamRealDouble, Dim, Din),
+        STARPU_R,        RTBLKADDR(Dj, ChamRealDouble, Djm, Djn),
+        STARPU_R,        RTBLKADDR(D, ChamRealDouble, Dm, Dn),
+        STARPU_RW,       RTBLKADDR(A, ChamRealDouble, Am, An),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c
index 638b3a9f61d71a2d002c0f4bbb2ddf23a449d0d9..2fa77bfb0464567fdbdf1f4decac0627c39c8076 100644
--- a/runtime/starpu/codelets/codelet_zhe2ge.c
+++ b/runtime/starpu/codelets/codelet_zhe2ge.c
@@ -11,12 +11,12 @@
  *
  * @brief Chameleon zhe2ge StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -65,8 +65,8 @@ void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options,
         STARPU_VALUE,  &uplo,                sizeof(int),
         STARPU_VALUE,     &m,                        sizeof(int),
         STARPU_VALUE,     &n,                        sizeof(int),
-        STARPU_R,             RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_W,             RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,             RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_W,             RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c
index 67fe91b28888f1532966332f275683e9c6ba5717..c38e525963aed0aef6789129b8ae2cb8aa8a96d7 100644
--- a/runtime/starpu/codelets/codelet_zhemm.c
+++ b/runtime/starpu/codelets/codelet_zhemm.c
@@ -193,9 +193,9 @@ void INSERT_TASK_zhemm_Astat( const RUNTIME_option_t *options,
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zhemm_args_s),
 
         /* Task handles */
-        STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        accessC,  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_R, RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R, RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
+        accessC,  RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
@@ -263,9 +263,9 @@ void INSERT_TASK_zhemm( const RUNTIME_option_t *options,
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zhemm_args_s),
 
         /* Task handles */
-        STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        accessC,  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_R, RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R, RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
+        accessC,  RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c
index 96ff9fbf871d309c7674da923826d2989729c7da..223522bbb6559cb1b753579bf48ac608089bdd92 100644
--- a/runtime/starpu/codelets/codelet_zher2k.c
+++ b/runtime/starpu/codelets/codelet_zher2k.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zher2k StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -20,7 +20,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Loris Lucido
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c
  *
  */
@@ -154,10 +154,10 @@ INSERT_TASK_zher2k( const RUNTIME_option_t *options,
         STARPU_VALUE,         &n,                        sizeof(int),
         STARPU_VALUE,         &k,                        sizeof(int),
         STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,                 RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R,                 RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
         STARPU_VALUE,      &beta,                     sizeof(double),
-        accessC,                  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,                  RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c
index 9665901ee5d3cd196db65ed8c1030a1be357b3e1..1429529684ee9630f21717f1bc1d594c06697469 100644
--- a/runtime/starpu/codelets/codelet_zherfb.c
+++ b/runtime/starpu/codelets/codelet_zherfb.c
@@ -11,12 +11,12 @@
  *
  * @brief Chameleon zherfb StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Hatem Ltaief
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Mathieu Faverge
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -106,9 +106,9 @@ void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
         STARPU_VALUE,    &ib,                sizeof(int),
         STARPU_VALUE,    &nb,                sizeof(int),
         STARPU_VALUE,    &nb,                sizeof(int), /* ldw */
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_R,         RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R,         RTBLKADDR(T, ChamComplexDouble, Tm, Tn),
+        STARPU_RW,        RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index c53607f9d1034671d29bb2a4160d26e535daf598..c9a9e3017c11c26ce042ea8ac9c3990d2db1b0c1 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -157,8 +157,8 @@ void INSERT_TASK_zherk( const RUNTIME_option_t *options,
         &cl_zherk,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zherk_args_s),
-        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        accessC,       RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_R,      RTBLKADDR(A, ChamComplexDouble, Am, An),
+        accessC,       RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c
index 5a0deb8da5757a4c9120703de5b82459adc0da00..1614e45af17f684366b36d359f0c30fcafe623d3 100644
--- a/runtime/starpu/codelets/codelet_zlacpy.c
+++ b/runtime/starpu/codelets/codelet_zlacpy.c
@@ -112,8 +112,8 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
         &cl_zlacpyx,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zlacpy_args_s),
-        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_W,      RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,      RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_W,      RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
@@ -162,8 +162,8 @@ void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
         &cl_zlacpy,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zlacpy_args_s),
-        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_W,      RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,      RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_W,      RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c
index b6ad117f48741309e6b1ab2e54ac2a827a155982..961cc9c0376c0fa48ce6bbf0fe2b4e183e3714ff 100644
--- a/runtime/starpu/codelets/codelet_zlag2c.c
+++ b/runtime/starpu/codelets/codelet_zlag2c.c
@@ -11,16 +11,14 @@
  *
  * @brief Chameleon zlag2c StarPU codelet
  *
- * @version 1.2.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions mixed zc -> ds
  *
  */
@@ -28,7 +26,8 @@
 #include "runtime_codelet_zc.h"
 
 #if !defined(CHAMELEON_SIMULATION)
-static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
+static void
+cl_zlag2c_cpu_func( void *descr[], void *cl_arg )
 {
     int info = 0;
     int m;
@@ -39,15 +38,44 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
     tileA = cti_interface_get(descr[0]);
     tileB = cti_interface_get(descr[1]);
 
-    starpu_codelet_unpack_args(cl_arg, &m, &n);
+    starpu_codelet_unpack_args( cl_arg, &m, &n );
     TCORE_zlag2c( m, n, tileA, tileB, &info );
 }
+
+#if defined(CHAMELEON_USE_CUDA)
+static void
+cl_zlag2c_cuda_func( void *descr[], void *cl_arg )
+{
+    cublasHandle_t handle = starpu_cublas_get_local_handle();
+    CHAM_tile_t   *tileA;
+    CHAM_tile_t   *tileB;
+    int            m, n;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+
+    starpu_codelet_unpack_args( cl_arg, &m, &n );
+
+    int rc = CUDA_zlag2c(
+        m, n,
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        handle );
+
+    if ( rc != 0 ) {
+        fprintf( stderr, "core_zlag2c failed with info(%d)\n", rc );
+    }
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlag2c, cl_zlag2c_cpu_func)
+CODELETS( zlag2c, cl_zlag2c_cpu_func, cl_zlag2c_cuda_func, STARPU_CUDA_ASYNC )
 
 /**
  *
@@ -72,8 +100,8 @@ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
         codelet,
         STARPU_VALUE,    &m,                 sizeof(int),
         STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_W,         RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn),
+        STARPU_R,         RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_W,         RTBLKADDR(B, ChamComplexFloat, Bm, Bn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
@@ -97,17 +125,45 @@ static void cl_clag2z_cpu_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &m, &n);
     TCORE_clag2z( m, n, tileA, tileB);
 }
+
+#if defined(CHAMELEON_USE_CUDA)
+static void
+cl_clag2z_cuda_func( void *descr[], void *cl_arg )
+{
+    cublasHandle_t handle = starpu_cublas_get_local_handle();
+    CHAM_tile_t   *tileA;
+    CHAM_tile_t   *tileB;
+    int            m, n;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+
+    starpu_codelet_unpack_args( cl_arg, &m, &n );
+
+    int rc = CUDA_clag2z(
+        m, n,
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        handle );
+    if ( rc != 0 ) {
+        fprintf( stderr, "core_clag2z failed with info(%d)\n", rc );
+    }
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU(clag2z, cl_clag2z_cpu_func)
+CODELETS( clag2z, cl_clag2z_cpu_func, cl_clag2z_cuda_func, STARPU_CUDA_ASYNC )
 
-void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
-                       int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_clag2z( const RUNTIME_option_t *options,
+                         int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_clag2z;
@@ -122,8 +178,8 @@ void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
         codelet,
         STARPU_VALUE,    &m,                 sizeof(int),
         STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An),
-        STARPU_W,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,         RTBLKADDR(A, ChamComplexFloat, Am, An),
+        STARPU_W,         RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c
index 38d5d7304e9768612917fa9344e9e4e6240182eb..1747215a60e72b1cdac0c7e77bfff04a31725ce7 100644
--- a/runtime/starpu/codelets/codelet_zlange.c
+++ b/runtime/starpu/codelets/codelet_zlange.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zlange StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Julien Langou
@@ -20,7 +20,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -70,9 +70,9 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options,
         STARPU_VALUE,    &norm,              sizeof(cham_normtype_t),
         STARPU_VALUE,    &M,                 sizeof(int),
         STARPU_VALUE,    &N,                 sizeof(int),
-        STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_R,        RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_SCRATCH,  options->ws_worker,
-        STARPU_W,        RTBLKADDR(B, double, Bm, Bn),
+        STARPU_W,        RTBLKADDR(B, ChamRealDouble, Bm, Bn),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
@@ -121,8 +121,8 @@ void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
 
     rt_starpu_insert_task(
         codelet,
-        STARPU_R,        RTBLKADDR(A, double, Am, An),
-        STARPU_RW,       RTBLKADDR(B, double, Bm, Bn),
+        STARPU_R,        RTBLKADDR(A, ChamRealDouble, Am, An),
+        STARPU_RW,       RTBLKADDR(B, ChamRealDouble, Bm, Bn),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
          STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zlanhe.c b/runtime/starpu/codelets/codelet_zlanhe.c
index 0245319f3f8ab78997c60b5b9332eb2be265f223..c45ecc83987918785f212fd8922453e679ecb63f 100644
--- a/runtime/starpu/codelets/codelet_zlanhe.c
+++ b/runtime/starpu/codelets/codelet_zlanhe.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zlanhe StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Julien Langou
@@ -20,7 +20,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c
  *
  */
@@ -69,9 +69,9 @@ void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
         STARPU_VALUE,    &norm,              sizeof(int),
         STARPU_VALUE,    &uplo,              sizeof(int),
         STARPU_VALUE,    &N,                 sizeof(int),
-        STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_R,        RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_SCRATCH,  options->ws_worker,
-        STARPU_W,        RTBLKADDR(B, double, Bm, Bn),
+        STARPU_W,        RTBLKADDR(B, ChamRealDouble, Bm, Bn),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zlansy.c b/runtime/starpu/codelets/codelet_zlansy.c
index f75ad866e0066addc6c0d32c6f3882ada8f09809..21b5a1140c3a9d94d032308cf81a55b751844dbd 100644
--- a/runtime/starpu/codelets/codelet_zlansy.c
+++ b/runtime/starpu/codelets/codelet_zlansy.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zlansy StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Julien Langou
@@ -20,7 +20,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -70,9 +70,9 @@ void INSERT_TASK_zlansy( const RUNTIME_option_t *options,
         STARPU_VALUE,    &norm,              sizeof(int),
         STARPU_VALUE,    &uplo,              sizeof(int),
         STARPU_VALUE,    &N,                 sizeof(int),
-        STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_R,        RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_SCRATCH,  options->ws_worker,
-        STARPU_W,        RTBLKADDR(B, double, Bm, Bn),
+        STARPU_W,        RTBLKADDR(B, ChamRealDouble, Bm, Bn),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zlantr.c b/runtime/starpu/codelets/codelet_zlantr.c
index ff96adb2c9a80100cee65978a44eea36865eb35e..e2067a02c392391c39dc28e9bf81ebf922defc4e 100644
--- a/runtime/starpu/codelets/codelet_zlantr.c
+++ b/runtime/starpu/codelets/codelet_zlantr.c
@@ -11,14 +11,14 @@
  *
  * @brief Chameleon zlantr StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Florent Pruvost
  * @author Lucas Barros de Assis
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -71,9 +71,9 @@ void INSERT_TASK_zlantr( const RUNTIME_option_t *options,
         STARPU_VALUE,    &diag,              sizeof(int),
         STARPU_VALUE,    &M,                 sizeof(int),
         STARPU_VALUE,    &N,                 sizeof(int),
-        STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_R,        RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_SCRATCH,  options->ws_worker,
-        STARPU_W,        RTBLKADDR(B, double, Bm, Bn),
+        STARPU_W,        RTBLKADDR(B, ChamRealDouble, Bm, Bn),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c
index 19162d6388142d66808bd436e959aa048dc2b831..1246af7734068fdb4b836e03ba87adee2751ba69 100644
--- a/runtime/starpu/codelets/codelet_zlascal.c
+++ b/runtime/starpu/codelets/codelet_zlascal.c
@@ -91,7 +91,7 @@ void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
         &cl_zlascal,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zlascal_args_s),
-        STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_RW,     RTBLKADDR(A, ChamComplexDouble, Am, An),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c
index c6acdc531ba2b8b9245aca7716885672caf0a5be..b78055d3385b4609cb43c1a6056e99cd58815e07 100644
--- a/runtime/starpu/codelets/codelet_zlaset.c
+++ b/runtime/starpu/codelets/codelet_zlaset.c
@@ -85,7 +85,7 @@ void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
         &cl_zlaset,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zlaset_args_s),
-        STARPU_W,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_W,      RTBLKADDR(A, ChamComplexDouble, Am, An),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zlaset2.c b/runtime/starpu/codelets/codelet_zlaset2.c
index 9c1bc9f3d9b5c397ef11478d9e2f803faa97b66e..33a136f359bb8979d09f6222bddc866e2ef2d437 100644
--- a/runtime/starpu/codelets/codelet_zlaset2.c
+++ b/runtime/starpu/codelets/codelet_zlaset2.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zlaset2 StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
@@ -21,7 +21,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -66,7 +66,7 @@ void INSERT_TASK_zlaset2(const RUNTIME_option_t *options,
         STARPU_VALUE,     &M,                        sizeof(int),
         STARPU_VALUE,     &N,                        sizeof(int),
         STARPU_VALUE, &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_W,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_W,      RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c
index 9befacd6780ffde876cd38da6efe15d18bf7ae9a..42fa0637df433f277656c09c259b55d33df347e3 100644
--- a/runtime/starpu/codelets/codelet_zlatro.c
+++ b/runtime/starpu/codelets/codelet_zlatro.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zlatro StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Julien Langou
@@ -22,7 +22,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -77,8 +77,8 @@ void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
         STARPU_VALUE,   &trans,   sizeof(int),
         STARPU_VALUE,   &m,       sizeof(int),
         STARPU_VALUE,   &n,       sizeof(int),
-        STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_W,        RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,        RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_W,        RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c
index bc49ece66b29f26d901891c3ca6900bbee749a87..bdb000e520bebbc92b8aaa45adbf883bb6d5ed52 100644
--- a/runtime/starpu/codelets/codelet_zlauum.c
+++ b/runtime/starpu/codelets/codelet_zlauum.c
@@ -83,7 +83,7 @@ void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
         &cl_zlauum,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zlauum_args_s),
-        STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_RW,     RTBLKADDR(A, ChamComplexDouble, Am, An),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zpanel.c b/runtime/starpu/codelets/codelet_zpanel.c
index 0b917134ef9ce05f6f91bca822f9d8b5b3d18dd1..7e450986c7448fbc1bd6c2398a7e21df9c63bd89 100644
--- a/runtime/starpu/codelets/codelet_zpanel.c
+++ b/runtime/starpu/codelets/codelet_zpanel.c
@@ -9,12 +9,12 @@
  *
  * @brief Chameleon zpanel StarPU codelets
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment Codelets to perform panel factorization with partial pivoting
  *
  * @author Mathieu Faverge
  * @author Matthieu Kuhn
- * @date 2023-02-21
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -93,8 +93,8 @@ void INSERT_TASK_zgetrf_panel_nopiv_percol_diag( const RUNTIME_option_t *options
         STARPU_VALUE,             &m,                   sizeof(int),
         STARPU_VALUE,             &n,                   sizeof(int),
         STARPU_VALUE,             &k,                   sizeof(int),
-        STARPU_RW,                RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_W,                 RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),
+        STARPU_RW,                RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_W,                 RTBLKADDR(U, ChamComplexDouble, Um, Un),
         STARPU_VALUE,             &iinfo,               sizeof(int),
         STARPU_VALUE,             &(options->sequence), sizeof(RUNTIME_sequence_t*),
         STARPU_VALUE,             &(options->request),  sizeof(RUNTIME_request_t*),
@@ -165,8 +165,8 @@ void INSERT_TASK_zgetrf_panel_nopiv_percol_trsm( const RUNTIME_option_t *options
         STARPU_VALUE,             &m, sizeof(int),
         STARPU_VALUE,             &n, sizeof(int),
         STARPU_VALUE,             &k, sizeof(int),
-        STARPU_RW,                RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R,                 RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),
+        STARPU_RW,                RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R,                 RTBLKADDR(U, ChamComplexDouble, Um, Un),
         STARPU_PRIORITY,          options->priority,
         STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c
index 34bee5797a5c7355a33d073c7b275ed6dcc5eadf..bc3052e2730e8d394bb2bc4ac7a93f44a0642e1a 100644
--- a/runtime/starpu/codelets/codelet_zplghe.c
+++ b/runtime/starpu/codelets/codelet_zplghe.c
@@ -89,7 +89,7 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
         &cl_zplghe,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zplghe_args_s),
-        STARPU_W,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_W,      RTBLKADDR(A, ChamComplexDouble, Am, An),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c
index e39a3c69bc23ba9e16cbc9fce04734ea0332917c..fd29f8d0a0263bf97c7e76aa97b746e5febf6099 100644
--- a/runtime/starpu/codelets/codelet_zplgsy.c
+++ b/runtime/starpu/codelets/codelet_zplgsy.c
@@ -89,7 +89,7 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
         &cl_zplgsy,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zplgsy_args_s),
-        STARPU_W,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_W,      RTBLKADDR(A, ChamComplexDouble, Am, An),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c
index 33a2bee23ca7b7ee93ebe9bbbc3d6f7cff2ff00b..abb1ebd399c75d3b8db3cc77dd3239e1c81f5047 100644
--- a/runtime/starpu/codelets/codelet_zplrnt.c
+++ b/runtime/starpu/codelets/codelet_zplrnt.c
@@ -88,7 +88,7 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
         &cl_zplrnt,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zplrnt_args_s),
-        STARPU_W,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_W,      RTBLKADDR(A, ChamComplexDouble, Am, An),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zplssq.c b/runtime/starpu/codelets/codelet_zplssq.c
index 5d76a91f733a55fb919297ad3c512ac39d57bbfa..6a1c7a54d0410bb1a1e5aa64c03c25235c60ebdd 100644
--- a/runtime/starpu/codelets/codelet_zplssq.c
+++ b/runtime/starpu/codelets/codelet_zplssq.c
@@ -11,13 +11,13 @@
  *
  * @brief Chameleon zplssq StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -69,8 +69,8 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
         STARPU_VALUE,    &storev,            sizeof(int),
         STARPU_VALUE,    &M,                 sizeof(int),
         STARPU_VALUE,    &N,                 sizeof(int),
-        STARPU_R,  RTBLKADDR( IN,  double, INm,  INn  ),
-        STARPU_RW, RTBLKADDR( OUT, double, OUTm, OUTn ),
+        STARPU_R,  RTBLKADDR( IN,  ChamRealDouble, INm,  INn  ),
+        STARPU_RW, RTBLKADDR( OUT, ChamRealDouble, OUTm, OUTn ),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
@@ -115,7 +115,7 @@ void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, int N,
     rt_starpu_insert_task(
         codelet,
         STARPU_VALUE,    &N,                 sizeof(int),
-        STARPU_RW, RTBLKADDR(RESULT, double, RESULTm, RESULTn),
+        STARPU_RW, RTBLKADDR(RESULT, ChamRealDouble, RESULTm, RESULTn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c
index b8b18765db71cecf6a9f79a4b3a333cb272f165d..1195a1e93e5d13db02e75c3bcd962a95394cd688 100644
--- a/runtime/starpu/codelets/codelet_zpotrf.c
+++ b/runtime/starpu/codelets/codelet_zpotrf.c
@@ -100,7 +100,7 @@ void INSERT_TASK_zpotrf( const RUNTIME_option_t *options,
         &cl_zpotrf,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zpotrf_args_s),
-        STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_RW,     RTBLKADDR(A, ChamComplexDouble, Am, An),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c
index b0f413cf8b9956aca377adf39c11d1faa871f1bf..f9cece0bf2156ec58922f77fdd10d077c7150875 100644
--- a/runtime/starpu/codelets/codelet_zssssm.c
+++ b/runtime/starpu/codelets/codelet_zssssm.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zssssm StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
@@ -22,7 +22,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -85,10 +85,10 @@ void INSERT_TASK_zssssm( const RUNTIME_option_t *options,
         STARPU_VALUE,    &n2,                        sizeof(int),
         STARPU_VALUE,     &k,                        sizeof(int),
         STARPU_VALUE,    &ib,                        sizeof(int),
-        STARPU_RW,            RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n),
-        STARPU_RW,            RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n),
-        STARPU_R,             RTBLKADDR(L1, CHAMELEON_Complex64_t, L1m, L1n),
-        STARPU_R,             RTBLKADDR(L2, CHAMELEON_Complex64_t, L2m, L2n),
+        STARPU_RW,            RTBLKADDR(A1, ChamComplexDouble, A1m, A1n),
+        STARPU_RW,            RTBLKADDR(A2, ChamComplexDouble, A2m, A2n),
+        STARPU_R,             RTBLKADDR(L1, ChamComplexDouble, L1m, L1n),
+        STARPU_R,             RTBLKADDR(L2, ChamComplexDouble, L2m, L2n),
         STARPU_VALUE,          &IPIV,                      sizeof(int*),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c
index ed8dea80b401d87dc8bd953c169eb11d2945cb7d..6748325a666032c7d7cb210da4f49cb1b3d247ed 100644
--- a/runtime/starpu/codelets/codelet_zsymm.c
+++ b/runtime/starpu/codelets/codelet_zsymm.c
@@ -194,9 +194,9 @@ void INSERT_TASK_zsymm_Astat( const RUNTIME_option_t *options,
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zsymm_args_s),
 
         /* Task handles */
-        STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        accessC,  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_R, RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R, RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
+        accessC,  RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
@@ -264,9 +264,9 @@ void INSERT_TASK_zsymm( const RUNTIME_option_t *options,
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zsymm_args_s),
 
         /* Task handles */
-        STARPU_R, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R, RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        accessC,  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_R, RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R, RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
+        accessC,  RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c
index e6dd471e68010f04a3296b3dd5299b19e76c8537..51f72d20ea50b88bc00120ce412338ff0d00bad0 100644
--- a/runtime/starpu/codelets/codelet_zsyr2k.c
+++ b/runtime/starpu/codelets/codelet_zsyr2k.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zsyr2k StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -20,7 +20,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Loris Lucido
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -153,10 +153,10 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
         STARPU_VALUE,         &n,                        sizeof(int),
         STARPU_VALUE,         &k,                        sizeof(int),
         STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,                 RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R,                 RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
         STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
-        accessC,                  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,                  RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index f11dd4158341615defeb35755d8c8dcabb28ebce..9df9fd08ad4e79cd8017f95a0596eca2e3c872f0 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -157,8 +157,8 @@ void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
         &cl_zsyrk,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_zsyrk_args_s),
-        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        accessC,       RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_R,      RTBLKADDR(A, ChamComplexDouble, Am, An),
+        accessC,       RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_zsyssq.c b/runtime/starpu/codelets/codelet_zsyssq.c
index e50ceb11e09336f79600b24f7c6255746ffe9036..cf710e468f8e3f6135357b38549a177903981d97 100644
--- a/runtime/starpu/codelets/codelet_zsyssq.c
+++ b/runtime/starpu/codelets/codelet_zsyssq.c
@@ -11,13 +11,13 @@
  *
  * @brief Chameleon zsyssq StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -63,8 +63,8 @@ void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
         STARPU_VALUE,    &storev,                     sizeof(cham_store_t),
         STARPU_VALUE,    &uplo,                       sizeof(int),
         STARPU_VALUE,    &n,                          sizeof(int),
-        STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,       RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn),
+        STARPU_R,        RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW,       RTBLKADDR(SCALESUMSQ, ChamRealDouble, SCALESUMSQm, SCALESUMSQn),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
index 0cf3b092f721fb6cfbf53b50c45a1114bacb41de..5882c5855b303b90c56636aa0a3f9a64afcc11e5 100644
--- a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zsytrf_nopiv StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -21,7 +21,7 @@
  * @author Marc Sergent
  * @author Lucas Barros de Assis
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c
  *
  */
@@ -64,7 +64,7 @@ void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options,
         codelet,
         STARPU_VALUE,    &uplo,                      sizeof(int),
         STARPU_VALUE,    &n,                         sizeof(int),
-        STARPU_RW,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_RW,        RTBLKADDR(A, ChamComplexDouble, Am, An),
         STARPU_VALUE,    &iinfo,                     sizeof(int),
         /* STARPU_SCRATCH,   options->ws_worker, */
         STARPU_PRIORITY,  options->priority,
diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c
index dc68af0f1f598db8bedd8af0ddb6a6d84c8b0aa5..3471d4878436fd2d49f4ca1835195eb73f03de37 100644
--- a/runtime/starpu/codelets/codelet_ztplqt.c
+++ b/runtime/starpu/codelets/codelet_ztplqt.c
@@ -11,11 +11,11 @@
  *
  * @brief Chameleon ztplqt StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> s d c
  *
  */
@@ -72,9 +72,9 @@ void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
         STARPU_VALUE, &N,     sizeof(int),
         STARPU_VALUE, &L,     sizeof(int),
         STARPU_VALUE, &ib,    sizeof(int),
-        STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_W,      RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
+        STARPU_RW,     RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW,     RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
+        STARPU_W,      RTBLKADDR(T, ChamComplexDouble, Tm, Tn),
         /* Other options */
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_PRIORITY,  options->priority,
diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c
index 75af260d6f33289a32396d00d03a36abb1e09b79..fb2db26e4d978a4018dcf0d65fe3f47689c10004 100644
--- a/runtime/starpu/codelets/codelet_ztpmlqt.c
+++ b/runtime/starpu/codelets/codelet_ztpmlqt.c
@@ -9,11 +9,11 @@
  *
  * @brief Chameleon ztpmlqt StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> s d c
  *
  */
@@ -118,10 +118,10 @@ void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
         STARPU_VALUE, &L,     sizeof(int),
         STARPU_VALUE, &ib,     sizeof(int),
         STARPU_VALUE, &(options->ws_wsize), sizeof(size_t),
-        STARPU_R,      RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),
-        STARPU_R,      RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
-        STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,      RTBLKADDR(V, ChamComplexDouble, Vm, Vn),
+        STARPU_R,      RTBLKADDR(T, ChamComplexDouble, Tm, Tn),
+        STARPU_RW,     RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW,     RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
         /* Other options */
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_PRIORITY,  options->priority,
diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c
index 7d266758adb40234a7f0b52eb8f257514473020c..bab1d8d0a7dfc41c74edd98b1143cc62ebd8330b 100644
--- a/runtime/starpu/codelets/codelet_ztpmqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpmqrt.c
@@ -9,11 +9,11 @@
  *
  * @brief Chameleon ztpmqrt StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> s d c
  *
  */
@@ -118,10 +118,10 @@ void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
         STARPU_VALUE, &L,     sizeof(int),
         STARPU_VALUE, &ib,     sizeof(int),
         STARPU_VALUE, &(options->ws_wsize), sizeof(size_t),
-        STARPU_R,      RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),
-        STARPU_R,      RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
-        STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,      RTBLKADDR(V, ChamComplexDouble, Vm, Vn),
+        STARPU_R,      RTBLKADDR(T, ChamComplexDouble, Tm, Tn),
+        STARPU_RW,     RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW,     RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
         /* Other options */
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_PRIORITY,  options->priority,
diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c
index 6f9983b033dbbd027143c93847cd49419f101a3e..0ade7095a124201ccddb2b39ef804d355ecb0ee7 100644
--- a/runtime/starpu/codelets/codelet_ztpqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpqrt.c
@@ -11,12 +11,12 @@
  *
  * @brief Chameleon ztpqrt StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Mathieu Faverge
  * @author Florent Pruvost
  * @author Lucas Barros de Assis
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> s d c
  *
  */
@@ -73,9 +73,9 @@ void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
         STARPU_VALUE, &N,     sizeof(int),
         STARPU_VALUE, &L,     sizeof(int),
         STARPU_VALUE, &ib,    sizeof(int),
-        STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_W,      RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
+        STARPU_RW,     RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW,     RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
+        STARPU_W,      RTBLKADDR(T, ChamComplexDouble, Tm, Tn),
         /* Other options */
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_PRIORITY,  options->priority,
diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c
index 66fc4da3ee1e6ee84595881561b6157c6c5e2b07..57b146d6a70ad2a93a455762cae0e5c01d16856a 100644
--- a/runtime/starpu/codelets/codelet_ztradd.c
+++ b/runtime/starpu/codelets/codelet_ztradd.c
@@ -98,8 +98,8 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
         &cl_ztradd,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_ztradd_args_s),
-        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        accessB,       RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,      RTBLKADDR(A, ChamComplexDouble, Am, An),
+        accessB,       RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_ztrasm.c b/runtime/starpu/codelets/codelet_ztrasm.c
index f0e8cfb8318879c91a8ebf33fefb2d45b63c9534..c5412d41679a63c779aee6aa63a6a02c5ac2caf7 100644
--- a/runtime/starpu/codelets/codelet_ztrasm.c
+++ b/runtime/starpu/codelets/codelet_ztrasm.c
@@ -11,14 +11,14 @@
  *
  * @brief Chameleon ztrasm StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -68,8 +68,8 @@ void INSERT_TASK_ztrasm( const RUNTIME_option_t *options,
         STARPU_VALUE,    &diag,                      sizeof(int),
         STARPU_VALUE,    &M,                         sizeof(int),
         STARPU_VALUE,    &N,                         sizeof(int),
-        STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,       RTBLKADDR(B, double, Bm, Bn),
+        STARPU_R,        RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW,       RTBLKADDR(B, ChamRealDouble, Bm, Bn),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index 36b31640cc56bf85d19a31900af7aab3d4ca43c0..a5f2a18773fbb964db65b9e28573c7bd85c945e8 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -148,8 +148,8 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
         &cl_ztrmm,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_ztrmm_args_s),
-        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,      RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW,     RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index 901eeb7809ec81505559b2c3e83b4f7a4c7e1bb3..00d38429e460dd32999e85d3e9b882e5d57acdd8 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -152,8 +152,8 @@ void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
         &cl_ztrsm,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_ztrsm_args_s),
-        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_R,      RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW,     RTBLKADDR(B, ChamComplexDouble, Bm, Bn),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_ztrssq.c b/runtime/starpu/codelets/codelet_ztrssq.c
index 0ca2119967510ee77a944ada7bca023cc68d0ebc..9d800319438bd57f759305c4f2553ccbbbbffcef 100644
--- a/runtime/starpu/codelets/codelet_ztrssq.c
+++ b/runtime/starpu/codelets/codelet_ztrssq.c
@@ -11,13 +11,13 @@
  *
  * @brief Chameleon ztrssq StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -66,8 +66,8 @@ void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
         STARPU_VALUE,    &diag,                      sizeof(int),
         STARPU_VALUE,    &m,                         sizeof(int),
         STARPU_VALUE,    &n,                         sizeof(int),
-        STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,       RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn),
+        STARPU_R,        RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_RW,       RTBLKADDR(SCALESUMSQ, ChamRealDouble, SCALESUMSQm, SCALESUMSQn),
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
         STARPU_EXECUTE_ON_WORKER, options->workerid,
diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c
index 19499f058f32c493d6be3216439f18e84de15d0e..4918849af0fe53409ef0530160767dc3794bdc03 100644
--- a/runtime/starpu/codelets/codelet_ztrtri.c
+++ b/runtime/starpu/codelets/codelet_ztrtri.c
@@ -96,7 +96,7 @@ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
         &cl_ztrtri,
         /* Task codelet arguments */
         STARPU_CL_ARGS, clargs, sizeof(struct cl_ztrtri_args_s),
-        STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_RW,     RTBLKADDR(A, ChamComplexDouble, Am, An),
 
         /* Common task arguments */
         STARPU_PRIORITY,          options->priority,
diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
index 55cb574df3cd1d6c703811c5690601874702e9e3..05510b05943b0858de0910e80cbe6dd8df96102e 100644
--- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
@@ -11,14 +11,14 @@
  *
  * @brief Chameleon ztsmlq_hetra1 StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Hatem Ltaief
  * @author Mathieu Faverge
  * @author Azzam Haidar
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -96,10 +96,10 @@ void INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options,
         STARPU_VALUE,    &k,                 sizeof(int),
         STARPU_VALUE,    &ib,                sizeof(int),
         STARPU_VALUE,    &ldWORK,            sizeof(int),
-        STARPU_RW,        RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n),
-        STARPU_RW,        RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n),
-        STARPU_R,         RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),
-        STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
+        STARPU_RW,        RTBLKADDR(A1, ChamComplexDouble, A1m, A1n),
+        STARPU_RW,        RTBLKADDR(A2, ChamComplexDouble, A2m, A2n),
+        STARPU_R,         RTBLKADDR(V, ChamComplexDouble, Vm, Vn),
+        STARPU_R,         RTBLKADDR(T, ChamComplexDouble, Tm, Tn),
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
index acdb1db4dfcbcf8465f1ce76aedda65d72da1860..be370a8878c703fe08491a19d8c3cff62b79f315 100644
--- a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
@@ -11,14 +11,14 @@
  *
  * @brief Chameleon ztsmqr_hetra1 StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Hatem Ltaief
  * @author Mathieu Faverge
  * @author Azzam Haidar
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -96,10 +96,10 @@ void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options,
         STARPU_VALUE,    &k,                 sizeof(int),
         STARPU_VALUE,    &ib,                sizeof(int),
         STARPU_VALUE,    &ldWORK,            sizeof(int),
-        STARPU_RW,        RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n),
-        STARPU_RW,        RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n),
-        STARPU_R,         RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),
-        STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
+        STARPU_RW,        RTBLKADDR(A1, ChamComplexDouble, A1m, A1n),
+        STARPU_RW,        RTBLKADDR(A2, ChamComplexDouble, A2m, A2n),
+        STARPU_R,         RTBLKADDR(V, ChamComplexDouble, Vm, Vn),
+        STARPU_R,         RTBLKADDR(T, ChamComplexDouble, Tm, Tn),
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c
index 1ad32614ed03fd075dcb3231dab2228d5c22d6e4..4839c0d5abd627dea9c64df0a6d0ec972edd0ac1 100644
--- a/runtime/starpu/codelets/codelet_ztstrf.c
+++ b/runtime/starpu/codelets/codelet_ztstrf.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon ztstrf StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
@@ -22,7 +22,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -96,9 +96,9 @@ void INSERT_TASK_ztstrf( const RUNTIME_option_t *options,
         STARPU_VALUE,    &n,                         sizeof(int),
         STARPU_VALUE,    &ib,                        sizeof(int),
         STARPU_VALUE,    &nb,                        sizeof(int),
-        STARPU_RW,        RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),
-        STARPU_RW,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_W,         RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln),
+        STARPU_RW,        RTBLKADDR(U, ChamComplexDouble, Um, Un),
+        STARPU_RW,        RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_W,         RTBLKADDR(L, ChamComplexDouble, Lm, Ln),
         STARPU_VALUE,    &IPIV,                      sizeof(int*),
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_VALUE,    &d_work,                    sizeof(CHAMELEON_starpu_ws_t *),
diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c
index 229beb87ec96132d3ab5cf2a318e531e7e8c0c08..db6b3726ee0375fa0055c64d7f61067989c438f9 100644
--- a/runtime/starpu/codelets/codelet_zunmlq.c
+++ b/runtime/starpu/codelets/codelet_zunmlq.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zunmlq StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
@@ -23,7 +23,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -118,9 +118,9 @@ void INSERT_TASK_zunmlq( const RUNTIME_option_t *options,
         STARPU_VALUE,    &n,                 sizeof(int),
         STARPU_VALUE,    &k,                 sizeof(int),
         STARPU_VALUE,    &ib,                sizeof(int),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_R,         RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R,         RTBLKADDR(T, ChamComplexDouble, Tm, Tn),
+        STARPU_RW,        RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
         /* ib * nb */
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_VALUE,    &nb,                sizeof(int),
diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c
index 767d741b907d4aeb587484a5a54a4fc280e34f4a..657c6ca87230410ac7ed7fe6e72435ed31fec886 100644
--- a/runtime/starpu/codelets/codelet_zunmqr.c
+++ b/runtime/starpu/codelets/codelet_zunmqr.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon zunmqr StarPU codelet
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @comment This file has been automatically generated
  *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
@@ -22,7 +22,7 @@
  * @author Lucas Barros de Assis
  * @author Florent Pruvost
  * @author Samuel Thibault
- * @date 2022-02-22
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -117,9 +117,9 @@ void INSERT_TASK_zunmqr( const RUNTIME_option_t *options,
         STARPU_VALUE,    &n,                 sizeof(int),
         STARPU_VALUE,    &k,                 sizeof(int),
         STARPU_VALUE,    &ib,                sizeof(int),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_R,         RTBLKADDR(A, ChamComplexDouble, Am, An),
+        STARPU_R,         RTBLKADDR(T, ChamComplexDouble, Tm, Tn),
+        STARPU_RW,        RTBLKADDR(C, ChamComplexDouble, Cm, Cn),
         /* ib * nb */
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_VALUE,    &nb,                sizeof(int),
diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c
index 36dda081062b9840531622c0c46e4de29a1eaaf1..ee4817fe4cc35abf5a73baffad8b224fcf79452a 100644
--- a/runtime/starpu/control/runtime_descriptor.c
+++ b/runtime/starpu/control/runtime_descriptor.c
@@ -11,7 +11,7 @@
  *
  * @brief Chameleon StarPU descriptor routines
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Cedric Augonnet
  * @author Mathieu Faverge
  * @author Cedric Castagnede
@@ -20,7 +20,7 @@
  * @author Raphael Boucherie
  * @author Samuel Thibault
  * @author Loris Lucido
- * @date 2023-01-30
+ * @date 2023-07-06
  *
  */
 #include "chameleon_starpu.h"
@@ -101,6 +101,7 @@ void RUNTIME_desc_create( CHAM_desc_t *desc )
 {
     int64_t lmt = desc->lmt;
     int64_t lnt = desc->lnt;
+    size_t  nbtiles = lmt * lnt;
 
     desc->occurences = 1;
 
@@ -108,8 +109,12 @@ void RUNTIME_desc_create( CHAM_desc_t *desc )
      * Allocate starpu_handle_t array (handlers are initialized on the fly when
      * discovered by any algorithm to save space)
      */
-    desc->schedopt = (void*)calloc(lnt*lmt,sizeof(starpu_data_handle_t));
-    assert(desc->schedopt);
+    if ( cham_is_mixed( desc->dtyp ) ) {
+        nbtiles *= 3;
+    }
+
+    desc->schedopt = (void*)calloc( nbtiles, sizeof(starpu_data_handle_t) );
+    assert( desc->schedopt );
 
 #if !defined(CHAMELEON_SIMULATION)
 #if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP)
@@ -160,7 +165,7 @@ void RUNTIME_desc_create( CHAM_desc_t *desc )
      */
     {
         chameleon_starpu_tag_init();
-        desc->mpitag = chameleon_starpu_tag_book( (int64_t)lnt * (int64_t)lmt );
+        desc->mpitag = chameleon_starpu_tag_book( nbtiles );
 
         if ( desc->mpitag == -1 ) {
             chameleon_fatal_error("RUNTIME_desc_create", "Can't pursue computation since no more tags are available");
@@ -181,41 +186,45 @@ void RUNTIME_desc_destroy( CHAM_desc_t *desc )
      * If this is the last descriptor using the matrix, we release the handle
      * and unregister the GPU data
      */
-    if ( desc->occurences == 0 ) {
-        starpu_data_handle_t *handle = (starpu_data_handle_t*)(desc->schedopt);
-        int lmt = desc->lmt;
-        int lnt = desc->lnt;
-        int m, n;
-
-        for (n = 0; n < lnt; n++) {
-            for (m = 0; m < lmt; m++)
-            {
-                if ( *handle != NULL ) {
-                    starpu_data_unregister(*handle);
-                    *handle = NULL;
-                }
-                handle++;
-            }
+    if ( desc->occurences > 0 ) {
+        return;
+    }
+
+    starpu_data_handle_t *handle = (starpu_data_handle_t*)(desc->schedopt);
+    int64_t lmt = desc->lmt;
+    int64_t lnt = desc->lnt;
+    int64_t nbtiles = lmt * lnt;
+    int64_t m;
+
+    if ( cham_is_mixed( desc->dtyp ) ) {
+        nbtiles *= 3;
+    }
+
+    for (m = 0; m < nbtiles; m++, handle++)
+    {
+        if ( *handle != NULL ) {
+            starpu_data_unregister(*handle);
+            *handle = NULL;
         }
+    }
 
 #if !defined(CHAMELEON_SIMULATION)
 #if defined(CHAMELEON_USE_CUDA) || defined(CHAMELEON_USE_HIP)
-        if ( (desc->use_mat == 1) && (desc->register_mat == 1) )
+    if ( (desc->use_mat == 1) && (desc->register_mat == 1) )
+    {
+        /* Unmap the pinned memory associated to the matrix */
+        if (gpuHostUnregister(desc->mat) != gpuSuccess)
         {
-            /* Unmap the pinned memory associated to the matrix */
-            if (gpuHostUnregister(desc->mat) != gpuSuccess)
-            {
-                chameleon_warning("RUNTIME_desc_destroy(StarPU)",
-                                  "gpuHostUnregister failed to unregister the "
-                                  "pinned memory associated to the matrix");
-            }
+            chameleon_warning("RUNTIME_desc_destroy(StarPU)",
+                              "gpuHostUnregister failed to unregister the "
+                              "pinned memory associated to the matrix");
         }
+    }
 #endif
 #endif
-        chameleon_starpu_tag_release( desc->mpitag );
+    chameleon_starpu_tag_release( desc->mpitag );
 
-        free( desc->schedopt );
-    }
+    free( desc->schedopt );
 }
 
 /**
@@ -335,24 +344,37 @@ void RUNTIME_desc_flush( const CHAM_desc_t        *desc,
 void RUNTIME_data_flush( const RUNTIME_sequence_t *sequence,
                          const CHAM_desc_t *A, int m, int n )
 {
+    int local, i, imax = 1;
     int64_t mm = m + (A->i / A->mb);
     int64_t nn = n + (A->j / A->nb);
-    int64_t shift = ((int64_t)A->lmt) * nn + mm;
+    int64_t shift   = ((int64_t)A->lmt) * nn + mm;
+    int64_t nbtiles = ((int64_t)(A->lmt)) * ((int64_t)(A->lnt));
     starpu_data_handle_t *handle = A->schedopt;
     handle += shift;
 
-    if (*handle == NULL) {
-        return;
-    }
+    local = chameleon_desc_islocal( A, m, n );
+
+    if ( cham_is_mixed( A->dtyp ) ) {
+        imax = 3;
+     }
+
+    for( i=0; i<imax; i++ ) {
+        starpu_data_handle_t *handlebis;
+
+        handlebis = handle + i * nbtiles;
+
+        if ( *handlebis == NULL ) {
+            continue;
+        }
 
 #if defined(CHAMELEON_USE_MPI)
-    starpu_mpi_cache_flush( MPI_COMM_WORLD, *handle );
+        starpu_mpi_cache_flush( MPI_COMM_WORLD, *handlebis );
 #endif
 
-    if ( chameleon_desc_islocal( A, m, n ) ) {
-        chameleon_starpu_data_wont_use( *handle );
+        if ( local ) {
+            chameleon_starpu_data_wont_use( *handlebis );
+        }
     }
-
     (void)sequence;
 }
 
@@ -443,3 +465,71 @@ void *RUNTIME_data_getaddr( const CHAM_desc_t *A, int m, int n )
     assert( *ptrtile );
     return (void*)(*ptrtile);
 }
+
+void *RUNTIME_data_getaddr_withconversion( const RUNTIME_option_t *options,
+                                           cham_access_t access, cham_flttype_t flttype,
+                                           const CHAM_desc_t *A, int m, int n )
+{
+    int64_t mm = m + (A->i / A->mb);
+    int64_t nn = n + (A->j / A->nb);
+
+    CHAM_tile_t *tile = A->get_blktile( A, m, n );
+    starpu_data_handle_t *ptrtile = A->schedopt;
+
+    int     fltshift = (cham_get_arith( tile->flttype ) - cham_get_arith( flttype ) + 3 ) % 3;
+    int64_t shift = (int64_t)fltshift * ((int64_t)A->lmt * (int64_t)A->lnt);
+    shift = shift + ((int64_t)A->lmt) * nn + mm;
+
+    /* Get the correct starpu_handle */
+    ptrtile += shift;
+
+    if ( *ptrtile != NULL ) {
+        return (void*)(*ptrtile);
+    }
+
+    int home_node = -1;
+    int myrank = A->myrank;
+    int owner  = A->get_rankof( A, m, n );
+
+    if ( myrank == owner ) {
+        if ( (tile->format & CHAMELEON_TILE_HMAT) ||
+             (tile->mat != NULL) )
+        {
+            home_node = STARPU_MAIN_RAM;
+        }
+    }
+
+    starpu_cham_tile_register( ptrtile, home_node, tile, flttype );
+
+#if defined(HAVE_STARPU_DATA_SET_OOC_FLAG)
+    if ( A->ooc == 0 ) {
+        starpu_data_set_ooc_flag( *ptrtile, 0 );
+    }
+#endif
+
+#if defined(HAVE_STARPU_DATA_SET_COORDINATES)
+    starpu_data_set_coordinates( *ptrtile, 3, m, n, cham_get_arith( flttype ) );
+#endif
+
+#if defined(CHAMELEON_USE_MPI)
+    starpu_mpi_data_register( *ptrtile, A->mpitag + shift, owner );
+#endif /* defined(CHAMELEON_USE_MPI) */
+
+#if defined(CHAMELEON_KERNELS_TRACE)
+    fprintf( stderr, "%s - %p registered with tag %ld\n",
+             tile->name, *ptrtile, A->mpitag + shift );
+#endif
+    assert( *ptrtile );
+
+    /* Submit the data conversion */
+    if (( fltshift != 0 ) && (access & ChamR) && (owner == myrank) ) {
+        starpu_data_handle_t *fromtile = A->schedopt;
+        starpu_data_handle_t *totile = ptrtile;
+
+        fromtile += ((int64_t)A->lmt) * nn + mm;
+        if ( *fromtile != NULL ) {
+            insert_task_convert( options, tile->m, tile->n, tile->flttype, *fromtile, flttype, *totile );
+        }
+    }
+    return (void*)(*ptrtile);
+}
diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in
index 8e77d14a9dea84153c6b0672d63cecb3c71ed077..8d421ddbb5234d073ba9ea71b3c97ff3046ff9fc 100644
--- a/runtime/starpu/include/chameleon_starpu.h.in
+++ b/runtime/starpu/include/chameleon_starpu.h.in
@@ -116,6 +116,10 @@ static inline int cham_to_starpu_access( cham_access_t accessA ) {
     return accessA;
 }
 
+void *RUNTIME_data_getaddr_withconversion( const RUNTIME_option_t *options,
+                                           cham_access_t access, cham_flttype_t flttype,
+                                           const CHAM_desc_t *A, int m, int n );
+
 /*
  * MPI Redefinitions
  */
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index 14f7c4e20c4b25ce01491f4274a25c721cc122ee..db7d35daca01a8b1b7fc16abf17fb742dda01e65 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -11,14 +11,14 @@
  *
  * @brief Chameleon StarPU CHAMELEON_Complex64_t codelets header
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Cedric Augonnet
  * @author Mathieu Faverge
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Alycia Lisito
  * @author Loris Lucido
- * @date 2023-01-30
+ * @date 2023-07-06
  * @precisions normal z -> c d s
  *
  */
@@ -33,14 +33,6 @@
 #include "coreblas/coreblas_z.h"
 #include "coreblas/coreblas_ztile.h"
 
-#if defined(CHAMELEON_USE_CUDA)
-#include "gpucublas.h"
-#endif
-
-#if defined(CHAMELEON_USE_HIP)
-#include "gpuhipblas.h"
-#endif
-
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
@@ -98,6 +90,10 @@ CODELETS_HEADER(zunmqr);
  * Auxiliary functions
  */
 CODELETS_HEADER(zgeadd);
+#if defined(PRECISION_z) || defined(PRECISION_d)
+CODELETS_HEADER(zgered);
+#endif
+CODELETS_HEADER(zhessq);
 CODELETS_HEADER(zhe2ge);
 CODELETS_HEADER(zlascal);
 CODELETS_HEADER(ztradd);
@@ -142,4 +138,19 @@ CODELETS_HEADER(zsytrf_nopiv);
 #endif
 CODELETS_HEADER(zplgsy);
 
+#if defined(PRECISION_d) || defined(PRECISION_s)
+CODELETS_HEADER(dlag2h);
+CODELETS_HEADER(hlag2d);
+#endif
+
+struct cl_zgemm_args_s {
+    cham_trans_t transA;
+    cham_trans_t transB;
+    int m;
+    int n;
+    int k;
+    CHAMELEON_Complex64_t alpha;
+    CHAMELEON_Complex64_t beta;
+};
+
 #endif /* _runtime_codelet_z_h_ */
diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h
index 00808fc52fec20078744d3603bec4cb2be680a24..c27d6b913bb231c4815dca09e67b7201e12697c7 100644
--- a/runtime/starpu/include/runtime_codelets.h
+++ b/runtime/starpu/include/runtime_codelets.h
@@ -11,13 +11,13 @@
  *
  * @brief Chameleon StarPU codelets main header
  *
- * @version 1.2.0
+ * @version 1.3.0
  * @author Cedric Augonnet
  * @author Mathieu Faverge
  * @author Cedric Castagnede
  * @author Florent Pruvost
  * @author Loris Lucido
- * @date 2023-01-30
+ * @date 2023-07-06
  *
  */
 #ifndef _runtime_codelets_h_
@@ -26,6 +26,16 @@
 #include "chameleon/config.h"
 #include "runtime_codelet_profile.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+#if defined(CHAMELEON_USE_CUDA)
+#include "gpucublas.h"
+#endif
+
+#if defined(CHAMELEON_USE_HIP)
+#include "gpuhipblas.h"
+#endif
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
 #if defined(STARPU_CUDA_ASYNC)
 #define CODELET_CUDA_FLAGS(flags) .cuda_flags = {(flags)},
 #else
@@ -141,5 +151,25 @@
 #endif
 
 CODELETS_HEADER(map);
+CODELETS_HEADER(hgemm);
+CODELETS_HEADER(gemmex);
+
+struct cl_hgemm_args_s {
+    cham_trans_t transA;
+    cham_trans_t transB;
+    int m;
+    int n;
+    int k;
+    CHAMELEON_Real16_t alpha;
+    CHAMELEON_Real16_t beta;
+};
+
+void
+insert_task_convert( const RUNTIME_option_t *options,
+                     int m, int n,
+                     cham_flttype_t       fromtype,
+                     starpu_data_handle_t fromtile,
+                     cham_flttype_t       totype,
+                     starpu_data_handle_t totile );
 
 #endif /* _runtime_codelets_h_ */
diff --git a/runtime/starpu/interface/cham_tile_interface.c b/runtime/starpu/interface/cham_tile_interface.c
index 27eb801cb266a0038e77b19e04e729b68b3270f4..1e837e1b0ca4c4a5e8418e9acf86dcfb71029851 100644
--- a/runtime/starpu/interface/cham_tile_interface.c
+++ b/runtime/starpu/interface/cham_tile_interface.c
@@ -672,6 +672,8 @@ starpu_cham_tile_register( starpu_data_handle_t *handleptr,
             .tilesize   = tile->m * tile->n * elemsize,
         };
     memcpy( &(cham_tile_interface.tile), tile, sizeof( CHAM_tile_t ) );
+    /* Overwrite the flttype in case it comes from a data conversion */
+    cham_tile_interface.tile.flttype = flttype;
 
     if ( tile->format & CHAMELEON_TILE_FULLRANK ) {
         cham_tile_interface.allocsize = tile->m * tile->n * elemsize;