diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py
index 227afc04908a49920f6917668f9fa5b7b0d7fd85..02af5df38a90e034eebb9438238440267a1a3355 100644
--- a/cmake_modules/local_subs.py
+++ b/cmake_modules/local_subs.py
@@ -43,6 +43,7 @@ _extra_blas = [
     ('',                     'scesca',               'dcesca',               'ccesca',               'zcesca'              ),
     ('',                     'sgesum',               'dgesum',               'cgesum',               'zgesum'              ),
     ('',                     'sgersum',              'dgersum',              'cgersum',              'zgersum'             ),
+    ('',                     'sprint',               'dprint',               'cprint',               'zprint'              ),
 ]
 
 _extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ]
diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 0106e546cf95b7b0875761e6de3b4209bcf91f4a..bed4df6f140c9449d7c7201bf4249373c14d6146 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -253,6 +253,7 @@ set(ZSRC
     ##################
     zgenm2.c
     pzgenm2.c
+    zprint.c
 )
 
 precisions_rules_py(CHAMELEON_SRCS_GENERATED "${ZSRC}"
diff --git a/compute/zprint.c b/compute/zprint.c
new file mode 100644
index 0000000000000000000000000000000000000000..3dbd715e2c9120918817caa944836222ce344292
--- /dev/null
+++ b/compute/zprint.c
@@ -0,0 +1,227 @@
+/**
+ *
+ * @file zprint.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zprint wrappers
+ *
+ * @version 1.2.0
+ * @author Mathieu Faverge
+ * @author Matthieu Kuhn
+ * @date 2022-02-22
+ * @precisions normal z -> s d c
+ *
+ */
+#include "control/common.h"
+#if !defined(CHAMELEON_SIMULATION)
+#include <coreblas/coreblas_z.h>
+#endif
+
+/*
+ * Static variable to know how to handle the data within the kernel
+ * This assumes that only one runtime is enabled at a time.
+ */
+static RUNTIME_id_t zprint_runtime_id = RUNTIME_SCHED_STARPU;
+
+struct zprint_args_s {
+    FILE       *file;   /* Stream handed to CORE_zprint() by zprint() */
+    const char *header; /* Label handed to CORE_zprint() by zprint() */
+};
+
+/* Per-tile callback given to chameleon_pmap(): prints the tile (m, n) of descA */
+static inline int
+zprint( const CHAM_desc_t *descA,
+        cham_uplo_t uplo, int m, int n,
+        CHAM_tile_t *tileA, void *op_args )
+{
+    CHAMELEON_Complex64_t *A;
+    struct zprint_args_s  *options = (struct zprint_args_s *)op_args;
+    /* Tiles of the last row/column of tiles may be smaller than mb-by-nb */
+    int tempmm = m == descA->mt-1 ? descA->m-m*descA->mb : descA->mb;
+    int tempnn = n == descA->nt-1 ? descA->n-n*descA->nb : descA->nb;
+    int lda;
+
+    if ( zprint_runtime_id == RUNTIME_SCHED_PARSEC ) {
+        A   = (CHAMELEON_Complex64_t*)tileA; /* tileA is used directly as the data */
+        lda = descA->get_blkldd( descA, m );
+    }
+    else {
+        A   = CHAM_tile_get_ptr( tileA );
+        lda = tileA->ld;
+    }
+
+#if !defined(CHAMELEON_SIMULATION)
+    CORE_zprint( options->file, options->header, uplo,
+                 tempmm, tempnn, m, n, A, lda );
+#endif
+
+    return 0;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Print a matrix tile by tile.
+ *
+ *******************************************************************************
+ *
+ * @param[in] file
+ *          The output stream. If NULL, the print kernel writes to stdout.
+ *
+ * @param[in] header
+ *          The label printed by the kernel in front of each tile.
+ *
+ * @param[in] uplo
+ *          Forwarded to the layout conversions and to chameleon_pmap().
+ *
+ * @param[in] M
+ *          The number of rows of the matrix A. M >= 0.
+ *
+ * @param[in] N
+ *          The number of columns of the matrix A. N >= 0.
+ *
+ * @param[in] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_ERR_NOT_INITIALIZED if CHAMELEON is not initialized
+ * @retval <0 if an argument had an illegal value (-1: M, -2: N, -4: LDA)
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zprint_Tile
+ *
+ */
+int CHAMELEON_zprint( FILE *file, const char *header,
+                      cham_uplo_t uplo, int M, int N,
+                      CHAMELEON_Complex64_t *A, int LDA )
+{
+    int NB, status;
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t descAl, descAt;
+    struct zprint_args_s options = { .file = file, .header = header };
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zprint", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    /* Check input arguments */
+    if (M < 0) {
+        chameleon_error("CHAMELEON_zprint", "illegal value of M");
+        return -1;
+    }
+    if (N < 0) {
+        chameleon_error("CHAMELEON_zprint", "illegal value of N");
+        return -2;
+    }
+    if (LDA < chameleon_max(1, M)) {
+        chameleon_error("CHAMELEON_zprint", "illegal value of LDA");
+        return -4;
+    }
+
+    /* Quick return: an empty matrix has nothing to print */
+    if (chameleon_min(N, M) == 0) {
+        return CHAMELEON_SUCCESS;
+    }
+
+    /* Tune NB depending on M and N */
+    status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0);
+    if (status != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_zprint", "chameleon_tune() failed");
+        return status;
+    }
+    NB = CHAMELEON_NB; /* tile size to use after the tuning */
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, uplo,
+                         A, NB, NB, LDA, N, M, N, sequence, &request );
+
+    /* Call the tile interface */
+    zprint_runtime_id = chamctxt->scheduler;
+    chameleon_pmap( uplo, &descAt, zprint, &options, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInput, uplo, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status; /* report task failures, as the Tile variant does */
+
+    /* Cleanup the temporary data */
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ * @brief Tile equivalent of CHAMELEON_zprint().
+ *
+ *  Operates on a matrix stored by tiles and passed through a descriptor;
+ *  all dimensions are taken from the descriptor.
+ *
+ *******************************************************************************
+ *
+ * @param[in] file
+ *          The output stream. If NULL, the print kernel writes to stdout.
+ *
+ * @param[in] header
+ *          The label printed by the kernel in front of each tile.
+ *
+ * @param[in] uplo
+ *          Forwarded to chameleon_pmap().
+ *
+ * @param[in] A
+ *          On entry, the input matrix A.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_ERR_NOT_INITIALIZED if CHAMELEON is not initialized
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zprint
+ *
+ */
+int CHAMELEON_zprint_Tile( FILE *file, const char *header,
+                           cham_uplo_t uplo, CHAM_desc_t *A )
+{
+    CHAM_context_t *chamctxt = chameleon_context_self();
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    struct zprint_args_s options = { .file = file, .header = header };
+    int status;
+
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zprint_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+    zprint_runtime_id = chamctxt->scheduler;
+    chameleon_pmap( uplo, A, zprint, &options, sequence, &request );
+    CHAMELEON_Desc_Flush( A, sequence );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt
index d880039fe2d83e112d882414c24bec1a1e24ec69..40baddc598df9da3ce04b58a54b7fe645435b564 100644
--- a/coreblas/compute/CMakeLists.txt
+++ b/coreblas/compute/CMakeLists.txt
@@ -107,6 +107,7 @@ set(ZSRC
     core_zttqrt.c
     core_zunmlq.c
     core_zunmqr.c
+    core_zprint.c
     )
 if( CHAMELEON_USE_HMAT )
   list( APPEND ZSRC
diff --git a/coreblas/compute/core_zprint.c b/coreblas/compute/core_zprint.c
new file mode 100644
index 0000000000000000000000000000000000000000..0e4fdc887f0c47ca85d4900e1ff52341e9f8bc83
--- /dev/null
+++ b/coreblas/compute/core_zprint.c
@@ -0,0 +1,46 @@
+/**
+ *
+ * @file core_zprint.c
+ *
+ * @copyright 2012-2023 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon core_zprint CPU kernel
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-03-03
+ * @precisions normal z -> c d s
+ *
+ */
+#include "coreblas/lapacke.h"
+#include "coreblas.h"
+
+/* Print the M-by-N column-major tile (Am, An) to file (stdout if NULL); uplo is ignored */
+void
+CORE_zprint( FILE *file, const char *header,
+             cham_uplo_t uplo, int M, int N, int Am, int An,
+             const CHAMELEON_Complex64_t *A, int lda )
+{
+    FILE       *output = (file == NULL) ? stdout : file;
+    const char *title  = (header == NULL) ? "" : header; /* "%s" with NULL is undefined */
+    int         i, j;
+
+    fflush( output );
+    fprintf( output, "--- %10s (%2d, %2d) / %p, %d:\n", title, Am, An, (const void *)A, lda );
+    for(i=0; i<M; i++) {
+        fprintf( output, "  " );
+        for(j=0; j<N; j++) {
+#if defined(PRECISION_z) || defined(PRECISION_c)
+            fprintf( output, " (% e, % e)", creal( A[j*lda + i] ), cimag( A[j*lda + i] ) );
+#else
+            fprintf( output, " % e", A[j*lda + i] );
+#endif
+        }
+        fprintf( output, "\n" );
+    }
+    fprintf( output, "-------------------------\n" );
+    fflush( output );
+}
diff --git a/coreblas/compute/core_ztile.c b/coreblas/compute/core_ztile.c
index 986d587f80bf16abe4a1e6479aeeb88e09a4f686..6de9b2c63cd546ddb8216fa0f5c0bdfd522fdb1d 100644
--- a/coreblas/compute/core_ztile.c
+++ b/coreblas/compute/core_ztile.c
@@ -1072,3 +1072,13 @@ TCORE_zgram( cham_uplo_t        uplo,
     return CORE_zgram(
         uplo, M, N, Mt, Nt, CHAM_tile_get_ptr( Di ), Di->ld, CHAM_tile_get_ptr( Dj ), Dj->ld, CHAM_tile_get_ptr( D ), CHAM_tile_get_ptr( A ), A->ld );
 }
+
+/* Tile wrapper around CORE_zprint(); the tile must be stored in full-rank format */
+void TCORE_zprint( FILE *file, const char *header, cham_uplo_t uplo,
+                   int M, int N, int Am, int An, const CHAM_tile_t *A )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zprint( file, header, uplo, M, N, Am, An,
+                 CHAM_tile_get_ptr( A ), A->ld );
+}
diff --git a/coreblas/include/coreblas/coreblas_z.h b/coreblas/include/coreblas/coreblas_z.h
index b0bcf3bdbbf7dfc8e465aa3f77a29451c66fe03a..050a7968769a5c37a138faee18ef0a577ee6be19 100644
--- a/coreblas/include/coreblas/coreblas_z.h
+++ b/coreblas/include/coreblas/coreblas_z.h
@@ -393,4 +393,8 @@ int CORE_zgram( cham_uplo_t uplo, int M, int N, int Mt, int Nt,
                 const double *D,
                 double *A, int LDA );
 
+void CORE_zprint( FILE *file, const char *header,
+                  cham_uplo_t uplo, int m, int n, int Am, int An,
+                  const CHAMELEON_Complex64_t *A, int lda );
+
 #endif /* _coreblas_z_h_ */
diff --git a/coreblas/include/coreblas/coreblas_ztile.h b/coreblas/include/coreblas/coreblas_ztile.h
index aa66fdd751657288475070c6dec606ff08fd714a..74cd413168d44653f27c408eee7dea77eca603ad 100644
--- a/coreblas/include/coreblas/coreblas_ztile.h
+++ b/coreblas/include/coreblas/coreblas_ztile.h
@@ -86,5 +86,6 @@ int  TCORE_zunmqr( cham_side_t side, cham_trans_t trans, int M, int N, int K, in
 int  TCORE_zgesum( cham_store_t storev, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *sum );
 int  TCORE_zcesca( int center, int scale, cham_store_t axis, int M, int N, int Mt, int Nt, const CHAM_tile_t *Gi, const CHAM_tile_t *Gj, const CHAM_tile_t *G, const CHAM_tile_t *Di, const CHAM_tile_t *Dj, CHAM_tile_t *A );
 int  TCORE_zgram( cham_uplo_t uplo, int M, int N, int Mt, int Nt, const CHAM_tile_t *Di, const CHAM_tile_t *Dj, const CHAM_tile_t *D, CHAM_tile_t *A );
+void TCORE_zprint( FILE *file, const char *header, cham_uplo_t uplo, int M, int N, int Am, int An, const CHAM_tile_t *A );
 
 #endif /* _coreblas_ztile_h_ */
diff --git a/include/chameleon.h b/include/chameleon.h
index 4f75ba8306c7da129df957100edf313cb121987c..c0a9ea9a9f6896be35a78ffab0b2e264c2bf6772 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -27,6 +27,7 @@
  * CHAMELEON types and constants
  */
 #include "chameleon/config.h"
+#include <stdio.h>
 #include "chameleon/constants.h"
 #include "chameleon/types.h"
 #include "chameleon/struct.h"
@@ -37,7 +38,6 @@
  */
 #include "chameleon/runtime.h"
 
-
 /* ****************************************************************************
  * CHAMELEON Simulation mode
  */
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index c36e6289dcfc7dd19122cd671f25a66717c586f2..e587d015c4239a90fb10c228d9bb41faaca7dda1 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -382,6 +382,9 @@ int CHAMELEON_zgram( cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA
 int CHAMELEON_zgram_Tile( cham_uplo_t uplo, CHAM_desc_t *A );
 int CHAMELEON_zgram_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A, void *user_ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 
+int CHAMELEON_zprint( FILE *file, const char *header, cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t *A, int LDA );
+int CHAMELEON_zprint_Tile( FILE *file, const char *header, cham_uplo_t uplo, CHAM_desc_t *descA );
+
 END_C_DECLS
 
 #endif /* _chameleon_z_h_ */