From e85e996af5eb0f2de5ebfd5382bbd95f97cf1584 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Fri, 15 Oct 2021 11:26:35 +0200
Subject: [PATCH] Add empty core_ztile to measure overhead

---
 coreblas/compute/CMakeLists.txt     |  13 +-
 coreblas/compute/core_ztile_empty.c | 801 ++++++++++++++++++++++++++++
 2 files changed, 813 insertions(+), 1 deletion(-)
 create mode 100644 coreblas/compute/core_ztile_empty.c

diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt
index 15df8b4f9..108dd249e 100644
--- a/coreblas/compute/CMakeLists.txt
+++ b/coreblas/compute/CMakeLists.txt
@@ -27,6 +27,8 @@
 #
 ###
 
+option( CHAMELEON_NO_KERNELS "Do not call the numerical kernels" OFF)
+
 # Generate the chameleon sources for all possible precisions
 # ------------------------------------------------------
 
@@ -105,13 +107,22 @@ set(ZSRC
     core_zttqrt.c
     core_zunmlq.c
     core_zunmqr.c
-    core_ztile.c
     )
 if( CHAMELEON_USE_HMAT )
   list( APPEND ZSRC
     hmat_z.c )
 endif()
 
+if ( CHAMELEON_NO_KERNELS )
+  list( APPEND ZSRC
+    core_ztile_empty.c
+    )
+else()
+  list( APPEND ZSRC
+    core_ztile.c
+    )
+endif()
+
 precisions_rules_py(COREBLAS_SRCS_GENERATED "${ZSRC}"
                     PRECISIONS "${CHAMELEON_PRECISION}")
 
diff --git a/coreblas/compute/core_ztile_empty.c b/coreblas/compute/core_ztile_empty.c
new file mode 100644
index 000000000..ee7a4a3fc
--- /dev/null
+++ b/coreblas/compute/core_ztile_empty.c
@@ -0,0 +1,801 @@
+/**
+ *
+ * @file core_ztile_empty.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ * @brief Chameleon CPU kernel interface from CHAM_tile_t layout to the real one.
+ *
+ * @version 1.1.0
+ * @author Mathieu Faverge
+ * @date 2021-03-16
+ * @precisions normal z -> c d s
+ *
+ */
+#include "coreblas.h"
+#include "coreblas/coreblas_ztile.h"
+
+#if defined( CHAMELEON_USE_HMAT )
+#include "coreblas/hmat.h"
+#endif
+
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+void
+TCORE_dlag2z( __attribute__((unused)) cham_uplo_t uplo,
+              __attribute__((unused)) int M,
+              __attribute__((unused)) int N,
+              __attribute__((unused)) const CHAM_tile_t *A,
+              __attribute__((unused)) CHAM_tile_t       *B )
+{
+    coreblas_kernel_trace( A, B );
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    CORE_dlag2z( uplo, M, N, A->mat, A->ld, B->mat, B->ld );
+}
+#endif
+
+void
+TCORE_dzasum( __attribute__((unused)) cham_store_t       storev,
+              __attribute__((unused)) cham_uplo_t        uplo,
+              __attribute__((unused)) int                M,
+              __attribute__((unused)) int                N,
+              __attribute__((unused)) const CHAM_tile_t *A,
+              __attribute__((unused)) double *           work )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    CORE_dzasum( storev, uplo, M, N, CHAM_tile_get_ptr( A ), A->ld, work );
+}
+
+int
+TCORE_zaxpy( int                   M,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             int                   incA,
+             CHAM_tile_t *         B,
+             int                   incB )
+{
+    coreblas_kernel_trace( A, B );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_zgeadd( __attribute__((unused)) cham_trans_t          trans,
+              __attribute__((unused)) int                   M,
+              __attribute__((unused)) int                   N,
+              __attribute__((unused)) CHAMELEON_Complex64_t alpha,
+              __attribute__((unused)) const CHAM_tile_t *   A,
+              __attribute__((unused)) CHAMELEON_Complex64_t beta,
+              __attribute__((unused)) CHAM_tile_t *         B )
+{
+    coreblas_kernel_trace( A, B );
+}
+
+int
+TCORE_zgelqt( __attribute__((unused)) int                    M,
+              __attribute__((unused)) int                    N,
+              __attribute__((unused)) int                    IB,
+              __attribute__((unused)) CHAM_tile_t *          A,
+              __attribute__((unused)) CHAM_tile_t *          T,
+              __attribute__((unused)) CHAMELEON_Complex64_t *TAU,
+              __attribute__((unused)) CHAMELEON_Complex64_t *WORK )
+{
+    coreblas_kernel_trace( A, T );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_zgemv( cham_trans_t trans, int M, int N,
+             CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A,
+                                          const CHAM_tile_t *x, int incX,
+             CHAMELEON_Complex64_t beta,        CHAM_tile_t *y, int incY )
+{
+    coreblas_kernel_trace( A, x, y );
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( x->format & CHAMELEON_TILE_FULLRANK );
+    assert( y->format & CHAMELEON_TILE_FULLRANK );
+}
+
+void
+TCORE_zgemm( cham_trans_t          transA,
+             cham_trans_t          transB,
+             int                   M,
+             int                   N,
+             int                   K,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             const CHAM_tile_t *   B,
+             CHAMELEON_Complex64_t beta,
+             CHAM_tile_t *         C )
+{
+    coreblas_kernel_trace( A, B, C );
+}
+
+int
+TCORE_zgeqrt( int                    M,
+              int                    N,
+              int                    IB,
+              CHAM_tile_t *          A,
+              CHAM_tile_t *          T,
+              CHAMELEON_Complex64_t *TAU,
+              CHAMELEON_Complex64_t *WORK )
+{
+    coreblas_kernel_trace( A, T );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_zgessm( int M, int N, int K, int IB, const int *IPIV, const CHAM_tile_t *L, CHAM_tile_t *A )
+{
+    coreblas_kernel_trace( L, A );
+    assert( L->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_zgessq( cham_store_t storev, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *sclssq )
+{
+    coreblas_kernel_trace( A, sclssq );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( sclssq->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_zgetrf( int M, int N, CHAM_tile_t *A, int *IPIV, int *INFO )
+{
+    coreblas_kernel_trace( A );
+    return 0;
+}
+
+int
+TCORE_zgetrf_incpiv( int M, int N, int IB, CHAM_tile_t *A, int *IPIV, int *INFO )
+{
+    coreblas_kernel_trace( A );
+    return 0;
+}
+
+int
+TCORE_zgetrf_nopiv( int M, int N, int IB, CHAM_tile_t *A, int *INFO )
+{
+    coreblas_kernel_trace( A );
+    return 0;
+}
+
+void
+TCORE_zhe2ge( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B )
+{
+    coreblas_kernel_trace( A, B );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+void
+TCORE_zhemm( cham_side_t           side,
+             cham_uplo_t           uplo,
+             int                   M,
+             int                   N,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             const CHAM_tile_t *   B,
+             CHAMELEON_Complex64_t beta,
+             CHAM_tile_t *         C )
+{
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_zherk( cham_uplo_t        uplo,
+             cham_trans_t       trans,
+             int                N,
+             int                K,
+             double             alpha,
+             const CHAM_tile_t *A,
+             double             beta,
+             CHAM_tile_t *      C )
+{
+    coreblas_kernel_trace( A, C );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_zher2k( cham_uplo_t           uplo,
+              cham_trans_t          trans,
+              int                   N,
+              int                   K,
+              CHAMELEON_Complex64_t alpha,
+              const CHAM_tile_t *   A,
+              const CHAM_tile_t *   B,
+              double                beta,
+              CHAM_tile_t *         C )
+{
+    coreblas_kernel_trace( A, B, C );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+#endif
+
+int
+TCORE_zherfb( cham_uplo_t            uplo,
+              int                    N,
+              int                    K,
+              int                    IB,
+              int                    NB,
+              const CHAM_tile_t *    A,
+              const CHAM_tile_t *    T,
+              CHAM_tile_t *          C,
+              CHAMELEON_Complex64_t *WORK,
+              int                    ldwork )
+{
+    coreblas_kernel_trace( A, T, C );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+int
+TCORE_zhessq( cham_store_t       storev,
+              cham_uplo_t        uplo,
+              int                N,
+              const CHAM_tile_t *A,
+              CHAM_tile_t *      sclssq )
+{
+    coreblas_kernel_trace( A, sclssq );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( sclssq->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+#endif
+
+void
+TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B )
+{
+    return;
+}
+
+void
+TCORE_zlange( cham_normtype_t    norm,
+              int                M,
+              int                N,
+              const CHAM_tile_t *A,
+              double *           work,
+              double *           normA )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+void
+TCORE_zlanhe( cham_normtype_t    norm,
+              cham_uplo_t        uplo,
+              int                N,
+              const CHAM_tile_t *A,
+              double *           work,
+              double *           normA )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+#endif
+
+void
+TCORE_zlansy( cham_normtype_t    norm,
+              cham_uplo_t        uplo,
+              int                N,
+              const CHAM_tile_t *A,
+              double *           work,
+              double *           normA )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_zlantr( cham_normtype_t    norm,
+              cham_uplo_t        uplo,
+              cham_diag_t        diag,
+              int                M,
+              int                N,
+              const CHAM_tile_t *A,
+              double *           work,
+              double *           normA )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_zlascal( cham_uplo_t uplo, int m, int n, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_zlaset( cham_uplo_t           uplo,
+              int                   n1,
+              int                   n2,
+              CHAMELEON_Complex64_t alpha,
+              CHAMELEON_Complex64_t beta,
+              CHAM_tile_t *         A )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_zlaset2( cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_zlatro( cham_uplo_t        uplo,
+              cham_trans_t       trans,
+              int                M,
+              int                N,
+              const CHAM_tile_t *A,
+              CHAM_tile_t *      B )
+{
+    coreblas_kernel_trace( A, B );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_zlauum( cham_uplo_t uplo, int N, CHAM_tile_t *A )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+void
+TCORE_zplghe( double                 bump,
+              int                    m,
+              int                    n,
+              CHAM_tile_t *          A,
+              int                    bigM,
+              int                    m0,
+              int                    n0,
+              unsigned long long int seed )
+{
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+#endif
+
+void
+TCORE_zplgsy( CHAMELEON_Complex64_t  bump,
+              int                    m,
+              int                    n,
+              CHAM_tile_t *          A,
+              int                    bigM,
+              int                    m0,
+              int                    n0,
+              unsigned long long int seed )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_zplrnt( int                    m,
+              int                    n,
+              CHAM_tile_t *          A,
+              int                    bigM,
+              int                    m0,
+              int                    n0,
+              unsigned long long int seed )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_zpotrf( cham_uplo_t uplo, int n, CHAM_tile_t *A, int *INFO )
+{
+    coreblas_kernel_trace( A );
+    return;
+}
+
+int
+TCORE_zssssm( int                M1,
+              int                N1,
+              int                M2,
+              int                N2,
+              int                K,
+              int                IB,
+              CHAM_tile_t *      A1,
+              CHAM_tile_t *      A2,
+              const CHAM_tile_t *L1,
+              const CHAM_tile_t *L2,
+              const int *        IPIV )
+{
+    coreblas_kernel_trace( A1, A2, L1, L2 );
+    assert( A1->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( A2->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( L1->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( L2->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_zsymm( cham_side_t           side,
+             cham_uplo_t           uplo,
+             int                   M,
+             int                   N,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             const CHAM_tile_t *   B,
+             CHAMELEON_Complex64_t beta,
+             CHAM_tile_t *         C )
+{
+    coreblas_kernel_trace( A, B, C );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_zsyrk( cham_uplo_t           uplo,
+             cham_trans_t          trans,
+             int                   N,
+             int                   K,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             CHAMELEON_Complex64_t beta,
+             CHAM_tile_t *         C )
+{
+    coreblas_kernel_trace( A, C );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_zsyr2k( cham_uplo_t           uplo,
+              cham_trans_t          trans,
+              int                   N,
+              int                   K,
+              CHAMELEON_Complex64_t alpha,
+              const CHAM_tile_t *   A,
+              const CHAM_tile_t *   B,
+              CHAMELEON_Complex64_t beta,
+              CHAM_tile_t *         C )
+{
+    coreblas_kernel_trace( A, B, C );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_zsyssq( cham_store_t       storev,
+              cham_uplo_t        uplo,
+              int                N,
+              const CHAM_tile_t *A,
+              CHAM_tile_t *      sclssq )
+{
+    coreblas_kernel_trace( A, sclssq );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( sclssq->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+int
+TCORE_zsytf2_nopiv( cham_uplo_t uplo, int n, CHAM_tile_t *A )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+#endif
+
+int
+TCORE_ztplqt( int                    M,
+              int                    N,
+              int                    L,
+              int                    IB,
+              CHAM_tile_t *          A,
+              CHAM_tile_t *          B,
+              CHAM_tile_t *          T,
+              CHAMELEON_Complex64_t *WORK )
+{
+    coreblas_kernel_trace( A, B, T );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_ztpmlqt( cham_side_t            side,
+               cham_trans_t           trans,
+               int                    M,
+               int                    N,
+               int                    K,
+               int                    L,
+               int                    IB,
+               const CHAM_tile_t *    V,
+               const CHAM_tile_t *    T,
+               CHAM_tile_t *          A,
+               CHAM_tile_t *          B,
+               CHAMELEON_Complex64_t *WORK )
+{
+    coreblas_kernel_trace( V, T, A, B );
+    assert( V->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_ztpmqrt( cham_side_t            side,
+               cham_trans_t           trans,
+               int                    M,
+               int                    N,
+               int                    K,
+               int                    L,
+               int                    IB,
+               const CHAM_tile_t *    V,
+               const CHAM_tile_t *    T,
+               CHAM_tile_t *          A,
+               CHAM_tile_t *          B,
+               CHAMELEON_Complex64_t *WORK )
+{
+    coreblas_kernel_trace( V, T, A, B );
+    assert( V->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_ztpqrt( int                    M,
+              int                    N,
+              int                    L,
+              int                    IB,
+              CHAM_tile_t *          A,
+              CHAM_tile_t *          B,
+              CHAM_tile_t *          T,
+              CHAMELEON_Complex64_t *WORK )
+{
+    coreblas_kernel_trace( A, B, T );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_ztradd( cham_uplo_t           uplo,
+              cham_trans_t          trans,
+              int                   M,
+              int                   N,
+              CHAMELEON_Complex64_t alpha,
+              const CHAM_tile_t *   A,
+              CHAMELEON_Complex64_t beta,
+              CHAM_tile_t *         B )
+{
+    coreblas_kernel_trace( A, B );
+}
+
+void
+TCORE_ztrasm( cham_store_t       storev,
+              cham_uplo_t        uplo,
+              cham_diag_t        diag,
+              int                M,
+              int                N,
+              const CHAM_tile_t *A,
+              double *           work )
+{
+    coreblas_kernel_trace( A );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_ztrmm( cham_side_t           side,
+             cham_uplo_t           uplo,
+             cham_trans_t          transA,
+             cham_diag_t           diag,
+             int                   M,
+             int                   N,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             CHAM_tile_t *         B )
+{
+    coreblas_kernel_trace( A, B );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_ztrsm( cham_side_t           side,
+             cham_uplo_t           uplo,
+             cham_trans_t          transA,
+             cham_diag_t           diag,
+             int                   M,
+             int                   N,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             CHAM_tile_t *         B )
+{
+    coreblas_kernel_trace( A, B );
+}
+
+int
+TCORE_ztrssq( cham_uplo_t        uplo,
+              cham_diag_t        diag,
+              int                M,
+              int                N,
+              const CHAM_tile_t *A,
+              CHAM_tile_t *      sclssq )
+{
+    coreblas_kernel_trace( A, sclssq );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( sclssq->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+void
+TCORE_ztrtri( cham_uplo_t uplo, cham_diag_t diag, int N, CHAM_tile_t *A, int *info )
+{
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_ztsmlq_hetra1( cham_side_t            side,
+                     cham_trans_t           trans,
+                     int                    m1,
+                     int                    n1,
+                     int                    m2,
+                     int                    n2,
+                     int                    k,
+                     int                    ib,
+                     CHAM_tile_t *          A1,
+                     CHAM_tile_t *          A2,
+                     const CHAM_tile_t *    V,
+                     const CHAM_tile_t *    T,
+                     CHAMELEON_Complex64_t *WORK,
+                     int                    ldwork )
+{
+    coreblas_kernel_trace( A1, A2, V, T );
+    assert( A1->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( A2->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( V->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_ztsmqr_hetra1( cham_side_t            side,
+                     cham_trans_t           trans,
+                     int                    m1,
+                     int                    n1,
+                     int                    m2,
+                     int                    n2,
+                     int                    k,
+                     int                    ib,
+                     CHAM_tile_t *          A1,
+                     CHAM_tile_t *          A2,
+                     const CHAM_tile_t *    V,
+                     const CHAM_tile_t *    T,
+                     CHAMELEON_Complex64_t *WORK,
+                     int                    ldwork )
+{
+    coreblas_kernel_trace( A1, A2, V, T );
+    assert( A1->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( A2->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( V->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_ztstrf( int                    M,
+              int                    N,
+              int                    IB,
+              int                    NB,
+              CHAM_tile_t *          U,
+              CHAM_tile_t *          A,
+              CHAM_tile_t *          L,
+              int *                  IPIV,
+              CHAMELEON_Complex64_t *WORK,
+              int                    LDWORK,
+              int *                  INFO )
+{
+    coreblas_kernel_trace( U, A, L );
+    assert( U->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( L->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_zunmlq( cham_side_t            side,
+              cham_trans_t           trans,
+              int                    M,
+              int                    N,
+              int                    K,
+              int                    IB,
+              const CHAM_tile_t *    V,
+              const CHAM_tile_t *    T,
+              CHAM_tile_t *          C,
+              CHAMELEON_Complex64_t *WORK,
+              int                    LDWORK )
+{
+    coreblas_kernel_trace( V, T, C );
+    assert( V->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_zunmqr( cham_side_t            side,
+              cham_trans_t           trans,
+              int                    M,
+              int                    N,
+              int                    K,
+              int                    IB,
+              const CHAM_tile_t *    V,
+              const CHAM_tile_t *    T,
+              CHAM_tile_t *          C,
+              CHAMELEON_Complex64_t *WORK,
+              int                    LDWORK )
+{
+    coreblas_kernel_trace( V, T, C );
+    assert( V->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( T->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( C->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_zgesum( cham_store_t storev, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *sum )
+{
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( sum->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_zcesca( int center,
+              int scale,
+              cham_store_t axis,
+              int                M,
+              int                N,
+              int                Mt,
+              int                Nt,
+              const CHAM_tile_t *Gi,
+              const CHAM_tile_t *Gj,
+              const CHAM_tile_t *G,
+              const CHAM_tile_t *Di,
+              const CHAM_tile_t *Dj,
+              CHAM_tile_t *      A )
+{
+    assert( Gi->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( Gj->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( G->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( Di->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( Dj->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
+
+int
+TCORE_zgram( cham_uplo_t        uplo,
+             int                M,
+             int                N,
+             int                Mt,
+             int                Nt,
+             const CHAM_tile_t *Di,
+             const CHAM_tile_t *Dj,
+             const CHAM_tile_t *D,
+             CHAM_tile_t *      A )
+{
+    coreblas_kernel_trace( Di, Dj, D, A );
+    assert( Di->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( Dj->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( D->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+}
-- 
GitLab