diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py
index 10a56bd0e1e3b0531944fbd926a71231cd887539..eb8cb418f2152666a28fcd62cfcff71cc76ac1c0 100644
--- a/cmake_modules/local_subs.py
+++ b/cmake_modules/local_subs.py
@@ -23,6 +23,8 @@ _extra_blas = [
     ('',                     'sgenm2',               'dgenm2',               'cgenm2',               'zgenm2'              ),
     ('',                     'slag2c_fake',          'dlag2z_fake',          'slag2c',               'dlag2z'              ),
     ('',                     'sgepdf',               'dgepdf',               'cgepdf',               'zgepdf'              ),
+    ('',                     'scesca',               'dcesca',               'ccesca',               'zcesca'              ),
+    ('',                     'sgesum',               'dgesum',               'cgesum',               'zgesum'              ),
 ]
 
 _extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ]
diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index d4b97797a312ddfb6ca28c31ebde693afaba14cd..f7f3c27b8149bc441b861d94dba3dd1b6d6452b5 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -235,6 +235,11 @@ set(ZSRC
     zbuild.c
     pzbuild.c
     ##################
+    # Center-Scaled
+    ##################
+    zcesca.c
+    pzcesca.c
+    ##################
     # Gram
     ##################
     zgram.c
diff --git a/compute/pzcesca.c b/compute/pzcesca.c
new file mode 100644
index 0000000000000000000000000000000000000000..4b6776b7c0d1e69efa75a1037f28495be919ee4e
--- /dev/null
+++ b/compute/pzcesca.c
@@ -0,0 +1,304 @@
+/**
+ *
+ * @file pzcesca.c
+ *
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zcesca parallel algorithm
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-07
+ * @precisions normal z -> s d c z
+ *
+ */
+#include "control/common.h"
+
+#define A( m, n )        A,     (m), (n)
+#define W( desc, m, n ) (desc), (m), (n)
+
+static inline void
+chameleon_pzcesca_internal( int center,
+                            int scale,
+                            cham_store_t axis,
+                            CHAM_desc_t *A,
+                            CHAM_desc_t *Wgcol,
+                            CHAM_desc_t *Wgrow,
+                            CHAM_desc_t *Wgelt,
+                            CHAM_desc_t *Wdcol,
+                            CHAM_desc_t *Wdrow,
+                            RUNTIME_option_t *options )
+{
+    int m, n;
+    int MT = A->mt;
+    int NT = A->nt;
+    int M  = A->m;
+    int N  = A->n;
+    int P  = A->p;
+    int Q  = A->q;
+
+    /**
+     *  1) compute sums and sum-square (scl,ssq) in each tile
+     */
+    for(n = 0; n < NT; n++) {
+        int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
+        for(m = 0; m < MT; m++) {
+            int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
+            if ( (center == 1) && ( (axis == ChamColumnwise) || (axis == ChamEltwise) ) ) {
+                INSERT_TASK_zgesum(
+                    options, ChamColumnwise, tempmm, tempnn,
+                    A(m, n), W( Wgcol, m, n) );
+            }
+            if ( (center == 1) && ( (axis == ChamRowwise) || (axis == ChamEltwise) ) ) {
+                INSERT_TASK_zgesum(
+                    options, ChamRowwise, tempmm, tempnn,
+                    A(m, n), W( Wgrow, m, n) );
+            }
+            if ( (scale == 1) && (axis == ChamColumnwise) ) {
+                INSERT_TASK_dgessq(
+                    options, ChamColumnwise, tempmm, tempnn,
+                    A(m, n), W( Wdcol, m, n) );
+            }
+            if ( (scale == 1) && (axis == ChamRowwise) ) {
+                INSERT_TASK_dgessq(
+                    options, ChamRowwise, tempmm, tempnn,
+                    A(m, n), W( Wdrow, m, n) );
+            }
+        }
+    }
+
+    for(n = 0; n < NT; n++) {
+        int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
+
+        if ( (center == 1) && ( (axis == ChamColumnwise) || (axis == ChamEltwise) ) ) {
+            /**
+             *  2) reduce sums of columns between tiles per processus (between lines)
+             */
+            for(m = P; m < MT; m++) {
+                INSERT_TASK_zgeadd(
+                    options, ChamNoTrans, 1, tempnn, Wgcol->nb,
+                    1., W( Wgcol, m,   n ),
+                    1., W( Wgcol, m%P, n ) );
+            }
+            /**
+             *  3) reduce sums of columns between tiles of different processus on the first line of tiles
+             */
+            for(m = 1; m < P; m++) {
+                INSERT_TASK_zgeadd(
+                    options, ChamNoTrans, 1, tempnn, Wgcol->nb,
+                    1., W( Wgcol, m, n ),
+                    1., W( Wgcol, 0, n ) );
+            }
+            if ( axis == ChamEltwise ) {
+                /* 4) reduce sums inside each tile of the first line of tiles for the global sum */
+                INSERT_TASK_zgesum(
+                    options, ChamEltwise, 1, tempnn,
+                    W( Wgcol, 0, n ),
+                    W( Wgelt, 0, n ) );
+            }
+        }
+
+        if ( (scale == 1) && (axis == ChamColumnwise) ) {
+            /**
+             *  2) reduce columns (scl,ssq) tiles per processus (between lines)
+             */
+            for(m = P; m < MT; m++) {
+                INSERT_TASK_dplssq(
+                    options, ChamColumnwise, 1, tempnn,
+                    W( Wdcol, m,   n ),
+                    W( Wdcol, m%P, n ) );
+            }
+            /**
+             *  3) reduce columns (scl,ssq) tiles on the first line of tiles
+             */
+            for(m = 1; m < P; m++) {
+                INSERT_TASK_dplssq(
+                    options, ChamColumnwise, 1, tempnn,
+                    W( Wdcol, m, n ),
+                    W( Wdcol, 0, n ) );
+            }
+            /* 5) deduce the sum square for each column from the pairs (scl,ssq) -> sqrt(sum) = scl*sqrt(ssq) */
+            INSERT_TASK_dplssq2( options, tempnn, W( Wdcol, 0, n ) );
+        }
+    }
+
+    for(m = 0; m < MT; m++) {
+        int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
+
+        if ( (center == 1) && ( (axis == ChamRowwise) || (axis == ChamEltwise) ) ) {
+            /**
+             *  2) reduce sums of rows between tiles per processus (between columns)
+             */
+            for(n = Q; n < NT; n++) {
+                INSERT_TASK_zgeadd(
+                    options, ChamNoTrans, tempmm, 1, Wgrow->mb,
+                    1., W( Wgrow, m,   n ),
+                    1., W( Wgrow, m, n%Q ) );
+            }
+            /**
+             *  3) reduce sums of rows between tiles of different processus on the first columns of tiles
+             */
+            for(n = 1; n < Q; n++) {
+                INSERT_TASK_zgeadd(
+                    options, ChamNoTrans, tempmm, 1, Wgcol->nb,
+                    1., W( Wgrow, m, n ),
+                    1., W( Wgrow, m, 0 ) );
+            }
+        }
+
+        if ( (scale == 1) && (axis == ChamRowwise) ) {
+            /**
+             *  2) reduce rows (scl,ssq) tiles per processus (between lines)
+             */
+            for(n = Q; n < NT; n++) {
+                INSERT_TASK_dplssq(
+                    options, ChamRowwise, tempmm, 1,
+                    W( Wdrow, m,   n ),
+                    W( Wdrow, m, n%Q ) );
+            }
+            /**
+             *  3) reduce rows (scl,ssq) tiles on the first column of tiles
+             */
+            for(n = 1; n < Q; n++) {
+                INSERT_TASK_dplssq(
+                    options, ChamRowwise, tempmm, 1,
+                    W( Wdrow, m, n ),
+                    W( Wdrow, m, 0 ) );
+            }
+            /* 5) deduce the sum square for each column from the pairs (scl,ssq) -> sqrt(sum) = scl*sqrt(ssq) */
+            INSERT_TASK_dplssq2( options, tempmm, W( Wdrow, m, 0 ) );
+        }
+    }
+
+    if ( (center == 1) && (axis == ChamEltwise) ) {
+        /* 6) reduce global sum on each processus (between columns) */
+        for(n = Q; n < NT; n++) {
+            INSERT_TASK_zgeadd(
+                options, ChamNoTrans, 1, 1, 1,
+                1., W( Wgelt, 0, n),
+                1., W( Wgelt, 0, n%Q) );
+        }
+
+        /* 7) reduce global sum on the first tile (index 0, 0) */
+        for(n = 1; n < Q; n++) {
+            INSERT_TASK_zgeadd(
+                options, ChamNoTrans, 1, 1, 1,
+                1., W( Wgelt, 0, n),
+                1., W( Wgelt, 0, 0) );
+        }
+    }
+
+    /* Finally compute Centered-Scaled matrix coefficients inplace */
+    for(n = 0; n < NT; n++) {
+        int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
+
+        for(m = 0; m < MT; m++) {
+            int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
+
+            INSERT_TASK_zcesca(
+                options,
+                center, scale, axis,
+                A->m, A->n, tempmm, tempnn,
+                W( Wgrow, m, 0 ),
+                W( Wgcol, 0, n ),
+                W( Wgelt, 0, 0 ),
+                W( Wdrow, m, 0 ),
+                W( Wdcol, 0, n ),
+                A( m, n ) );
+        }
+    }
+}
+
+/**
+ *
+ */
+void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, cham_store_t axis, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_option_t options;
+    CHAM_desc_t *Wgcol = &(ws->Wgcol);
+    CHAM_desc_t *Wgrow = &(ws->Wgrow);
+    CHAM_desc_t *Wgelt = &(ws->Wgelt);
+    CHAM_desc_t *Wdcol = &(ws->Wdcol);
+    CHAM_desc_t *Wdrow = &(ws->Wdrow);
+    int m, n, tempmm, tempnn;
+
+    chamctxt = chameleon_context_self();
+    if ( sequence->status != CHAMELEON_SUCCESS ) {
+        return;
+    }
+    RUNTIME_options_init(&options, chamctxt, sequence, request);
+
+    /* Initialize Wgcol */
+    for(m = 0; m < Wgcol->mt; m++) {
+        tempmm = m == Wgcol->mt-1 ? Wgcol->m-m*Wgcol->mb : Wgcol->mb;
+        for(n = 0; n < Wgcol->nt; n++) {
+            tempnn = n == Wgcol->nt-1 ? Wgcol->n-n*Wgcol->nb : Wgcol->nb;
+            INSERT_TASK_dlaset(
+                &options,
+                ChamUpperLower, tempmm, tempnn,
+                0., 0.,
+                W( Wgcol, m, n ) );
+        }
+    }
+    /* Initialize Wgrow */
+    for(m = 0; m < Wgrow->mt; m++) {
+        tempmm = m == Wgrow->mt-1 ? Wgrow->m-m*Wgrow->mb : Wgrow->mb;
+        for(n = 0; n < Wgrow->nt; n++) {
+            tempnn = n == Wgrow->nt-1 ? Wgrow->n-n*Wgrow->nb : Wgrow->nb;
+            INSERT_TASK_dlaset(
+                &options,
+                ChamUpperLower, tempmm, tempnn,
+                0., 0.,
+                W( Wgrow, m, n ) );
+        }
+    }
+    /* Initialize Wgelt */
+    for(m = 0; m < Wgelt->mt; m++) {
+        tempmm = m == Wgelt->mt-1 ? Wgelt->m-m*Wgelt->mb : Wgelt->mb;
+        for(n = 0; n < Wgelt->nt; n++) {
+            tempnn = n == Wgelt->nt-1 ? Wgelt->n-n*Wgelt->nb : Wgelt->nb;
+            INSERT_TASK_dlaset(
+                &options,
+                ChamUpperLower, tempmm, tempnn,
+                0., 0.,
+                W( Wgelt, m, n ) );
+        }
+    }
+    /* Initialize Wdcol */
+    for(m = 0; m < Wdcol->mt; m++) {
+        tempmm = m == Wdcol->mt-1 ? Wdcol->m-m*Wdcol->mb : Wdcol->mb;
+        for(n = 0; n < Wdcol->nt; n++) {
+            tempnn = n == Wdcol->nt-1 ? Wdcol->n-n*Wdcol->nb : Wdcol->nb;
+            INSERT_TASK_dlaset(
+                &options,
+                ChamUpperLower, tempmm, tempnn,
+                -1., -1.,
+                W( Wdcol, m, n ) );
+        }
+    }
+    /* Initialize Wdrow */
+    for(m = 0; m < Wdrow->mt; m++) {
+        tempmm = m == Wdrow->mt-1 ? Wdrow->m-m*Wdrow->mb : Wdrow->mb;
+        for(n = 0; n < Wdrow->nt; n++) {
+            tempnn = n == Wdrow->nt-1 ? Wdrow->n-n*Wdrow->nb : Wdrow->nb;
+            INSERT_TASK_dlaset(
+                &options,
+                ChamUpperLower, tempmm, tempnn,
+                -1., -1.,
+                W( Wdrow, m, n ) );
+        }
+    }
+
+    chameleon_pzcesca_internal( center, scale, axis, A, Wgcol, Wgrow, Wgelt, Wdcol, Wdrow, &options );
+
+    CHAMELEON_Desc_Flush( Wgcol, sequence );
+    CHAMELEON_Desc_Flush( Wgrow, sequence );
+    CHAMELEON_Desc_Flush( Wgelt, sequence );
+    CHAMELEON_Desc_Flush( Wdcol, sequence );
+    CHAMELEON_Desc_Flush( Wdrow, sequence );
+    RUNTIME_options_finalize(&options, chamctxt);
+}
diff --git a/compute/zcesca.c b/compute/zcesca.c
new file mode 100644
index 0000000000000000000000000000000000000000..446951edcaaaac0931e1196e9c2467c2a0f05c27
--- /dev/null
+++ b/compute/zcesca.c
@@ -0,0 +1,457 @@
+/**
+ *
+ * @file zcesca.c
+ *
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zcesca wrappers
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-07
+ * @precisions normal z -> s d c z
+ *
+ */
+#include "control/common.h"
+#include <stdlib.h>
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ *  CHAMELEON_zcesca_WS_Alloc - Allocate the required workspaces for asynchronous center
+ *
+ *******************************************************************************
+ *
+ * @param[in] A
+ *          The descriptor of the matrix A.
+ *
+ *******************************************************************************
+ *
+ * @retval An allocated opaque pointer to use in CHAMELEON_zcesca_Tile_Async()
+ * and to free with CHAMELEON_zcesca_WS_Free().
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zcesca_Tile_Async
+ * @sa CHAMELEON_zcesca_WS_Free
+ *
+ */
+void *CHAMELEON_zcesca_WS_Alloc( const CHAM_desc_t *A )
+{
+    CHAM_context_t *chamctxt;
+    struct chameleon_pzcesca_s *options;
+    int workmt, worknt;
+
+    chamctxt = chameleon_context_self();
+    if ( chamctxt == NULL ) {
+        return NULL;
+    }
+
+    options = calloc( 1, sizeof(struct chameleon_pzcesca_s) );
+
+    workmt = chameleon_max( A->mt, A->p );
+    worknt = chameleon_max( A->nt, A->q );
+
+    chameleon_desc_init( &(options->Wgcol), CHAMELEON_MAT_ALLOC_TILE,
+                         ChamComplexDouble, 1, A->nb, A->nb,
+                         workmt, A->n, 0, 0,
+                         workmt, A->n, A->p, A->q,
+                         NULL, NULL, NULL );
+
+    chameleon_desc_init( &(options->Wgrow), CHAMELEON_MAT_ALLOC_TILE,
+                         ChamComplexDouble, A->mb, 1, A->mb,
+                         A->m, worknt, 0, 0,
+                         A->m, worknt, A->p, A->q,
+                         NULL, NULL, NULL );
+
+    chameleon_desc_init( &(options->Wgelt), CHAMELEON_MAT_ALLOC_TILE,
+                         ChamComplexDouble, 1, 1, 1,
+                         1, worknt, 0, 0,
+                         1, worknt, A->p, A->q,
+                         NULL, NULL, NULL );
+
+    chameleon_desc_init( &(options->Wdcol), CHAMELEON_MAT_ALLOC_TILE,
+                         ChamRealDouble, 2, A->nb, 2*A->nb,
+                         2*workmt, A->n, 0, 0,
+                         2*workmt, A->n, A->p, A->q,
+                         NULL, NULL, NULL );
+
+    chameleon_desc_init( &(options->Wdrow), CHAMELEON_MAT_ALLOC_TILE,
+                         ChamRealDouble, A->mb, 2, 2*A->mb,
+                         A->m, 2*worknt, 0, 0,
+                         A->m, 2*worknt, A->p, A->q,
+                         NULL, NULL, NULL );
+
+    return (void*)options;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Free the allocated workspaces for asynchronous center
+ *
+ *******************************************************************************
+ *
+ * @param[in,out] user_ws
+ *          On entry, the opaque pointer allocated by CHAMELEON_zcesca_WS_Alloc()
+ *          On exit, all data are freed.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zcesca_Tile_Async
+ * @sa CHAMELEON_zcesca_WS_Alloc
+ *
+ */
+void CHAMELEON_zcesca_WS_Free( void *user_ws )
+{
+    struct chameleon_pzcesca_s *ws = (struct chameleon_pzcesca_s*)user_ws;
+
+    chameleon_desc_destroy( &(ws->Wgcol) );
+    chameleon_desc_destroy( &(ws->Wgrow) );
+    chameleon_desc_destroy( &(ws->Wgelt) );
+    chameleon_desc_destroy( &(ws->Wdcol) );
+    chameleon_desc_destroy( &(ws->Wdrow) );
+    free( ws );
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ *  CHAMELEON_zcesca replace a general matrix by the Centered-Scaled matrix inplace
+ *
+ *  Considering a matrix A of size m x n, \f[A = (a_{i,j})_{1 \leq i \leq m, 1 \leq j \leq n}\f]
+ *  Lets
+ *  \f[g_i = \frac{1}{n} \sum_j a_{ij} \\
+ *     g_j = \frac{1}{m} \sum_i a_{ij} \\
+ *     g   = \frac{1}{mn} \sum_{i,j} a_{ij}\f]
+ *  A centered rowwise gives \f[\bar{A} = (\bar{a}_{i,j})_{1 \leq i \leq m, 1 \leq j \leq n}\f] such that
+ *  \f[ \bar{a}_{i,j} = a_{i,j} - g_i \f]
+ *  A centered columnwise gives \f[\bar{A} = (\bar{a}_{i,j})_{1 \leq i \leq m, 1 \leq j \leq n}\f] such that
+ *  \f[ \bar{a}_{i,j} = a_{i,j} - g_j \f]
+ *  A bicentered gives \f[\bar{A} = (\bar{a}_{i,j})_{1 \leq i \leq m, 1 \leq j \leq n}\f] such that
+ *  \f[ \bar{a}_{i,j} = a_{i,j} - g_i - g_j + g \f]
+ * Lets
+ * \f[d_i = || a_{i*} || = \sqrt{ \sum_j a_{ij}²} \\
+ *    d_j = || a_{*j} || = \sqrt{ \sum_i a_{ij}²} \f]
+ * A scaled rowwise gives \f[A' = (a_{i,j}')_{1 \leq i \leq m, 1 \leq j \leq n}\f] such that
+ * \f[ a_{i*}' = \frac{a_{i*}}{d_i} \f]
+ * A scaled columnwise gives \f[A' = (a_{i,j}')_{1 \leq i \leq m, 1 \leq j \leq n}\f] such that
+ * \f[ a_{*j}' = \frac{a_{*j}}{d_j} \f]
+ *
+ *******************************************************************************
+ *
+ * @param[in] center
+ *          1 if A must be centered, else 0.
+ *
+ * @param[in] scale
+ *          1 if A must be scaled, else 0.
+ *
+ * @param[in] axis
+ *          Specifies the axis over which to center and or scale.
+ *            = ChamColumnwise: centered column-wise
+ *            = ChamRowwise: centered row-wise
+ *            = ChamEltwise: bi-centered (only compatible if center=1 and scale=0)
+ *
+ * @param[in] M
+ *          The number of rows of the overall matrix.  M >= 0.
+ *
+ * @param[in] N
+ *         The number of columns of the overall matrix.  N >= 0.
+ *
+ * @param[in] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ *******************************************************************************
+ *
+* @retval CHAMELEON_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zcesca_Tile
+ * @sa CHAMELEON_zcesca_Tile_Async
+ * @sa CHAMELEON_scesca
+ *
+ */
+int CHAMELEON_zcesca(int center, int scale, cham_store_t axis, int M, int N, CHAMELEON_Complex64_t *A, int LDA )
+{
+    int NB;
+    int status;
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t descAl, descAt;
+    void *ws;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zcesca", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    /* Check input arguments */
+    if ( (center != 0) && (center != 1) ) {
+        chameleon_error("CHAMELEON_zcesca", "Illegal value of center");
+        return -1;
+    }
+    if ( (scale != 0) && (scale != 1) ) {
+        chameleon_error("CHAMELEON_zcesca", "Illegal value of scale");
+        return -2;
+    }
+    if ( (axis != ChamColumnwise) && (axis != ChamRowwise) && (axis != ChamEltwise) ) {
+        chameleon_error("CHAMELEON_zcesca", "Illegal value of axis");
+        return -3;
+    }
+    if ( (axis == ChamEltwise) && (center == 1) && (scale == 1) ) {
+        chameleon_error("CHAMELEON_zcesca", "Illegal value of axis and/or scale, center=1 and axis=ChamEltwise (i.e. bi-centered) must not be used with scale=1");
+        return -3;
+    }
+    if (M < 0) {
+        chameleon_error("CHAMELEON_zcesca", "Illegal value of M");
+        return -4;
+    }
+    if (N < 0) {
+        chameleon_error("CHAMELEON_zcesca", "Illegal value of N");
+        return -5;
+    }
+    if (LDA < chameleon_max(1, M)) {
+        chameleon_error("CHAMELEON_zcesca", "illegal value of LDA");
+        return -7;
+    }
+
+    /* Quick return */
+    if (N == 0)
+        return CHAMELEON_SUCCESS;
+
+    /* Tune NB depending on M, N & NRHS; Set NBNB */
+    status = chameleon_tune(CHAMELEON_FUNC_DGEMM, M, N, 0);
+    if (status != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_zcesca", "chameleon_tune() failed");
+        return status;
+    }
+
+    /* Set NT */
+    NB = CHAMELEON_NB;
+
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInout, ChamUpperLower,
+                         A, NB, NB, LDA, N, N, N, sequence, &request );
+
+    /* Call the tile interface */
+    ws = CHAMELEON_zcesca_WS_Alloc( &descAt );
+    CHAMELEON_zcesca_Tile_Async( center, scale, axis, &descAt, ws, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInout, ChamUpperLower, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Cleanup the temporary data */
+    CHAMELEON_zcesca_WS_Free( ws );
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ *  CHAMELEON_zcesca_Tile - Tile equivalent of CHAMELEON_zcesca().
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] center
+ *          1 if A must be centered, else 0.
+ *
+ * @param[in] scale
+ *          1 if A must be scaled, else 0.
+ *
+ * @param[in] axis
+ *          Specifies the axis over which to center and or scale.
+ *            = ChamColumnwise: centered column-wise
+ *            = ChamRowwise: centered row-wise
+ *            = ChamEltwise: bi-centered (only compatible if center=1 and scale=0)
+ *
+ * @param[in] A
+ *          The M-by-N matrix A.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zcesca
+ * @sa CHAMELEON_zcesca_Tile_Async
+ * @sa CHAMELEON_scesca_Tile
+ *
+ */
+int CHAMELEON_zcesca_Tile( int center, int scale, cham_store_t axis, CHAM_desc_t *A )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    int status;
+    void *ws;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zcesca_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+
+    /* Check input arguments */
+    if ( (center != 0) && (center != 1) ) {
+        chameleon_error("CHAMELEON_zcesca_Tile", "Illegal value of center");
+        return -1;
+    }
+    if ( (scale != 0) && (scale != 1) ) {
+        chameleon_error("CHAMELEON_zcesca_Tile", "Illegal value of scale");
+        return -2;
+    }
+    if ( (axis != ChamColumnwise) && (axis != ChamRowwise) && (axis != ChamEltwise) ) {
+        chameleon_error("CHAMELEON_zcesca_Tile", "Illegal value of axis");
+        return -3;
+    }
+    if ( (axis == ChamEltwise) && (center == 1) && (scale == 1) ) {
+        chameleon_error("CHAMELEON_zcesca_Tile", "Illegal value of axis and/or scale, center=1 and axis=ChamEltwise (i.e. bi-centered) must not be used with scale=1");
+        return -3;
+    }
+
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    ws = CHAMELEON_zcesca_WS_Alloc( A );
+    CHAMELEON_zcesca_Tile_Async( center, scale, axis, A, ws, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    CHAMELEON_zcesca_WS_Free( ws );
+
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile_Async
+ *
+ *  CHAMELEON_zcesca_Tile_Async - Non-blocking equivalent of CHAMELEON_zcesca_Tile().
+ *  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zcesca
+ * @sa CHAMELEON_zcesca_Tile
+ * @sa CHAMELEON_scesca_Tile_Async
+ *
+ */
+int CHAMELEON_zcesca_Tile_Async( int center, int scale, cham_store_t axis, CHAM_desc_t *A, void *user_ws,
+                                 RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t *chamctxt;
+    struct chameleon_pzcesca_s *ws;
+
+    /* Check input arguments */
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zcesca_Tile_Async", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if ( (center != 0) && (center != 1) ) {
+        chameleon_error("CHAMELEON_zcesca_Tile_Async", "Illegal value of center");
+        return -1;
+    }
+    if ( (scale != 0) && (scale != 1) ) {
+        chameleon_error("CHAMELEON_zcesca_Tile_Async", "Illegal value of scale");
+        return -2;
+    }
+    if ( (axis != ChamColumnwise) && (axis != ChamRowwise) && (axis != ChamEltwise) ) {
+        chameleon_error("CHAMELEON_zcesca_Tile_Async", "Illegal value of axis");
+        return -3;
+    }
+    if ( (axis == ChamEltwise) && (center == 1) && (scale == 1) ) {
+        chameleon_error("CHAMELEON_zcesca_Tile_Async", "Illegal value of axis and/or scale, center=1 and axis=ChamEltwise (i.e. bi-centered) must not be used with scale=1");
+        return -3;
+    }
+    if (sequence == NULL) {
+        chameleon_fatal_error("CHAMELEON_zcesca_Tile_Async", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        chameleon_fatal_error("CHAMELEON_zcesca_Tile_Async", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if (sequence->status == CHAMELEON_SUCCESS) {
+        request->status = CHAMELEON_SUCCESS;
+    }
+    else {
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
+    }
+
+    /* Check descriptors for correctness */
+    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_zcesca_Tile_Async", "invalid descriptor");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    /* Check input arguments */
+    if (A->nb != A->mb) {
+        chameleon_error("CHAMELEON_zcesca_Tile_Async", "only square tiles supported");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    /* Quick return */
+    if (chameleon_min(A->m, A->n) == 0) {
+        return CHAMELEON_SUCCESS;
+    }
+
+    if ( user_ws == NULL ) {
+        ws = CHAMELEON_zcesca_WS_Alloc( A );
+    }
+    else {
+        ws = user_ws;
+    }
+
+    chameleon_pzcesca( ws, center, scale, axis, A, sequence, request );
+
+    if ( user_ws == NULL ) {
+        CHAMELEON_Desc_Flush( A, sequence );
+        chameleon_sequence_wait( chamctxt, sequence );
+        CHAMELEON_zcesca_WS_Free( ws );
+    }
+
+    return CHAMELEON_SUCCESS;
+}
diff --git a/compute/zgram.c b/compute/zgram.c
index 73427cb6fded3bb2fb56f84d10c30e1d9988085c..1b9d02122f01a23c15ba80cd6d7351eba4884b1e 100644
--- a/compute/zgram.c
+++ b/compute/zgram.c
@@ -80,7 +80,7 @@ void *CHAMELEON_zgram_WS_Alloc( const CHAM_desc_t *A )
  *
  * @ingroup CHAMELEON_Complex64_t
  *
- * @brief Free the allocated workspaces for asynchronous gemm
+ * @brief Free the allocated workspaces for asynchronous gram
  *
  *******************************************************************************
  *
@@ -154,13 +154,20 @@ int CHAMELEON_zgram( cham_uplo_t uplo, int N, CHAMELEON_Complex64_t *A, int LDA
         return CHAMELEON_ERR_NOT_INITIALIZED;
     }
     /* Check input arguments */
+    if ((uplo != ChamUpper) &&
+        (uplo != ChamLower) &&
+        (uplo != ChamUpperLower))
+    {
+        chameleon_error("CHAMELEON_zgram", "illegal value of uplo");
+        return -1;
+    }
     if (N < 0) {
         chameleon_error("CHAMELEON_zgram", "illegal value of N");
-        return -1;
+        return -2;
     }
     if (LDA < chameleon_max(1, N)) {
         chameleon_error("CHAMELEON_zgram", "illegal value of LDA");
-        return -3;
+        return -4;
     }
 
     /* Quick return */
@@ -241,11 +248,21 @@ int CHAMELEON_zgram_Tile( cham_uplo_t uplo, CHAM_desc_t *A )
         chameleon_fatal_error("CHAMELEON_zgram_Tile", "CHAMELEON not initialized");
         return CHAMELEON_ERR_NOT_INITIALIZED;
     }
+    /* Check input arguments */
+    if ((uplo != ChamUpper) &&
+        (uplo != ChamLower) &&
+        (uplo != ChamUpperLower))
+    {
+        chameleon_error("CHAMELEON_zgram_Tile", "illegal value of uplo");
+        return -1;
+    }
     chameleon_sequence_create( chamctxt, &sequence );
 
     ws = CHAMELEON_zgram_WS_Alloc( A );
     CHAMELEON_zgram_Tile_Async( uplo, A, ws, sequence, &request );
 
+    CHAMELEON_Desc_Flush( A, sequence );
+
     chameleon_sequence_wait( chamctxt, sequence );
 
     CHAMELEON_zgram_WS_Free( ws );
@@ -288,15 +305,23 @@ int CHAMELEON_zgram_Tile_Async( cham_uplo_t uplo, CHAM_desc_t *A, void *user_ws,
 
     chamctxt = chameleon_context_self();
     if (chamctxt == NULL) {
-        chameleon_fatal_error("CHAMELEON_zgram_Tile", "CHAMELEON not initialized");
+        chameleon_fatal_error("CHAMELEON_zgram_Tile_Async", "CHAMELEON not initialized");
         return CHAMELEON_ERR_NOT_INITIALIZED;
     }
+    /* Check input arguments */
+    if ((uplo != ChamUpper) &&
+        (uplo != ChamLower) &&
+        (uplo != ChamUpperLower))
+    {
+        chameleon_error("CHAMELEON_zgram_Tile_Async", "illegal value of uplo");
+        return -1;
+    }
     if (sequence == NULL) {
-        chameleon_fatal_error("CHAMELEON_zgram_Tile", "NULL sequence");
+        chameleon_fatal_error("CHAMELEON_zgram_Tile_Async", "NULL sequence");
         return CHAMELEON_ERR_UNALLOCATED;
     }
     if (request == NULL) {
-        chameleon_fatal_error("CHAMELEON_zgram_Tile", "NULL request");
+        chameleon_fatal_error("CHAMELEON_zgram_Tile_Async", "NULL request");
         return CHAMELEON_ERR_UNALLOCATED;
     }
     /* Check sequence status */
diff --git a/control/compute_z.h b/control/compute_z.h
index 53e39a1c10775a454b5c2983c006eeefd207b826..acc3c54458b4d0462be8adbe6f20404daa5cd643 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -34,6 +34,18 @@ struct chameleon_pzgemm_s {
     CHAM_desc_t WA;
     CHAM_desc_t WB;
 };
+
+/**
+ * @brief Data structure to handle the Centering-Scaled workspaces
+ */
+struct chameleon_pzcesca_s {
+    CHAM_desc_t Wgcol;
+    CHAM_desc_t Wgrow;
+    CHAM_desc_t Wgelt;
+    CHAM_desc_t Wdcol;
+    CHAM_desc_t Wdrow;
+};
+
 /**
  * @brief Data structure to handle the GRAM workspaces
  */
@@ -155,6 +167,10 @@ void chameleon_pztpqrt_param( int genD, cham_uplo_t uplo, int K,
                               CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
                               RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 
+/**
+ * Centered-Scaled function prototypes
+ */
+void chameleon_pzcesca( struct chameleon_pzcesca_s *ws, int center, int scale, cham_store_t axis, CHAM_desc_t *A, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 /**
  * Gram function prototypes
  */
diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt
index 081a231b89a1cb26b45edcc7fb1eb36ebdb70458..15df8b4f91c6148997903d7f3297d484cbf6f12d 100644
--- a/coreblas/compute/CMakeLists.txt
+++ b/coreblas/compute/CMakeLists.txt
@@ -35,6 +35,7 @@ set(ZSRC
     core_dlag2z.c
     core_dzasum.c
     core_zaxpy.c
+    core_zcesca.c
     core_zgeadd.c
     core_zlascal.c
     core_zlatm1.c
@@ -43,6 +44,7 @@ set(ZSRC
     core_zgemm.c
     core_zgeqrt.c
     core_zgesplit.c
+    core_zgesum.c
     core_zgessm.c
     core_zgessq.c
     core_zgetf2_nopiv.c
diff --git a/coreblas/compute/core_zcesca.c b/coreblas/compute/core_zcesca.c
new file mode 100644
index 0000000000000000000000000000000000000000..5d95d2eefb24b2004a2f61fabf6b6235a7973bb0
--- /dev/null
+++ b/coreblas/compute/core_zcesca.c
@@ -0,0 +1,233 @@
+/**
+ *
+ * @file core_zcesca.c
+ *
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon core_zcesca CPU kernel
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-07
+ * @precisions normal z -> c d s
+ *
+ */
+#include "coreblas.h"
+
+/**
+ *******************************************************************************
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ *  CORE_zcesca computes the Centered-Scaled matrix factors of A inplace
+ *
+ *  Considering a matrix A of size m x n, \f[A = (a_{i,j})_{1 \leq i \leq m, 1 \leq j \leq n}\f]
+ *  Lets
+ *  \f[g_i = \frac{1}{n} \sum_j a_{ij} \\
+ *     g_j = \frac{1}{m} \sum_i a_{ij} \\
+ *     g   = \frac{1}{mn} \sum_{i,j} a_{ij}\f]
+ *  A centered rowwise gives \f[\bar{A} = (\bar{a}_{i,j})_{1 \leq i \leq m, 1 \leq j \leq n}\f] such that
+ *  \f[ \bar{a}_{i,j} = a_{i,j} - g_i \f]
+ *  A centered columnwise gives \f[\bar{A} = (\bar{a}_{i,j})_{1 \leq i \leq m, 1 \leq j \leq n}\f] such that
+ *  \f[ \bar{a}_{i,j} = a_{i,j} - g_j \f]
+ *  A bicentered gives \f[\bar{A} = (\bar{a}_{i,j})_{1 \leq i \leq m, 1 \leq j \leq n}\f] such that
+ *  \f[ \bar{a}_{i,j} = a_{i,j} - g_i - g_j + g \f]
+ * Lets
+ * \f[d_i = || a_{i*} || = \sqrt{ \sum_j a_{ij}²} \\
+ *    d_j = || a_{*j} || = \sqrt{ \sum_i a_{ij}²} \f]
+ * A scaled rowwise gives \f[A' = (a_{i,j}')_{1 \leq i \leq m, 1 \leq j \leq n}\f] such that
+ * \f[ a_{i*}' = \frac{a_{i*}}{d_i} \f]
+ * A scaled columnwise gives \f[A' = (a_{i,j}')_{1 \leq i \leq m, 1 \leq j \leq n}\f] such that
+ * \f[ a_{*j}' = \frac{a_{*j}}{d_j} \f]
+ *
+ *******************************************************************************
+ *
+ * @param[in] center
+ *          1 if A must be centered, else 0.
+ *
+ * @param[in] scale
+ *          1 if A must be scaled, else 0.
+ *
+ * @param[in] axis
+ *          Specifies the axis over which to center and or scale.
+ *            = ChamColumnwise: centered column-wise
+ *            = ChamRowwise: centered row-wise
+ *            = ChamEltwise: bi-centered (only compatible if center=1 and scale=0)
+ *
+ * @param[in] M
+ *          The number of rows of the overall matrix.  M >= 0.
+ *
+ * @param[in] N
+ *         The number of columns of the overall matrix.  N >= 0.
+ *
+ * @param[in] Mt
+ *          The number of rows of the tile A.  Mt >= 0.
+ *
+ * @param[in] Nt
+ *         The number of columns of the tile A.  Nt >= 0.
+ *
+ * @param[in] Gi
+ *         The 1-by-Mt tile containing the sum of values by rows.
+ *
+ * @param[in] LDGI
+ *         The leading dimension of the array Gi.
+ *
+ * @param[in] Gj
+ *         The 1-by-Nt tile containing the sum of values by columns.
+ *
+ * @param[in] LDGJ
+ *         The leading dimension of the array Gj.
+ *
+ * @param[in] G
+ *         The sum of values of all the matrix.
+ *
+  * @param[in] Di
+ *         The 1-by-Mt tile containing the sum of squares by rows.
+ *
+ * @param[in] LDDI
+ *         The leading dimension of the array Di.
+ *
+ * @param[in] Dj
+ *         The 1-by-Nt tile containing the sum of squares by columns.
+ *
+ * @param[in] LDDJ
+ *         The leading dimension of the array Dj.
+ *
+ * @param[in,out] A
+ *         On entry, the M-by-N tile A.
+ *         On exit, updated by the centered-scaled matrix coefficients.
+ *
+ * @param[in] LDA
+ *         The leading dimension of the array A.  LDA >= max(1,M).
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ *
+ */
+
+int CORE_zcesca( int center, int scale,
+                 cham_store_t axis,
+                 int M, int N,
+                 int Mt, int Nt,
+                 const CHAMELEON_Complex64_t *Gi, int LDGI,
+                 const CHAMELEON_Complex64_t *Gj, int LDGJ,
+                 const CHAMELEON_Complex64_t *G,
+                 const double *Di, int LDDI,
+                 const double *Dj, int LDDJ,
+                 CHAMELEON_Complex64_t *A, int LDA )
+{
+    int i, j;
+    CHAMELEON_Complex64_t gi, gj, g;
+    double di, dj;
+
+    /* Check input arguments */
+    if ( (center != 0) && (center != 1) ) {
+        coreblas_error(1, "Illegal value of center");
+        return -1;
+    }
+    if ( (scale != 0) && (scale != 1) ) {
+        coreblas_error(2, "Illegal value of scale");
+        return -2;
+    }
+    if ( (axis != ChamColumnwise) && (axis != ChamRowwise) && (axis != ChamEltwise) ) {
+        coreblas_error(3, "Illegal value of axis");
+        return -3;
+    }
+    if ( (axis == ChamEltwise) && (center == 1) && (scale == 1) ) {
+        coreblas_error(3, "Illegal value of axis and/or scale, center=1 and axis=ChamEltwise (i.e. bi-centered) must not be used with scale=1");
+        return -3;
+    }
+    if (M < 0) {
+        coreblas_error(4, "Illegal value of M");
+        return -4;
+    }
+    if (N < 0) {
+        coreblas_error(5, "Illegal value of N");
+        return -5;
+    }
+    if (Mt < 0) {
+        coreblas_error(6, "Illegal value of Mt");
+        return -6;
+    }
+    if (Nt < 0) {
+        coreblas_error(7, "Illegal value of Nt");
+        return -7;
+    }
+    if (LDGI < 0) {
+        coreblas_error(9, "Illegal value of LDGI");
+        return -9;
+    }
+    if (LDGJ < 0) {
+        coreblas_error(11, "Illegal value of LDGJ");
+        return -11;
+    }
+    if (LDDI < 0) {
+        coreblas_error(14, "Illegal value of LDDI");
+        return -14;
+    }
+    if (LDDJ < 0) {
+        coreblas_error(16, "Illegal value of LDDJ");
+        return -16;
+    }
+    if ((LDA < chameleon_max(1,Mt)) && (Mt > 0)) {
+        coreblas_error(18, "Illegal value of LDA");
+        return -18;
+    }
+
+    /* Quick return */
+    if ((center == 0) && (scale == 0)) {
+        return CHAMELEON_SUCCESS;
+    }
+    if ((Mt == 0) || (Nt == 0)) {
+        return CHAMELEON_SUCCESS;
+    }
+
+    if ( (center == 1) && (axis == ChamEltwise) ) {
+        /* overall mean of values */
+        g =  G[0] / ( (double)M * (double)N );
+    }
+
+    for(j = 0; j < Nt; j++) {
+        if ( (center == 1) && ( (axis == ChamColumnwise) || (axis == ChamEltwise) ) ) {
+            /* mean of values of the column */
+            gj = Gj[j*LDGJ] / ((double)M);
+        }
+        if ( (scale == 1) && (axis == ChamColumnwise) ) {
+            /* norm 2 of the column */
+            dj = Dj[j*LDDJ];
+        }
+        for(i = 0; i < Mt; i++) {
+            if ( (center == 1) && ( (axis == ChamRowwise) || (axis == ChamEltwise) ) ) {
+                /* mean of values of the row */
+                gi = Gi[i] / ((double)N);
+                /* compute centered matrix factor */
+                A[j*LDA+i] -= gi;
+            }
+            if ( (center == 1) && ( (axis == ChamColumnwise) || (axis == ChamEltwise) ) ) {
+                /* compute centered matrix factor */
+                A[j*LDA+i] -= gj;
+            }
+            if ( (center == 1) && (axis == ChamEltwise) ) {
+                /* compute centered matrix factor */
+                A[j*LDA+i] += g;
+            }
+            if ( (scale == 1) && (axis == ChamColumnwise) ) {
+                /* compute scaled matrix factor */
+                A[j*LDA+i] /= dj;
+            }
+            if ( (scale == 1) && (axis == ChamRowwise) ) {
+                /* norm 2 of the row */
+                di = Di[i];
+                /* compute scaled matrix factor */
+                A[j*LDA+i] /= di;
+            }
+        }
+    }
+
+    return CHAMELEON_SUCCESS;
+}
diff --git a/coreblas/compute/core_zgesum.c b/coreblas/compute/core_zgesum.c
new file mode 100644
index 0000000000000000000000000000000000000000..be8d2cf7eb92b4af064cc3d02a379e8c873734f4
--- /dev/null
+++ b/coreblas/compute/core_zgesum.c
@@ -0,0 +1,148 @@
+/**
+ *
+ * @file core_zgesum.c
+ *
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon core_zgesum CPU kernel
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-07
+ * @precisions normal z -> c d s
+ *
+ */
+#include <math.h>
+#include "coreblas/lapacke.h"
+#include "coreblas.h"
+
+/**
+ * @brief Subcase storev == ChamColumnwise of CORE_zgesum()
+ */
+static inline int
+CORE_zgesum_col( int M, int N,
+                 const CHAMELEON_Complex64_t *A, int LDA,
+                 CHAMELEON_Complex64_t *sum )
+{
+    int i, j;
+    CHAMELEON_Complex64_t *ptr = A;
+    for(j=0; j<N; j++) {
+        for(i=0; i<M; i++) {
+            sum[j] += *ptr;
+            ptr++;
+        }
+        ptr += LDA - M;
+    }
+    return CHAMELEON_SUCCESS;
+}
+/**
+ * @brief Subcase storev == ChamRowwise of CORE_zgesum()
+ */
+static inline int
+CORE_zgesum_row( int M, int N,
+                 const CHAMELEON_Complex64_t *A, int LDA,
+                 CHAMELEON_Complex64_t *sum )
+{
+    int i, j;
+    CHAMELEON_Complex64_t *ptr = A;
+    for(j=0; j<N; j++) {
+        for(i=0; i<M; i++) {
+            sum[i] += *ptr;
+            ptr++;
+        }
+        ptr += LDA - M;
+    }
+    return CHAMELEON_SUCCESS;
+}
+/**
+ * @brief Subcase storev == ChamEltwise of CORE_zgesum()
+ */
+static inline int
+CORE_zgesum_elt( int M, int N,
+                 const CHAMELEON_Complex64_t *A, int LDA,
+                 CHAMELEON_Complex64_t *sum )
+{
+    int i, j;
+    CHAMELEON_Complex64_t *ptr = A;
+    for(j=0; j<N; j++) {
+        for(i=0; i<M; i++) {
+            sum[0] += *ptr;
+            ptr++;
+        }
+        ptr += LDA - M;
+    }
+    return CHAMELEON_SUCCESS;
+}
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ *  CORE_zgesum returns the sum of a matrix values over a chosen axis:
+ *  column-wise, row-wise, element-wise.
+ *
+ *******************************************************************************
+ *
+ * @param[in] storev
+ *          Specifies whether the sums are made per column or row.
+ *          = ChamColumnwise: Computes the sum of values on each column
+ *          = ChamRowwise:    Computes the sum of values on each row
+ *          = ChamEltwise:    Computes the sum of values on all the matrix
+ *
+ *  @param[in] M
+ *          The number of rows in the tile A.
+ *
+ *  @param[in] N
+ *          The number of columns in the tile A.
+ *
+ *  @param[in] A
+ *          The M-by-N matrix on which to compute the norm.
+ *
+ *  @param[in] LDA
+ *          The leading dimension of the tile A. LDA >= max(1,M).
+ *
+ *  @param[out] sum resulting sums
+ *          Dimension:  (1,K)
+ *          if storev == ChamColumnwise, K = N
+ *          if storev == ChamRowwise,    K = M
+ *          if storev == ChamEltwise,    K = 1
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval -k, the k-th argument had an illegal value
+ *
+ */
+int CORE_zgesum( cham_store_t storev, int M, int N,
+                 const CHAMELEON_Complex64_t *A, int LDA,
+                 CHAMELEON_Complex64_t *sum )
+{
+    int i;
+    int K;
+
+    /* initialize pairs scale, sumsquare if not already done */
+    if ( storev == ChamColumnwise ) {
+        K = N;
+    } else if ( storev == ChamRowwise ) {
+        K = M;
+    } else {
+        K = 1;
+    }
+    for (i=0; i<K; i+=1) {
+        sum[i] = 0.;
+    }
+
+    if (storev == ChamColumnwise) {
+        CORE_zgesum_col( M, N, A, LDA, sum );
+    }
+    else if (storev == ChamRowwise) {
+        CORE_zgesum_row( M, N, A, LDA, sum );
+    }
+    else {
+        CORE_zgesum_elt( M, N, A, LDA, sum );
+    }
+    return CHAMELEON_SUCCESS;
+}
diff --git a/coreblas/compute/core_zgram.c b/coreblas/compute/core_zgram.c
index d40ba792b2ceb0979bb8a9a6868f5d3a4f31ac46..4d89735b33676b83f9346c0d0b5e0a39bbf1f333 100644
--- a/coreblas/compute/core_zgram.c
+++ b/coreblas/compute/core_zgram.c
@@ -67,7 +67,7 @@
  *
  * @param[in,out] A
  *         On entry, the M-by-N tile A.
- *         On exit, updated by the application of L.
+ *         On exit, updated by the Gram matrix coefficients.
  *
  * @param[in] LDA
  *         The leading dimension of the array A.  LDA >= max(1,M).
diff --git a/coreblas/compute/core_ztile.c b/coreblas/compute/core_ztile.c
index 6c28b72d322c84a18b14cfbd387328d9c48652c7..31b1ee49599e53f619e206c164677c066499a18b 100644
--- a/coreblas/compute/core_ztile.c
+++ b/coreblas/compute/core_ztile.c
@@ -1007,6 +1007,39 @@ TCORE_zunmqr( cham_side_t            side,
         side, trans, M, N, K, IB, CHAM_tile_get_ptr( V ), V->ld, CHAM_tile_get_ptr( T ), T->ld, CHAM_tile_get_ptr( C ), C->ld, WORK, LDWORK );
 }
 
+int
+TCORE_zgesum( cham_store_t storev, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *sum )
+{
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( sum->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    return CORE_zgesum( storev, M, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( sum ) );
+}
+
+int
+TCORE_zcesca( int center,
+              int scale,
+              cham_store_t axis,
+              int                M,
+              int                N,
+              int                Mt,
+              int                Nt,
+              const CHAM_tile_t *Gi,
+              const CHAM_tile_t *Gj,
+              const CHAM_tile_t *G,
+              const CHAM_tile_t *Di,
+              const CHAM_tile_t *Dj,
+              CHAM_tile_t *      A )
+{
+    assert( Gi->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( Gj->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( G->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( Di->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( Dj->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
+    return CORE_zcesca(
+        center, scale, axis, M, N, Mt, Nt, CHAM_tile_get_ptr( Gi ), Gi->ld, CHAM_tile_get_ptr( Gj ), Gj->ld, CHAM_tile_get_ptr( G ), CHAM_tile_get_ptr( Di ), Di->ld, CHAM_tile_get_ptr( Dj ), Dj->ld, CHAM_tile_get_ptr( A ), A->ld );
+}
+
 int
 TCORE_zgram( cham_uplo_t        uplo,
              int                M,
diff --git a/coreblas/include/coreblas/coreblas_z.h b/coreblas/include/coreblas/coreblas_z.h
index 858cfe090a7548b85aeb0e3b3f8d35761f12080c..9637ba67b5d2bc56bd650346389bae1bb486d360 100644
--- a/coreblas/include/coreblas/coreblas_z.h
+++ b/coreblas/include/coreblas/coreblas_z.h
@@ -372,8 +372,21 @@ int  CORE_zunmqr(cham_side_t side, cham_trans_t trans,
                  CHAMELEON_Complex64_t *WORK, int LDWORK);
 
 /**
- * Gram prototypes
+ * Centered-Scale and Gram prototypes
  */
+int CORE_zgesum( cham_store_t storev, int M, int N,
+                 const CHAMELEON_Complex64_t *A, int LDA,
+                 CHAMELEON_Complex64_t *sum );
+int CORE_zcesca( int center, int scale,
+                 cham_store_t axis,
+                 int M, int N,
+                 int Mt, int Nt,
+                 const CHAMELEON_Complex64_t *Gi, int LDGI,
+                 const CHAMELEON_Complex64_t *Gj, int LDGJ,
+                 const CHAMELEON_Complex64_t *G,
+                 const double *Di, int LDDI,
+                 const double *Dj, int LDDJ,
+                 CHAMELEON_Complex64_t *A, int LDA );
 int CORE_zgram( cham_uplo_t uplo, int M, int N, int Mt, int Nt,
                 const double *Di, int LDDI,
                 const double *Dj, int LDDJ,
diff --git a/coreblas/include/coreblas/coreblas_ztile.h b/coreblas/include/coreblas/coreblas_ztile.h
index cec8b2c4c84440490efcdfe3ab8d85c8e510e2a1..10d420339b4e39bbc34fae19d5cc349ed08b0f71 100644
--- a/coreblas/include/coreblas/coreblas_ztile.h
+++ b/coreblas/include/coreblas/coreblas_ztile.h
@@ -80,6 +80,8 @@ int  TCORE_ztsmqr_hetra1( cham_side_t side, cham_trans_t trans, int m1, int n1,
 int  TCORE_ztstrf( int M, int N, int IB, int NB, CHAM_tile_t *U, CHAM_tile_t *A, CHAM_tile_t *L, int *IPIV, CHAMELEON_Complex64_t *WORK, int LDWORK, int *INFO );
 int  TCORE_zunmlq( cham_side_t side, cham_trans_t trans, int M, int N, int IB, int K, const CHAM_tile_t *V, const CHAM_tile_t *T, CHAM_tile_t *C, CHAMELEON_Complex64_t *WORK, int LDWORK );
 int  TCORE_zunmqr( cham_side_t side, cham_trans_t trans, int M, int N, int K, int IB, const CHAM_tile_t *V, const CHAM_tile_t *T, CHAM_tile_t *C, CHAMELEON_Complex64_t *WORK, int LDWORK );
-int TCORE_zgram( cham_uplo_t uplo, int M, int N, int Mt, int Nt, const CHAM_tile_t *Di, const CHAM_tile_t *Dj, const CHAM_tile_t *D, CHAM_tile_t *A );
+int  TCORE_zgesum( cham_store_t storev, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *sum );
+int  TCORE_zcesca( int center, int scale, cham_store_t axis, int M, int N, int Mt, int Nt, const CHAM_tile_t *Gi, const CHAM_tile_t *Gj, const CHAM_tile_t *G, const CHAM_tile_t *Di, const CHAM_tile_t *Dj, CHAM_tile_t *A );
+int  TCORE_zgram( cham_uplo_t uplo, int M, int N, int Mt, int Nt, const CHAM_tile_t *Di, const CHAM_tile_t *Dj, const CHAM_tile_t *D, CHAM_tile_t *A );
 
 #endif /* _coreblas_ztile_h_ */
diff --git a/doc/orgmode/chapters/using.org b/doc/orgmode/chapters/using.org
index f0cb97f5dc6265761c961cbbbefc44fa93b43ee0..74be7d6be8537a9d3f0cafb28f42c7f32c73d6e1 100644
--- a/doc/orgmode/chapters/using.org
+++ b/doc/orgmode/chapters/using.org
@@ -768,6 +768,11 @@
      * *EVD/SVD*
        * gesvd: singular value decomposition
        * heevd: eigenvalues/eigenvectors computation with A Hermitian
+    * *Specific Matrix transformation for Data Analysis*
+      * cesca: centered-scaled matrix transformation, pretreatment algorithm
+        for Principal Component Analysis
+      * gram: Gram matrix transformation, pretreatment algorithm for
+        Multidimensional Scaling
      * *Extra routines*
        * *Norms*
          * lange: compute norm of a matrix (Max, One, Inf, Frobenius)
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index 63562bbab69fdf21eb765c19b3c7ccf48dda8c8b..119bb25bf7056030d850535f2a7069b134752d09 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -306,6 +306,8 @@ int CHAMELEON_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree, cham_side_t s
  */
 void *CHAMELEON_zgemm_WS_Alloc( cham_trans_t transA, cham_trans_t transB, const CHAM_desc_t *A, const CHAM_desc_t *B, const CHAM_desc_t *C );
 void  CHAMELEON_zgemm_WS_Free( void *ws );
+void *CHAMELEON_zcesca_WS_Alloc( const CHAM_desc_t *A );
+void  CHAMELEON_zcesca_WS_Free( void *ws );
 void *CHAMELEON_zgram_WS_Alloc( const CHAM_desc_t *A );
 void  CHAMELEON_zgram_WS_Free( void *ws );
 
@@ -352,6 +354,12 @@ int CHAMELEON_zbuild(cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t *A, i
 int CHAMELEON_zbuild_Tile(cham_uplo_t uplo,  CHAM_desc_t *A, void *user_data, void* user_build_callback );
 int CHAMELEON_zbuild_Tile_Async(cham_uplo_t uplo, CHAM_desc_t *A, void *user_data, void* user_build_callback, RUNTIME_sequence_t *sequence, RUNTIME_request_t  *request);
 
+/**
+ * Centered-Scaled function prototypes
+ */
+int CHAMELEON_zcesca(int center, int scale, cham_store_t axis, int M, int N, CHAMELEON_Complex64_t *A, int LDA );
+int CHAMELEON_zcesca_Tile( int center, int scale, cham_store_t axis, CHAM_desc_t *A );
+int CHAMELEON_zcesca_Tile_Async( int center, int scale, cham_store_t axis, CHAM_desc_t *A, void *user_ws, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 /**
  * Gram function prototypes
  */
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index bc13dbd6471dc4c322c198aaf50d4306e58fdd4d..d89c00df4d5a0c1405584fe630a6ee846bb5939a 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -429,8 +429,21 @@ INSERT_TASK_zttmqr( const RUNTIME_option_t *options,
 }
 
 /**
- * Gram prototype
+ * Centered-Scaled and Gram prototype
  */
+void INSERT_TASK_zgesum( const RUNTIME_option_t *options,
+                        cham_store_t storev, int m, int n,
+                        const CHAM_desc_t *A, int Am, int An,
+                        const CHAM_desc_t *SUMS, int SUMSm, int SUMSn );
+void INSERT_TASK_zcesca( const RUNTIME_option_t *options,
+                         int center, int scale, cham_store_t axis,
+                         int m, int n, int mt, int nt,
+                         const CHAM_desc_t *Gi, int Gim, int Gin,
+                         const CHAM_desc_t *Gj, int Gjm, int Gjn,
+                         const CHAM_desc_t *G, int Gm, int Gn,
+                         const CHAM_desc_t *Di, int Dim, int Din,
+                         const CHAM_desc_t *Dj, int Djm, int Djn,
+                         CHAM_desc_t *A, int Am, int An );
 void INSERT_TASK_zgram( const RUNTIME_option_t *options,
                         cham_uplo_t uplo,
                         int m, int n, int mt, int nt,
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index b3101b81657d82280fd10c234604dc1756aafff8..9ca5edd5c927544480fc82f3a75c78ddbdca4447 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -102,8 +102,10 @@ set(CODELETS_ZSRC
     ##################
     codelets/codelet_zbuild.c
     ##################
-    # GRAM
+    # CENTERED-SCALED and GRAM
     ##################
+    codelets/codelet_zgesum.c
+    codelets/codelet_zcesca.c
     codelets/codelet_zgram.c
     )
 
diff --git a/runtime/openmp/codelets/codelet_zcesca.c b/runtime/openmp/codelets/codelet_zcesca.c
new file mode 100644
index 0000000000000000000000000000000000000000..e7724e52bc33f09aa956f1e9dd42fda4a55d2d38
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zcesca.c
@@ -0,0 +1,42 @@
+/**
+ *
+ * @file openmp/codelet_zcesca.c
+ *
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zcesca OpenMP codelet
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-07
+ * @precisions normal z -> s d c z
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
+
+void INSERT_TASK_zcesca( const RUNTIME_option_t *options,
+                         int center, int scale, cham_store_t axis,
+                         int m, int n, int mt, int nt,
+                         const CHAM_desc_t *Gi, int Gim, int Gin,
+                         const CHAM_desc_t *Gj, int Gjm, int Gjn,
+                         const CHAM_desc_t *G,  int Gm,  int Gn,
+                         const CHAM_desc_t *Di, int Dim, int Din,
+                         const CHAM_desc_t *Dj, int Djm, int Djn,
+                               CHAM_desc_t *A,  int Am,  int An )
+{
+    CHAM_tile_t *tileGi = Gi->get_blktile( Gi, Gim, Gin );
+    CHAM_tile_t *tileGj = Gj->get_blktile( Gj, Gjm, Gjn );
+    CHAM_tile_t *tileG  = G->get_blktile( G, Gm, Gn );
+    CHAM_tile_t *tileDi = Di->get_blktile( Di, Dim, Din );
+    CHAM_tile_t *tileDj = Dj->get_blktile( Dj, Djm, Djn );
+    CHAM_tile_t *tileA  = A->get_blktile( A, Am, An );
+
+#pragma omp task firstprivate( center, scale, axis, m, n, mt, nt, tileGi, tileGj, tileG, tileDi, tileDj, tileA ) depend( in:tileGi[0], tileGj[0], tileG[0], tileDi[0], tileDj[0] ) depend( inout:tileA[0] )
+    TCORE_zcesca( center, scale, axis, m, n, mt, nt,
+                  tileGi, tileGj, tileG, tileDi, tileDj, tileA );
+}
diff --git a/runtime/openmp/codelets/codelet_zgesum.c b/runtime/openmp/codelets/codelet_zgesum.c
new file mode 100644
index 0000000000000000000000000000000000000000..86b7bfd084fadee98c6218384e396597137e189f
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zgesum.c
@@ -0,0 +1,31 @@
+/**
+ *
+ * @file openmp/codelet_zgesum.c
+ *
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgesum OpenMP codelet
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-07
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
+
+void INSERT_TASK_zgesum( const RUNTIME_option_t *options,
+                         cham_store_t storev, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *SUMS, int SUMSm, int SUMSn )
+{
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileScaleSum = SUMS->get_blktile( SUMS, SUMSm, SUMSn );
+#pragma omp task firstprivate( storev, m, n, tileA, tileScaleSum ) depend( in:tileA[0] ) depend( inout:tileScaleSum[0] )
+    TCORE_zgesum( storev, m, n, tileA, tileScaleSum );
+}
diff --git a/runtime/parsec/codelets/codelet_zcesca.c b/runtime/parsec/codelets/codelet_zcesca.c
new file mode 100644
index 0000000000000000000000000000000000000000..5018014eb6b28ec7ff7df6b02273cc98578d55d3
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_zcesca.c
@@ -0,0 +1,95 @@
+/**
+ *
+ * @file parsec/codelet_zcesca.c
+ *
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zcesca PaRSEC codelet
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-07
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_parsec.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+static inline int
+CORE_zcesca_parsec( parsec_execution_stream_t *context,
+                    parsec_task_t             *this_task )
+{
+    int center, scale;
+    cham_store_t axis;
+    int m, n, mt, nt;
+    CHAMELEON_Complex64_t *Gi;
+    int ldgi;
+    CHAMELEON_Complex64_t *Gj;
+    int ldgj;
+    CHAMELEON_Complex64_t *G;
+    double *Di;
+    int lddi;
+    double *Dj;
+    int lddj;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+
+    parsec_dtd_unpack_args(
+        this_task, &center, &scale, &axis, &m, &n, &mt, &nt, &Gi, &ldgi, &Gj, &ldgj, &G, &Di, &lddi, &Dj, &lddj, &A, &lda );
+
+    CORE_zcesca( center, scale, axis,
+                 m, n, mt, nt,
+                 Gi, ldgi,
+                 Gj, ldgj,
+                 G,
+                 Di, lddi,
+                 Dj, lddj,
+                 A, lda);
+
+    (void)context;
+    return PARSEC_HOOK_RETURN_DONE;
+}
+
+
+void INSERT_TASK_zcesca( const RUNTIME_option_t *options,
+                         int center, int scale, cham_store_t axis,
+                         int m, int n, int mt, int nt,
+                         const CHAM_desc_t *Gi, int Gim, int Gin,
+                         const CHAM_desc_t *Gj, int Gjm, int Gjn,
+                         const CHAM_desc_t *G, int Gm, int Gn,
+                         const CHAM_desc_t *Di, int Dim, int Din,
+                         const CHAM_desc_t *Dj, int Djm, int Djn,
+                         CHAM_desc_t *A, int Am, int An)
+{
+    parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileGi = Gi->get_blktile( Gi, Gim, Gin );
+    CHAM_tile_t *tileGj = Gj->get_blktile( Gj, Gjm, Gjn );
+    CHAM_tile_t *tileDi = Di->get_blktile( Di, Dim, Din );
+    CHAM_tile_t *tileDj = Dj->get_blktile( Dj, Djm, Djn );
+    CHAM_tile_t *tileA  = A->get_blktile( A, Am, An );
+    parsec_dtd_taskpool_insert_task(
+        PARSEC_dtd_taskpool, CORE_zcesca_parsec, options->priority, "cesca",
+        sizeof(int),   &center, VALUE,
+        sizeof(int),   &scale, VALUE,
+        sizeof(int),   &axis, VALUE,
+        sizeof(int),   &m,    VALUE,
+        sizeof(int),   &n,    VALUE,
+        sizeof(int),   &mt,   VALUE,
+        sizeof(int),   &nt,   VALUE,
+        PASSED_BY_REF, RTBLKADDR( Gi, CHAMELEON_Complex64_t, Gim, Gin ), chameleon_parsec_get_arena_index( Gi ) | INPUT,
+        sizeof(int), &(tileGi->ld), VALUE,
+        PASSED_BY_REF, RTBLKADDR( Gj, CHAMELEON_Complex64_t, Gjm, Gjn ), chameleon_parsec_get_arena_index( Gj ) | INPUT,
+        sizeof(int), &(tileGj->ld), VALUE,
+        PASSED_BY_REF, RTBLKADDR( G, CHAMELEON_Complex64_t, Gm, Gn ), chameleon_parsec_get_arena_index( G ) | INPUT,
+        PASSED_BY_REF, RTBLKADDR( Di, double, Dim, Din ), chameleon_parsec_get_arena_index( Di ) | INPUT,
+        sizeof(int), &(tileDi->ld), VALUE,
+        PASSED_BY_REF, RTBLKADDR( Dj, double, Djm, Djn ), chameleon_parsec_get_arena_index( Dj ) | INPUT,
+        sizeof(int), &(tileDj->ld), VALUE,
+        PASSED_BY_REF, RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
+        sizeof(int), &(tileA->ld), VALUE,
+        PARSEC_DTD_ARG_END );
+}
diff --git a/runtime/parsec/codelets/codelet_zgesum.c b/runtime/parsec/codelets/codelet_zgesum.c
new file mode 100644
index 0000000000000000000000000000000000000000..c0e1a10dd0539db947d5784ea3c13788d294cda4
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_zgesum.c
@@ -0,0 +1,61 @@
+/**
+ *
+ * @file parsec/codelet_zgesum.c
+ *
+ * @copyright 2009-2015 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgesum PaRSEC codelet
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-07
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_parsec.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+static inline int
+CORE_zgesum_parsec( parsec_execution_stream_t *context,
+                    parsec_task_t             *this_task )
+{
+    cham_store_t storev;
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    CHAMELEON_Complex64_t *SUMS;
+
+    parsec_dtd_unpack_args(
+        this_task, &storev, &m, &n, &A, &lda, &SUMS );
+
+    CORE_zgesum( storev, m, n, A, lda, SUMS );
+
+    (void)context;
+    return PARSEC_HOOK_RETURN_DONE;
+}
+
+void INSERT_TASK_zgesum( const RUNTIME_option_t *options,
+                        cham_store_t storev, int m, int n,
+                        const CHAM_desc_t *A, int Am, int An,
+                        const CHAM_desc_t *SUMS, int SUMSm, int SUMSn )
+{
+    parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+
+    parsec_dtd_taskpool_insert_task(
+        PARSEC_dtd_taskpool, CORE_zgesum_parsec, options->priority, "gessum",
+        sizeof(cham_store_t), &storev,       VALUE,
+        sizeof(int),          &m,            VALUE,
+        sizeof(int),          &n,            VALUE,
+        PASSED_BY_REF,   RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
+        sizeof(int), &(tileA->ld), VALUE,
+        PASSED_BY_REF,   RTBLKADDR( SUMS, CHAMELEON_Complex64_t, SUMSm, SUMSn ), chameleon_parsec_get_arena_index( SUMS ) | INOUT | AFFINITY,
+        PARSEC_DTD_ARG_END );
+}
diff --git a/runtime/quark/codelets/codelet_zcesca.c b/runtime/quark/codelets/codelet_zcesca.c
new file mode 100644
index 0000000000000000000000000000000000000000..6339719f74509d1b36b304a8ff9e086b59041d0a
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zcesca.c
@@ -0,0 +1,65 @@
+/**
+ *
+ * @file quark/codelet_zcesca.c
+ *
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zcesca Quark codelet
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-07
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_quark.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
+
+void CORE_zcesca_quark(Quark *quark)
+{
+    int center, scale;
+    cham_store_t axis;
+    int m, n, mt, nt;
+    CHAM_tile_t *Gi;
+    CHAM_tile_t *Gj;
+    CHAM_tile_t *G;
+    CHAM_tile_t *Di;
+    CHAM_tile_t *Dj;
+    CHAM_tile_t *A;
+
+    quark_unpack_args_13(quark, center, scale, axis, m, n, mt, nt, Gi, Gj, G, Di, Dj, A );
+    TCORE_zcesca( center, scale, axis, m, n, mt, nt, Gi, Gj, G, Di, Dj, A );
+}
+
+void INSERT_TASK_zcesca( const RUNTIME_option_t *options,
+                         int center, int scale, cham_store_t axis,
+                         int m, int n, int mt, int nt,
+                         const CHAM_desc_t *Gi, int Gim, int Gin,
+                         const CHAM_desc_t *Gj, int Gjm, int Gjn,
+                         const CHAM_desc_t *G, int Gm, int Gn,
+                         const CHAM_desc_t *Di, int Dim, int Din,
+                         const CHAM_desc_t *Dj, int Djm, int Djn,
+                         CHAM_desc_t *A, int Am, int An )
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    DAG_CORE_CESCA;
+    QUARK_Insert_Task(opt->quark, CORE_zcesca_quark, (Quark_Task_Flags*)opt,
+                      sizeof(int),             &center,    VALUE,
+                      sizeof(int),             &scale,     VALUE,
+                      sizeof(int),             &axis,      VALUE,
+                      sizeof(int),             &m,         VALUE,
+                      sizeof(int),             &n,         VALUE,
+                      sizeof(int),             &mt,        VALUE,
+                      sizeof(int),             &nt,        VALUE,
+                      sizeof(void*), RTBLKADDR(Gi, CHAMELEON_Complex64_t, Gim, Gin), INPUT,
+                      sizeof(void*), RTBLKADDR(Gj, CHAMELEON_Complex64_t, Gjm, Gjn), INPUT,
+                      sizeof(void*), RTBLKADDR(G,  CHAMELEON_Complex64_t, Gm,  Gn ), INPUT,
+                      sizeof(void*), RTBLKADDR(Di, double, Dim, Din), INPUT,
+                      sizeof(void*), RTBLKADDR(Dj, double, Djm, Djn), INPUT,
+                      sizeof(void*), RTBLKADDR(A,  CHAMELEON_Complex64_t, Am,  An ), INOUT,
+                      0);
+}
diff --git a/runtime/quark/codelets/codelet_zgesum.c b/runtime/quark/codelets/codelet_zgesum.c
new file mode 100644
index 0000000000000000000000000000000000000000..51b4e427e8a27e75a606e65065475abb0e2a72d3
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zgesum.c
@@ -0,0 +1,48 @@
+/**
+ *
+ * @file quark/codelet_zgesum.c
+ *
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgesum Quark codelet
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-07
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_quark.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
+
+void CORE_zgesum_quark(Quark *quark)
+{
+    cham_store_t storev;
+    int m;
+    int n;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileW;
+
+    quark_unpack_args_5( quark, storev, m, n, tileA, tileW );
+    TCORE_zgesum( storev, m, n, tileA, tileW );
+}
+
+void INSERT_TASK_zgesum( const RUNTIME_option_t *options,
+                         cham_store_t storev, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *SUMS, int SUMSm, int SUMSn )
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    DAG_CORE_GESUM;
+    QUARK_Insert_Task(opt->quark, CORE_zgesum_quark, (Quark_Task_Flags*)opt,
+                      sizeof(cham_store_t),            &storev, VALUE,
+                      sizeof(int),                     &m,      VALUE,
+                      sizeof(int),                     &n,      VALUE,
+                      sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
+                      sizeof(void*), RTBLKADDR(SUMS, CHAMELEON_Complex64_t, SUMSm, SUMSn), INOUT,
+                      0);
+}
diff --git a/runtime/quark/include/core_blas_dag.h b/runtime/quark/include/core_blas_dag.h
index ab5983727d1f532e8920c95f6be4da4f396227a1..cfc82f227b23cb8fb0edce1a1c42a0e34e928e66 100644
--- a/runtime/quark/include/core_blas_dag.h
+++ b/runtime/quark/include/core_blas_dag.h
@@ -97,6 +97,9 @@
 #define DAG_CORE_TRSSQ      DAG_SET_PROPERTIES( "TRSSQ"    , "white"    )
 #define DAG_CORE_PLSSQ      DAG_SET_PROPERTIES( "PLSSQ"    , "white"    )
 #define DAG_CORE_PLSSQ2     DAG_SET_PROPERTIES( "PLSSQ2"   , "white"    )
+
+#define DAG_CORE_GESUM      DAG_SET_PROPERTIES( "GESUM"    , "white"    )
+#define DAG_CORE_CESCA      DAG_SET_PROPERTIES( "CESCA"    , "orange"   )
 #define DAG_CORE_GRAM       DAG_SET_PROPERTIES( "GRAM"     , "orange"   )
 
 #endif /* _core_blas_dag_h_ */
diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index 369c4cd24ccd2966f80c9c1ccc9d13fccd0d6f25..58e6cb6bf1eabfcc0ecf91c8541a5657a1ac9f9d 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -39,7 +39,9 @@ CHAMELEON_CL_CB(zgessq,        cti_handle_get_m(task->handles[0]), cti_handle_ge
 CHAMELEON_CL_CB(zgetrf,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (2./3.)*M*N*K)
 CHAMELEON_CL_CB(zgetrf_incpiv, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (2./3.)*M*N*K)
 CHAMELEON_CL_CB(zgetrf_nopiv,  cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (2./3.)*M*N*K)
-CHAMELEON_CL_CB(zgram,         cti_handle_get_m(task->handles[3]), cti_handle_get_n(task->handles[3]), 0,                                                M*N)
+CHAMELEON_CL_CB(zgesum,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), 0,                                      M*N)
+CHAMELEON_CL_CB(zcesca,        cti_handle_get_m(task->handles[5]), cti_handle_get_n(task->handles[5]), 0,                                      8.*M*N)
+CHAMELEON_CL_CB(zgram,         cti_handle_get_m(task->handles[3]), cti_handle_get_n(task->handles[3]), 0,                                      8.*M*N)
 CHAMELEON_CL_CB(zhe2ge,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                       (1./2.0)*M*N)
 CHAMELEON_CL_CB(zherfb,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                         2. *M* M*M)
 #if defined(PRECISION_z) || defined(PRECISION_c)
diff --git a/runtime/starpu/codelets/codelet_zcesca.c b/runtime/starpu/codelets/codelet_zcesca.c
new file mode 100644
index 0000000000000000000000000000000000000000..5dbadec77774438ef701cb9d07b4f7af579094f1
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zcesca.c
@@ -0,0 +1,110 @@
+/**
+ *
+ * @file starpu/codelet_zcesca.c
+ *
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zcesca StarPU codelet
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-07
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelet_z.h"
+
+struct cl_zcesca_args_s {
+    int center;
+    int scale;
+    cham_store_t axis;
+    int m;
+    int n;
+    int mt;
+    int nt;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zcesca_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_zcesca_args_s clargs;
+    CHAM_tile_t *Gi;
+    CHAM_tile_t *Gj;
+    CHAM_tile_t *G;
+    CHAM_tile_t *Di;
+    CHAM_tile_t *Dj;
+    CHAM_tile_t *A;
+
+    Gi = cti_interface_get(descr[0]);
+    Gj = cti_interface_get(descr[1]);
+    G  = cti_interface_get(descr[2]);
+    Di = cti_interface_get(descr[3]);
+    Dj = cti_interface_get(descr[4]);
+    A  = cti_interface_get(descr[5]);
+
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_zcesca( clargs.center, clargs.scale, clargs.axis,
+                  clargs.m, clargs.n, clargs.mt, clargs.nt,
+                  Gi, Gj, G, Di, Dj, A );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zcesca, cl_zcesca_cpu_func)
+
+void INSERT_TASK_zcesca( const RUNTIME_option_t *options,
+                         int center, int scale, cham_store_t axis,
+                         int m, int n, int mt, int nt,
+                         const CHAM_desc_t *Gi, int Gim, int Gin,
+                         const CHAM_desc_t *Gj, int Gjm, int Gjn,
+                         const CHAM_desc_t *G, int Gm, int Gn,
+                         const CHAM_desc_t *Di, int Dim, int Din,
+                         const CHAM_desc_t *Dj, int Djm, int Djn,
+                         CHAM_desc_t *A, int Am, int An )
+{
+    struct cl_zcesca_args_s clargs = {
+        .center = center,
+        .scale  = scale,
+        .axis   = axis,
+        .m      = m,
+        .n      = n,
+        .mt     = mt,
+        .nt     = nt
+    };
+    struct starpu_codelet *codelet = &cl_zcesca;
+    void (*callback)(void*) = options->profiling ? cl_zcesca_callback : NULL;
+    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
+    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(Gi, Gim, Gin);
+    CHAMELEON_ACCESS_R(Gj, Gjm, Gjn);
+    CHAMELEON_ACCESS_R(G, Gm, Gn);
+    CHAMELEON_ACCESS_R(Di, Dim, Din);
+    CHAMELEON_ACCESS_R(Dj, Djm, Djn);
+    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    rt_starpu_insert_task(
+        codelet,
+        STARPU_VALUE, &clargs, sizeof(struct cl_zcesca_args_s),
+        STARPU_R,        RTBLKADDR(Gi, CHAMELEON_Complex64_t, Gim, Gin),
+        STARPU_R,        RTBLKADDR(Gj, CHAMELEON_Complex64_t, Gjm, Gjn),
+        STARPU_R,        RTBLKADDR(G, CHAMELEON_Complex64_t, Gm, Gn),
+        STARPU_R,        RTBLKADDR(Di, double, Dim, Din),
+        STARPU_R,        RTBLKADDR(Dj, double, Djm, Djn),
+        STARPU_RW,       RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_PRIORITY, options->priority,
+        STARPU_CALLBACK, callback,
+        STARPU_EXECUTE_ON_WORKER, workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zcesca",
+#endif
+        0);
+}
diff --git a/runtime/starpu/codelets/codelet_zgesum.c b/runtime/starpu/codelets/codelet_zgesum.c
new file mode 100644
index 0000000000000000000000000000000000000000..31a6ca93e5c8f9fc74a12e656309ca3059cb731d
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zgesum.c
@@ -0,0 +1,79 @@
+/**
+ *
+ * @file starpu/codelet_zgesum.c
+ *
+ * @copyright 2012-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgesum StarPU codelet
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-7
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelet_z.h"
+
+struct cl_zgesum_args_s {
+    cham_store_t storev;
+    int m;
+    int n;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgesum_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_zgesum_args_s clargs;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileW;
+
+    tileA = cti_interface_get(descr[0]);
+    tileW = cti_interface_get(descr[1]);
+
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_zgesum( clargs.storev, clargs.m, clargs.n, tileA, tileW );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgesum, cl_zgesum_cpu_func)
+
+void INSERT_TASK_zgesum( const RUNTIME_option_t *options,
+                         cham_store_t storev, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *SUMS, int SUMSm, int SUMSn )
+{
+    struct cl_zgesum_args_s clargs = {
+        .storev = storev,
+        .m      = m,
+        .n      = n
+    };
+    struct starpu_codelet *codelet = &cl_zgesum;
+    void (*callback)(void*) = options->profiling ? cl_zgesum_callback : NULL;
+    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
+    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_RW(SUMS, SUMSm, SUMSn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    rt_starpu_insert_task(
+        codelet,
+        STARPU_VALUE, &clargs, sizeof(struct cl_zgesum_args_s),
+        STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_RW,       RTBLKADDR(SUMS, CHAMELEON_Complex64_t, SUMSm, SUMSn),
+        STARPU_PRIORITY, options->priority,
+        STARPU_CALLBACK, callback,
+        STARPU_EXECUTE_ON_WORKER, workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zgesum",
+#endif
+        0);
+}
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index e246aabc62c07b62d417146963805ad1928acfdd..3b41ad7813d88b1d041b838d772e9f8a94a45ea5 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -122,6 +122,12 @@ CODELETS_HEADER(dzasum);
  */
 CODELETS_HEADER(zplrnt);
 CODELETS_HEADER(zbuild);
+
+/*
+ * centered-scaled and gram functions
+ */
+CODELETS_HEADER(zgesum);
+CODELETS_HEADER(zcesca);
 CODELETS_HEADER(zgram);
 
 #if defined(PRECISION_z) || defined(PRECISION_c)
diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt
index 1079e455cd9f8f17ecb33d4e5a3317efdb8c50b3..78f4f507ef8a896f92a48b8a3a9c50c6ccd359ea 100644
--- a/testing/CMakeLists.txt
+++ b/testing/CMakeLists.txt
@@ -101,6 +101,8 @@ set(ZSRC
   testing_zgepdf_qdwh.c
   testing_zgepdf_qr.c
   testing_zplrnk.c
+  testing_zcesca.c
+  testing_zgram.c
   )
 
 
diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake
index 9ca61431a485b6eb9251d208ddf5d2581b549f53..a749e7a47aaaa2209348add0131599af9eaae794 100644
--- a/testing/CTestLists.cmake
+++ b/testing/CTestLists.cmake
@@ -66,6 +66,8 @@ if (NOT CHAMELEON_SIMULATION)
       gels_hqr )
     set( TESTS ${TESTS}
       genm2 gepdf_qr gepdf_qdwh )
+    set( TESTS ${TESTS}
+      cesca gram )
 
     foreach(cat ${TEST_CATEGORIES})
       foreach(gpus ${N_GPUS})
diff --git a/testing/input/cesca.in b/testing/input/cesca.in
new file mode 100644
index 0000000000000000000000000000000000000000..2f6f33fd9fa543c85c187523e2acffd2ae0cf1c1
--- /dev/null
+++ b/testing/input/cesca.in
@@ -0,0 +1,16 @@
+# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step]
+# Not given parameters will receive default values
+
+# CESCA
+# nb: Tile size
+# ib: Inner tile size
+# M: Number of rows of matrix A
+# N: Number of columns of matrix A
+# LDA: Leading dimension of matrix A
+
+op = cesca
+nb = 16, 17
+ib = 8
+m = 15, 17, 33
+n = 13, 19, 35
+lda = 37
diff --git a/testing/input/gram.in b/testing/input/gram.in
new file mode 100644
index 0000000000000000000000000000000000000000..0088b7edee3139d80a23e84cb28dbde220336654
--- /dev/null
+++ b/testing/input/gram.in
@@ -0,0 +1,17 @@
+# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step]
+# Not given parameters will receive default values
+
+# GRAM
+
+# nb: Tile size
+# ib: Inner tile size
+# n: Order of the matrix A
+# lda: Leading dimension of matrix A
+# uplo: Matrix part to be considered (0: Upper, 1: Lower)
+
+op = gram
+nb = 16, 17
+ib = 8
+n = 15, 19, 37
+lda = 41
+uplo = 0,1
diff --git a/testing/testing_zcesca.c b/testing/testing_zcesca.c
new file mode 100644
index 0000000000000000000000000000000000000000..b252476e852d1e395ce87eb5fb662e3e56abeeb6
--- /dev/null
+++ b/testing/testing_zcesca.c
@@ -0,0 +1,98 @@
+/**
+ *
+ * @file testing_zcesca.c
+ *
+ * @copyright 2019-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zcesca testing
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-10
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include "testings.h"
+#include "testing_zcheck.h"
+#include <chameleon/flops.h>
+
+static cham_fixdbl_t
+flops_zcesca( int M, int N )
+{
+    cham_fixdbl_t flops = 0.;
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+    /*  2 multiplications and 5 addition per element */
+    flops = ( 2. * 6. + 10. ) * M * N;
+#else
+    flops = ( 2. + 5. ) * M * N;
+#endif
+
+    return flops;
+}
+
+int
+testing_zcesca( run_arg_list_t *args, int check )
+{
+    int          hres   = 0;
+    CHAM_desc_t *descA;
+
+    /* Read arguments */
+    intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
+    int          nb     = run_arg_get_int( args, "nb", 320 );
+    int          P      = parameters_getvalue_int( "P" );
+    int          N      = run_arg_get_int( args, "N", 1000 );
+    int          M      = run_arg_get_int( args, "M", N );
+    int          LDA    = run_arg_get_int( args, "LDA", M );
+    int          seedA  = run_arg_get_int( args, "seedA", random() );
+    int          Q      = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_zcesca( M, N );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    /* Create the matrices */
+    CHAMELEON_Desc_Create(
+        &descA, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q );
+
+    /* Fill the matrix with random values */
+    CHAMELEON_zplrnt_Tile( descA, seedA );
+
+    /* Compute the centered-scaled matrix transformation */
+    START_TIMING( t );
+    hres = CHAMELEON_zcesca_Tile( 1, 1, ChamColumnwise, descA );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+
+    CHAMELEON_Desc_Destroy( &descA );
+
+    return hres;
+}
+
+testing_t   test_zcesca;
+const char *zcesca_params[] = { "mtxfmt", "nb", "trans", "m", "n", "lda", "seedA", NULL };
+const char *zcesca_output[] = { NULL };
+const char *zcesca_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function
+ */
+void testing_zcesca_init( void ) __attribute__( ( constructor ) );
+void
+testing_zcesca_init( void )
+{
+    test_zcesca.name        = "zcesca";
+    test_zcesca.helper      = "General centered-scaled matrix transformation";
+    test_zcesca.params      = zcesca_params;
+    test_zcesca.output      = zcesca_output;
+    test_zcesca.outchk      = zcesca_outchk;
+    test_zcesca.fptr        = testing_zcesca;
+    test_zcesca.next        = NULL;
+
+    testing_register( &test_zcesca );
+}
diff --git a/testing/testing_zgram.c b/testing/testing_zgram.c
new file mode 100644
index 0000000000000000000000000000000000000000..abd27f011e5b5c659c16b9563fb6fb4bb4be10ac
--- /dev/null
+++ b/testing/testing_zgram.c
@@ -0,0 +1,98 @@
+/**
+ *
+ * @file testing_zgram.c
+ *
+ * @copyright 2019-2021 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgram testing
+ *
+ * @version 1.1.0
+ * @author Florent Pruvost
+ * @date 2021-05-10
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#include "testings.h"
+#include "testing_zcheck.h"
+#include <chameleon/flops.h>
+
+static cham_fixdbl_t
+flops_zgram( int N )
+{
+    cham_fixdbl_t flops = 0.;
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+    /* 5 multiplications and 3 addition per element */
+    flops = ( 5. * 6. + 6. ) * N * N;
+#else
+    flops = ( 5. + 3. ) * N * N;
+#endif
+
+    return flops;
+}
+
+int
+testing_zgram( run_arg_list_t *args, int check )
+{
+    int          hres   = 0;
+    CHAM_desc_t *descA;
+
+    /* Read arguments */
+    intptr_t     mtxfmt = parameters_getvalue_int( "mtxfmt" );
+    int          nb     = run_arg_get_int( args, "nb", 320 );
+    cham_uplo_t uplo    = run_arg_get_uplo( args, "uplo", ChamUpper );
+    int          P      = parameters_getvalue_int( "P" );
+    int          N      = run_arg_get_int( args, "N", 1000 );
+    int          LDA    = run_arg_get_int( args, "LDA", N );
+    int          seedA  = run_arg_get_int( args, "seedA", random() );
+    int          Q      = parameters_compute_q( P );
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_zgram( N );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    /* Create the matrices */
+    CHAMELEON_Desc_Create(
+        &descA, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, N, N, P, Q );
+
+    /* Fill the matrix with random values */
+    CHAMELEON_zplghe_Tile( (double)N, uplo, descA, seedA );
+
+    /* Compute the gram matrix transformation */
+    START_TIMING( t );
+    hres = CHAMELEON_zgram_Tile( uplo, descA );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", ( hres == CHAMELEON_SUCCESS ) ? gflops : -1. );
+
+    CHAMELEON_Desc_Destroy( &descA );
+
+    return hres;
+}
+
+testing_t   test_zgram;
+const char *zgram_params[] = { "mtxfmt", "nb", "uplo", "n", "n", "lda", "seedA", NULL };
+const char *zgram_output[] = { NULL };
+const char *zgram_outchk[] = { "RETURN", NULL };
+
+/**
+ * @brief Testing registration function
+ */
+void testing_zgram_init( void ) __attribute__( ( constructor ) );
+void
+testing_zgram_init( void )
+{
+    test_zgram.name        = "zgram";
+    test_zgram.helper      = "General Gram matrix transformation";
+    test_zgram.params      = zgram_params;
+    test_zgram.output      = zgram_output;
+    test_zgram.outchk      = zgram_outchk;
+    test_zgram.fptr        = testing_zgram;
+    test_zgram.next        = NULL;
+
+    testing_register( &test_zgram );
+}