diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py
index d6908a87c2081353336d2615bbc040a0cddff030..18a79e3000ca3c1018d49ce7d34254c7baae3eae 100644
--- a/cmake_modules/local_subs.py
+++ b/cmake_modules/local_subs.py
@@ -20,6 +20,7 @@ _extra_blas = [
     ('',                     'slaran',               'dlaran',               'slaran',               'dlaran'              ),
     ('',                     'slatm1',               'dlatm1',               'clatm1',               'zlatm1'              ),
     ('',                     'slatm1',               'dlatm1',               'slatm1',               'dlatm1'              ),
+    ('',                     'sgenm2',               'dgenm2',               'cgenm2',               'zgenm2'              ),
     ('',                     'slag2c_fake',          'dlag2z_fake',          'slag2c',               'dlag2z'              ),
 ]
 
diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index cdf6d90212e850c6ccfb3444d8e2ae1dbf9de42b..9f2c8ab6f22fddcca9a9eb7ecba132dc32a23e50 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -251,6 +251,9 @@ set(ZSRC
     ##################
     zgram.c
     pzgram.c
+    ##################
+    zgenm2.c
+    pzgenm2.c
 )
 
 precisions_rules_py(CHAMELEON_SRCS_GENERATED "${ZSRC}"
diff --git a/compute/pzgenm2.c b/compute/pzgenm2.c
new file mode 100644
index 0000000000000000000000000000000000000000..e054fa2a0e515009b801c6f67382d65c9c9cff5a
--- /dev/null
+++ b/compute/pzgenm2.c
@@ -0,0 +1,418 @@
+/**
+ *
+ * @file pzgenm2.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ * @copyright 2016-2020 KAUST. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon parallel algorithm to compute 2-norm estimator.
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @author Dalal Sukkari
+ * @date 2020-10-06
+ * @precisions normal z -> s d c
+ *
+ */
+#include "control/common.h"
+
+#define A(m, n)     A,      m, n
+#define X(m, n)     &X,     m, n
+#define SX(m, n)    &SX,    m, n
+#define NRMX( m, n) &NRMX,  m, n
+#define NRMSX(m, n) &NRMSX, m, n
+#define DROW(m, n)  &DROW,  m, n
+
+void
+chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
+                   RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t  *chamctxt;
+    RUNTIME_option_t options;
+    CHAM_desc_t X, SX, NRMX, NRMSX, DROW;
+    int m, n, k;
+    int myp = A->myrank / A->q;
+    int myq = A->myrank % A->q;
+    int tempmm, tempnn;
+    int cnt, maxiter;
+    double e0, normx, normsx, beta, scl;
+
+    chamctxt = chameleon_context_self();
+    if ( sequence->status != CHAMELEON_SUCCESS ) {
+        return;
+    }
+    RUNTIME_options_init( &options, chamctxt, sequence, request );
+
+    /* Initialize the result */
+    *result = 0.0;
+
+    /* Init workspace handle for the call to dlange but unused */
+    RUNTIME_options_ws_alloc( &options, 1, 0 );
+
+    /**
+     * DROW must be allocated with GLOBAL to avoid destruction bewteen the first
+     * stage and the second one during Flush.
+     * This is the same issue for X and SX to be reused from one iteration to another.
+     */
+    chameleon_desc_init( &DROW, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, A->nb, A->nb,
+                         A->p, A->n, 0, 0, A->p, A->n, A->p, A->q,
+                         NULL, NULL, NULL );
+    /**
+     * NRMX must be allocated with GLOBAL to be able to access the norm value
+     * after flushing the descriptor.
+     * This is the same issue for NRMSX.
+     */
+    chameleon_desc_init( &NRMX, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
+                         A->p * 2, A->q, 0, 0, A->p * 2, A->q, A->p, A->q,
+                         NULL, NULL, NULL );
+
+    /**
+     * Start by computing the initial vector of the iterative process, and that
+     * is used to compute the initial norm.
+     *
+     * For j=0,n-1, drow[j] = sum( |A_{i,j}|, i=0..m-1 )
+     * So drow[j] = sum( S_{p,j}, p=0..P-1 ) with S_{p,j} = sum( |A_{i,j}|, i=0..m-1 \ i%P = p )
+     *
+     */
+    for(n = myq; n < A->nt; n += A->q) {
+        tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
+
+        /* Zeroes the local intermediate vector */
+        INSERT_TASK_dlaset(
+            &options,
+            ChamUpperLower, 1, tempnn,
+            0., 0.,
+            DROW( myp, n ) );
+
+        /* Computes the sums of the local tiles into the local vector */
+        for(m = myp; m < A->mt; m += A->p) {
+            tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb;
+            INSERT_TASK_dzasum(
+                &options,
+                ChamColumnwise, ChamUpperLower, tempmm, tempnn,
+                A(m, n), DROW( myp, n ) );
+        }
+
+        /* Reduce on first row of nodes */
+	for(m = 1; m < A->p; m++) {
+	    INSERT_TASK_daxpy(
+                &options, tempnn, 1.,
+                DROW( m, n ), 1,
+                DROW( 0, n ), 1 );
+	}
+    }
+
+    /**
+     * Reduce now by columns on processes with (myp == 0)
+     */
+    if ( myp == 0 )
+    {
+        INSERT_TASK_dlaset(
+            &options,
+            ChamUpperLower, NRMX.mb, NRMX.nb,
+            1., 0.,
+            NRMX( myp, myq ) );
+
+        for( n = myq; n < A->nt; n += A->q ) {
+	    tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+	    INSERT_TASK_dgessq(
+                &options, ChamEltwise, 1, tempnn,
+                DROW( myp, n   ),
+                NRMX( myp, myq ) );
+	}
+
+        /* Reduce on first row of nodes */
+	for(n = 1; n < A->q; n++) {
+	    INSERT_TASK_dplssq(
+                &options, ChamEltwise, 1, 1,
+                NRMX( myp, n ),
+                NRMX( myp, 0 ) );
+	}
+
+        INSERT_TASK_dplssq2(
+            &options, 1, NRMX( myp, 0 ) );
+    }
+
+    /* Bcast norm over processes from node (0,0) */
+    for(m = 0; m < A->p; m++) {
+	for(n = 0; n < A->q; n++) {
+            if ( (m != 0) || (n != 0) ) {
+                INSERT_TASK_dlacpy(
+                    &options,
+                    ChamUpperLower, 1, 1, 1,
+                    NRMX(0, 0),
+                    NRMX(m, n) );
+            }
+        }
+    }
+
+    CHAMELEON_Desc_Flush( &DROW, sequence );
+    CHAMELEON_Desc_Flush( &NRMX, sequence );
+    CHAMELEON_Desc_Flush( A,     sequence );
+    RUNTIME_sequence_wait( chamctxt, sequence );
+    *result = *((double *)(NRMX.get_blkaddr( &NRMX, myp, myq )));
+
+    if ( (*result) == 0.0 ) {
+	chameleon_desc_destroy( &DROW );
+	chameleon_desc_destroy( &NRMX );
+	return;
+    } else {
+	normx = *result;
+    }
+
+    chameleon_desc_init( &NRMSX, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
+                         A->p * 2, A->q, 0, 0, A->p * 2, A->q, A->p, A->q,
+                         NULL, NULL, NULL );
+    chameleon_desc_init( &X,  CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, 1, A->nb, A->nb,
+                         A->p, A->n, 0, 0, A->p, A->n, A->p, A->q,
+                         NULL, NULL, NULL );
+    chameleon_desc_init( &SX, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, A->mb, 1, A->mb,
+                         A->m, A->q, 0, 0, A->m, A->q, A->p, A->q,
+                         NULL, NULL, NULL );
+
+    cnt = 0;
+    e0  = 0.;
+    maxiter = chameleon_min( 100, A->n );
+
+    while ( (cnt < maxiter) &&
+            (fabs(*result - e0) > (tol * (*result))) )
+    {
+        e0 = *result;
+
+        /* Initialization of X in the first loop */
+        if ( cnt == 0 )
+        {
+            for (n = myq; n < A->nt; n += A->q) {
+                tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
+
+                if ( myp == 0 ) {
+#if defined(PRECISION_z) || defined(PRECISION_c)
+                    INSERT_TASK_dlag2z(
+                        &options,
+                        ChamUpperLower, 1, tempnn,
+                        DROW( 0, n ),
+                        X(    0, n ) );
+#else
+                    INSERT_TASK_zlacpy(
+                        &options,
+                        ChamUpperLower, 1, tempnn, tempnn,
+                        DROW( 0, n ),
+                        X(    0, n ) );
+#endif
+                }
+
+                /* Broadcast X */
+                for (m = 1; m < A->p; m++) {
+                    INSERT_TASK_zlacpy(
+                        &options,
+                        ChamUpperLower, 1, tempnn, tempnn,
+                        X( 0, n ),
+                        X( m, n ) );
+                }
+            }
+            CHAMELEON_Desc_Flush( &DROW, sequence );
+        }
+
+        /**
+         * Scale x = x / ||A||
+         *
+         * We scale the P replica of X, so each row of Q processes possess one
+         * copy of the scaled X.
+         */
+        scl = 1. / e0;
+        for (n = myq; n < A->nt; n += A->q) {
+            tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
+
+            INSERT_TASK_zlascal(
+                &options,
+                ChamUpperLower, 1, tempnn, tempnn,
+                scl, X( myp, n ) );
+        }
+
+        /**
+         *  Compute Sx = S * x
+         */
+        for(m = myp; m < A->mt;  m+=A->p) {
+            tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb;
+
+            for (n = myq; n < A->nt; n += A->q ) {
+                tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
+                beta   = n == myq ? 0. : 1.;
+
+                INSERT_TASK_zgemv(
+                    &options,
+                    ChamNoTrans, tempmm, tempnn,
+                    1.,   A(  m,   n ),
+                          X(  myp, n ), 1,
+                    beta, SX( m, myq ), 1 );
+            }
+
+            /* Reduce columns */
+            for (k = 1; k < chameleon_min( A->q, A->nt ); k++) {
+                INSERT_TASK_zaxpy(
+                    &options, tempmm, 1.,
+                    SX( m, k ), 1,
+                    SX( m, 0 ), 1 );
+            }
+            /* Broadcast SX to ease the following gemv */
+            for (k = 1; k < A->q; k++) {
+                INSERT_TASK_zlacpy(
+                    &options,
+                    ChamUpperLower, tempmm, 1, tempmm,
+                    SX( m, 0 ),
+                    SX( m, k ) );
+            }
+        }
+
+        /**
+         *  Compute x = S' * S * x = S' * Sx
+         */
+        for ( n = myq; n < A->nt; n += A->q ) {
+            tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
+
+            for( m = myp; m < A->mt;  m += A->p ) {
+                tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb;
+                beta   = m == myp ? 0. : 1.;
+
+                INSERT_TASK_zgemv(
+                    &options,
+                    ChamConjTrans, tempmm, tempnn,
+                    1.,   A(  m,   n ),
+                          SX( m, myq ), 1,
+                    beta, X(  myp, n ), 1 );
+            }
+
+            /* Reduce rows */
+            for (k = 1; k < chameleon_min( A->p, A->mt ); k++) {
+                INSERT_TASK_zaxpy(
+                    &options, tempnn, 1.,
+                    X( k, n ), 1,
+                    X( 0, n ), 1 );
+            }
+            /* Broadcast */
+            for (k = 1; k < A->p; k++) {
+                INSERT_TASK_zlacpy(
+                    &options,
+                    ChamUpperLower, 1, tempnn, tempnn,
+                    X( 0, n ),
+                    X( k, n ) );
+            }
+        }
+
+        /**
+         * Compute ||x||
+         *
+         * All rows of Q nodes compute the same thing in parallel due to replication.
+         */
+        {
+            INSERT_TASK_dlaset(
+                &options,
+                ChamUpperLower, NRMX.mb, NRMX.nb,
+                1., 0.,
+                NRMX( myp, myq ) );
+
+            for( n = myq; n < A->nt; n += A->q ) {
+                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+
+                INSERT_TASK_zgessq(
+                    &options, ChamEltwise, 1, tempnn,
+                    X(    myp, n   ),
+                    NRMX( myp, myq ) );
+            }
+
+            /* Reduce columns  */
+            for(n = 1; n < chameleon_min( A->q, A->nt ); n++) {
+                INSERT_TASK_dplssq(
+                    &options, ChamEltwise, 1, 1,
+                    NRMX( myp, n ),
+                    NRMX( myp, 0 ) );
+            }
+
+            INSERT_TASK_dplssq2(
+                &options, 1, NRMX( myp, 0 ) );
+
+            /* Broadcast the results to processes in the same row */
+            for(n = 1; n < A->q; n++) {
+                INSERT_TASK_dlacpy(
+                    &options,
+                    ChamUpperLower, 1, 1, 1,
+                    NRMX( myp, 0 ),
+                    NRMX( myp, n ) );
+            }
+        }
+
+        /**
+         * Compute ||Sx||
+         *
+         * All columns of P nodes compute the same thing in parallel due to replication.
+         */
+        {
+            INSERT_TASK_dlaset(
+                &options,
+                ChamUpperLower, NRMSX.mb, NRMSX.nb,
+                1., 0.,
+                NRMSX( myp, myq ) );
+
+            for( m = myp; m < A->mt; m += A->p ) {
+                tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+                INSERT_TASK_zgessq(
+                    &options, ChamEltwise, tempmm, 1,
+                    SX(    m,   myq ),
+                    NRMSX( myp, myq ) );
+            }
+
+            /* Reduce rows */
+            for( m = 1; m < chameleon_min( A->p, A->mt ); m++ ) {
+                INSERT_TASK_dplssq(
+                    &options, ChamEltwise, 1, 1,
+                    NRMSX( m, myq ),
+                    NRMSX( 0, myq ) );
+            }
+
+            INSERT_TASK_dplssq2(
+                &options, 1, NRMSX( 0, myq ) );
+
+            /* Broadcast the results to processes in the same column */
+            for(m = 1; m < A->p; m++) {
+                INSERT_TASK_dlacpy(
+                    &options,
+                    ChamUpperLower, 1, 1, 1,
+                    NRMSX( 0, myq ),
+                    NRMSX( m, myq ) );
+            }
+        }
+
+        CHAMELEON_Desc_Flush( &NRMX,  sequence );
+        CHAMELEON_Desc_Flush( &NRMSX, sequence );
+        CHAMELEON_Desc_Flush( &X,     sequence );
+        CHAMELEON_Desc_Flush( &SX,    sequence );
+        CHAMELEON_Desc_Flush( A,      sequence );
+        RUNTIME_sequence_wait(chamctxt, sequence);
+        normx  = *((double *)(NRMX.get_blkaddr(  &NRMX,  myp, myq )));
+        normsx = *((double *)(NRMSX.get_blkaddr( &NRMSX, myp, myq )));
+
+        *result = normx / normsx;
+        cnt++;
+    }
+
+    if ( (cnt >= maxiter) &&
+         (fabs((*result) - e0) > (tol * (*result))) ) {
+        *result = INFINITY;
+    }
+
+    RUNTIME_options_ws_free( &options );
+    RUNTIME_options_finalize( &options, chamctxt );
+
+    chameleon_desc_destroy( &DROW );
+    chameleon_desc_destroy( &NRMX );
+    chameleon_desc_destroy( &NRMSX );
+    chameleon_desc_destroy( &X );
+    chameleon_desc_destroy( &SX );
+
+    return;
+}
diff --git a/compute/zgenm2.c b/compute/zgenm2.c
new file mode 100644
index 0000000000000000000000000000000000000000..7f27a44d647a461d7e2e8bd76d4787310ec6d322
--- /dev/null
+++ b/compute/zgenm2.c
@@ -0,0 +1,266 @@
+/**
+ *
+ * @file zgenm2.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ * @copyright 2016-2020 KAUST. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgenm2 wrappers
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-10-06
+ * @precisions normal z -> s d c
+ *
+ */
+#include "control/common.h"
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Returns an estimate of the two-norm of A.
+ *
+ *******************************************************************************
+ *
+ * @param[in] tol
+ *          Specify the tolerance used in the iterative algorithm to converge to
+ *          an estimation. Must be > 0.
+ *
+ * @param[in] M
+ *          The number of rows of the matrix A. M >= 0. When M = 0,
+ *          the returned value is set to zero.
+ *
+ * @param[in] N
+ *          The number of columns of the matrix A. N >= 0. When N = 0,
+ *          the returned value is set to zero.
+ *
+ * @param[in] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ *******************************************************************************
+ *
+ * @retval the two-norm estimate.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgenm2_Tile
+ * @sa CHAMELEON_zgenm2_Tile_Async
+ * @sa CHAMELEON_clange
+ * @sa CHAMELEON_dlange
+ * @sa CHAMELEON_slange
+ *
+ */
+double CHAMELEON_zgenm2( double tol, int M, int N,
+                         CHAMELEON_Complex64_t *A, int LDA )
+{
+    int NB;
+    int status;
+    double value = -1.;
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t descAl, descAt;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgenm2", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    /* Check input arguments */
+    if (M < 0) {
+        chameleon_error("CHAMELEON_zgenm2", "illegal value of M");
+        return -1;
+    }
+    if (N < 0) {
+        chameleon_error("CHAMELEON_zgenm2", "illegal value of N");
+        return -2;
+    }
+    if (LDA < chameleon_max(1, M)) {
+        chameleon_error("CHAMELEON_zgenm2", "illegal value of LDA");
+        return -4;
+    }
+
+    /* Quick return */
+    if (chameleon_min(N, M) == 0) {
+        return (double)0.0;
+    }
+
+    /* Tune NB depending on M, N & NRHS; Set NBNB */
+    status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0);
+    if (status != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_zgenm2", "chameleon_tune() failed");
+        return status;
+    }
+
+    /* Set NT */
+    NB = CHAMELEON_NB;
+
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpperLower,
+                         A, NB, NB, LDA, N, M, N, sequence, &request );
+
+    /* Call the tile interface */
+    CHAMELEON_zgenm2_Tile_Async( tol, &descAt, &value, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInput, ChamUpperLower, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Cleanup the temporary data */
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return value;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ * @brief Tile equivalent of CHAMELEON_zgenm2().
+ *
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] tol
+ *          Specify the tolerance used in the iterative algorithm to converge to
+ *          an estimation. Must be > 0.
+ *
+ * @param[in] A
+ *          On entry, the input matrix A.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgenm2
+ * @sa CHAMELEON_zgenm2_Tile_Async
+ * @sa CHAMELEON_clange_Tile
+ * @sa CHAMELEON_dlange_Tile
+ * @sa CHAMELEON_slange_Tile
+ *
+ */
+double CHAMELEON_zgenm2_Tile( double tol, CHAM_desc_t *A )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    int status;
+    double value = -1.;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgenm2_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    CHAMELEON_zgenm2_Tile_Async( tol, A, &value, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return ( status == CHAMELEON_SUCCESS ) ? value : (double)status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile_Async
+ *
+ * @brief Non-blocking equivalent of CHAMELEON_zgenm2_Tile().
+ *
+ *  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgenm2
+ * @sa CHAMELEON_zgenm2_Tile
+ * @sa CHAMELEON_clange_Tile_Async
+ * @sa CHAMELEON_dlange_Tile_Async
+ * @sa CHAMELEON_slange_Tile_Async
+ *
+ */
+int CHAMELEON_zgenm2_Tile_Async( double tol, CHAM_desc_t *A, double *value,
+                                 RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t *chamctxt;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgenm2_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if (sequence == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgenm2_Tile", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgenm2_Tile", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if (sequence->status == CHAMELEON_SUCCESS) {
+        request->status = CHAMELEON_SUCCESS;
+    }
+    else {
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
+    }
+
+    /* Check descriptors for correctness */
+    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_zgenm2_Tile_Async", "invalid descriptor");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    /* Check input arguments */
+    if (A->nb != A->mb) {
+        chameleon_error("CHAMELEON_zgenm2_Tile_Async", "only square tiles supported");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    if ( tol <= 0. ) {
+        chameleon_error("CHAMELEON_zgenm2_Tile_Async", "tol must be > 0");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+
+    /* Quick return */
+    if (chameleon_min(A->m, A->n) == 0) {
+        *value = 0.0;
+        return CHAMELEON_SUCCESS;
+    }
+
+    chameleon_pzgenm2( tol, A, value, sequence, request );
+
+    return CHAMELEON_SUCCESS;
+}
diff --git a/control/compute_z.h b/control/compute_z.h
index ae8596e16e6462080cac77ecd46e7314e8f1296f..c17553091863da09e097ff25d09f31dc4b63ddab 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -39,6 +39,7 @@ void chameleon_pzgebrd_gb2bd(cham_uplo_t uplo, CHAM_desc_t *A, double *D, double
 void chameleon_pzgebrd_ge2gb( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+void chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgemm(cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t beta, CHAM_desc_t *C, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index 1b7ca622e2309503e526520b3a9a2efb1ba8b02a..7ebcff28f37b4027a16421fa1302f37b718dc28e 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -39,6 +39,7 @@ int CHAMELEON_zgeadd(cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t alp
 int CHAMELEON_zgelqf(int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT);
 int CHAMELEON_zgelqs(int M, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT, CHAMELEON_Complex64_t *B, int LDB);
 int CHAMELEON_zgels(cham_trans_t trans, int M, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT, CHAMELEON_Complex64_t *B, int LDB);
+double CHAMELEON_zgenm2( double tol, int M, int N, CHAMELEON_Complex64_t *A, int LDA );
 int CHAMELEON_zgemm(cham_trans_t transA, cham_trans_t transB, int M, int N, int K, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *C, int LDC);
 int CHAMELEON_zgeqrf(int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT);
 int CHAMELEON_zgeqrs(int M, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT, CHAMELEON_Complex64_t *B, int LDB);
@@ -114,6 +115,7 @@ int CHAMELEON_zgeadd_Tile(cham_trans_t trans, CHAMELEON_Complex64_t alpha, CHAM_
 int CHAMELEON_zgelqf_Tile(CHAM_desc_t *A, CHAM_desc_t *T);
 int CHAMELEON_zgelqs_Tile(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B);
 int CHAMELEON_zgels_Tile(cham_trans_t trans, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B);
+double CHAMELEON_zgenm2_Tile( double tol, CHAM_desc_t *A );
 int CHAMELEON_zgemm_Tile(cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t beta, CHAM_desc_t *C);
 int CHAMELEON_zgeqrf_Tile(CHAM_desc_t *A, CHAM_desc_t *T);
 int CHAMELEON_zgeqrs_Tile(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B);
@@ -186,6 +188,7 @@ int CHAMELEON_zgeadd_Tile_Async(cham_trans_t trans, CHAMELEON_Complex64_t alpha,
 int CHAMELEON_zgelqf_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgelqs_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgels_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+int CHAMELEON_zgenm2_Tile_Async( double tol, CHAM_desc_t *A, double *value, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 int CHAMELEON_zgemm_Tile_Async(cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t beta, CHAM_desc_t *C, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgeqrf_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgeqrs_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt
index f6591433f46729ac378b23e979cb3d9ba2feb8c6..a233e6d909f773a50ce159e6990ed8b0516fb69c 100644
--- a/testing/CMakeLists.txt
+++ b/testing/CMakeLists.txt
@@ -74,6 +74,7 @@ set(ZSRC
   testing_zsytrf.c
   testing_zsytrs.c
   testing_zsysv.c
+  testing_zgenm2.c
   testing_zgetrf.c
   testing_zgetrs.c
   testing_zgesv.c
diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake
index 986fd4faf5974bd2be4e62e8c80f632465f266af..82bef26df95796b6a2c9aa2708126666adf0d362 100644
--- a/testing/CTestLists.cmake
+++ b/testing/CTestLists.cmake
@@ -64,6 +64,8 @@ if (NOT CHAMELEON_SIMULATION)
       #geqrs_hqr gelqs_hqr
       gels
       gels_hqr )
+    set( TESTS ${TESTS}
+      genm2 )
 
     foreach(cat ${TEST_CATEGORIES})
       foreach(gpus ${N_GPUS})
diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c
index 0d6d6e5d93d4ff1c1251f1994ad07ffcb04e28d3..82e3969202896be9c23196fc722c83c6ea182c24 100644
--- a/testing/chameleon_ztesting.c
+++ b/testing/chameleon_ztesting.c
@@ -69,6 +69,11 @@ static parameter_t parameters[] = {
     { "seedB", "Seed for the matrix B random generation", 'Y', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 11, TestValInt, {0}, NULL, pread_int, sprint_int },
     { "seedC", "Seed for the matrix C random generation", 'Z', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 11, TestValInt, {0}, NULL, pread_int, sprint_int },
 
+    { NULL, "Matrix generation numerical parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
+    { "bump",  "Bump value to make a matrix diagonal dominant",           'z', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
+    { "mode",  "Mode that specifies the eigen/singular values in xlatms", -30, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2,  4, TestValInt,       {0}, NULL, pread_int,       sprint_int    },
+    { "cond",  "Conditional number of the matrix used by xlatms",         -31, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValDouble,    {0}, NULL, pread_double,    sprint_double    },
+
     { NULL, "Operation specific parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
     { "trans",  "Value of the trans parameter",  -11, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 9, TestTrans,    {0}, NULL, pread_trans, sprint_trans },
     { "transA", "Value of the transA parameter", -12, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 9, TestTrans,    {0}, NULL, pread_trans, sprint_trans },
@@ -79,9 +84,8 @@ static parameter_t parameters[] = {
     { "norm",   "Value of the norm parameter",   -17, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestNormtype, {0}, NULL, pread_norm,  sprint_norm  },
 
     { NULL, "Operation specific scalar", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
-    { "alpha", "Value of the scalar alpha",                     'x', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
-    { "beta",  "Value of the scalar beta",                      'y', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
-    { "bump",  "Bump value to make a matrix diagonal dominant", 'z', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
+    { "alpha", "Value of the scalar alpha",                       'x', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
+    { "beta",  "Value of the scalar beta",                        'y', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
 
     { NULL, "QR/LQ parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
     { "qra",    "Size of TS domain (=RH for householder trees)",           -20, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int },
diff --git a/testing/input/genm2.in b/testing/input/genm2.in
new file mode 100644
index 0000000000000000000000000000000000000000..dc29aa220c8af8dc0bb4a4e0b8e31c05d298f395
--- /dev/null
+++ b/testing/input/genm2.in
@@ -0,0 +1,20 @@
+# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step]
+# Not given parameters will receive default values
+
+# GENM2
+# mtxfmt
+# nb: Tile size
+# M: Number of rows of matrix A
+# N: Number of columns of matrix A
+# LDA: Leading dimension of matrix A
+# cond: The condition number
+# mode: the mode values for latms
+
+op = genm2
+mtxfmt = 0, 1
+nb = 16, 17
+m = 15, 25, 37
+n = 13, 23, 35
+lda = 41
+cond = 1., 1.e13
+mode = 1:6
diff --git a/testing/testing_zgenm2.c b/testing/testing_zgenm2.c
new file mode 100644
index 0000000000000000000000000000000000000000..b396d9fee6ee2a4c51542b3bc2a80102b9d72710
--- /dev/null
+++ b/testing/testing_zgenm2.c
@@ -0,0 +1,140 @@
+/**
+ *
+ * @file testing_zgenm2.c
+ *
+ * @copyright 2019-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgenm2 testing
+ *
+ * @version 1.0.0
+ * @author Lucas Barros de Assis
+ * @author Mathieu Faverge
+ * @date 2020-10-05
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#if !defined(CHAMELEON_SIMULATION)
+#include <coreblas/lapacke.h>
+#include <coreblas/cblas.h>
+#include <coreblas.h>
+#endif
+#include "testings.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+
+static cham_fixdbl_t
+flops_zgenm2( int M, int N )
+{
+    double coefabs = 1.;
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+    coefabs = 3.;
+#endif
+
+    return coefabs * M * N;
+}
+
+int
+testing_zgenm2( run_arg_list_t *args, int check )
+{
+    int          hres   = 0;
+    double       norm;
+    CHAM_desc_t *descA;
+    double      *D, dmax = 1.;
+
+    /* Reads arguments */
+    intptr_t        mtxfmt = parameters_getvalue_int( "mtxfmt" );
+    int             nb     = run_arg_get_int( args, "nb", 320 );
+    int             P      = parameters_getvalue_int( "P" );
+    int             N      = run_arg_get_int( args, "N", 1000 );
+    int             M      = run_arg_get_int( args, "M", N );
+    int             LDA    = run_arg_get_int( args, "LDA", M );
+    int             seedA  = run_arg_get_int( args, "seedA", random() );
+    int             Q      = parameters_compute_q( P );
+    int             minMN  = chameleon_min( M, N );
+    double          cond   = run_arg_get_double( args, "cond", 1.e16 );
+    int             mode   = run_arg_get_int( args, "mode", 4 );
+    double          tol    = 1.e-1;
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_zgenm2( M, N );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    /* Generate the diagonal of eigen/singular values */
+    D = malloc( minMN * sizeof(double) );
+#if !defined(CHAMELEON_SIMULATION)
+    hres = CORE_dlatm1( mode, cond, 0, ChamDistUniform, seedA, D, minMN );
+    if ( hres != 0 ) {
+        free( D );
+        return hres;
+    }
+
+    /* Save the largest absolute value */
+    hres = cblas_idamax( minMN, D, 1 );
+    dmax = fabs( D[hres] );
+#endif
+
+    /* Creates the matrix */
+    CHAMELEON_Desc_Create(
+        &descA, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q );
+
+    /* Fills the matrix with random values */
+    hres = CHAMELEON_zlatms_Tile(
+        ChamDistUniform, seedA, ChamNonsymPosv, D, 0, cond, 0., descA );
+    free( D );
+    if ( hres != 0 ) {
+        return hres;
+    }
+
+    /* Calculates the norm */
+    START_TIMING( t );
+    norm = CHAMELEON_zgenm2_Tile( tol, descA );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", ( norm >= 0. ) ? gflops : -1. );
+
+    /* Checks the solution */
+    hres = 0;
+    if ( check ) {
+        double res = fabs(dmax - norm) / (dmax * tol);
+
+        run_arg_add_double( args, "||A||", dmax );
+        run_arg_add_double( args, "||B||", norm );
+        run_arg_add_double( args, "||R||", res );
+
+        if ( isnan(res) || isinf(res) || (res > 10.0) ) {
+            hres = 1;
+        }
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+
+    return hres;
+}
+
+testing_t   test_zgenm2;
+const char *zgenm2_params[] = { "mtxfmt", "nb", "m", "n", "lda", "seedA", "cond", "mode", NULL };
+const char *zgenm2_output[] = { NULL };
+const char *zgenm2_outchk[] = { "||A||", "||B||", "||R||", "RETURN", NULL };
+
+/**
+ * @brief Testing registration function
+ */
+void testing_zgenm2_init( void ) __attribute__( ( constructor ) );
+void
+testing_zgenm2_init( void )
+{
+    test_zgenm2.name        = "zgenm2";
+    test_zgenm2.helper      = "General matrix two-norm estimator";
+    test_zgenm2.params      = zgenm2_params;
+    test_zgenm2.output      = zgenm2_output;
+    test_zgenm2.outchk      = zgenm2_outchk;
+    test_zgenm2.fptr        = testing_zgenm2;
+    test_zgenm2.next        = NULL;
+
+    testing_register( &test_zgenm2 );
+}