diff --git a/cmake_modules/local_subs.py b/cmake_modules/local_subs.py
index a4cb1fc264233227fa012b5ce8f38907bdfb0a53..18a79e3000ca3c1018d49ce7d34254c7baae3eae 100644
--- a/cmake_modules/local_subs.py
+++ b/cmake_modules/local_subs.py
@@ -20,6 +20,8 @@ _extra_blas = [
     ('',                     'slaran',               'dlaran',               'slaran',               'dlaran'              ),
     ('',                     'slatm1',               'dlatm1',               'clatm1',               'zlatm1'              ),
     ('',                     'slatm1',               'dlatm1',               'slatm1',               'dlatm1'              ),
+    ('',                     'sgenm2',               'dgenm2',               'cgenm2',               'zgenm2'              ),
+    ('',                     'slag2c_fake',          'dlag2z_fake',          'slag2c',               'dlag2z'              ),
 ]
 
 _extra_BLAS = [ [ x.upper() for x in row ] for row in _extra_blas ]
diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index cdf6d90212e850c6ccfb3444d8e2ae1dbf9de42b..9f2c8ab6f22fddcca9a9eb7ecba132dc32a23e50 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -251,6 +251,9 @@ set(ZSRC
     ##################
     zgram.c
     pzgram.c
+    ##################
+    zgenm2.c
+    pzgenm2.c
 )
 
 precisions_rules_py(CHAMELEON_SRCS_GENERATED "${ZSRC}"
diff --git a/compute/pzgenm2.c b/compute/pzgenm2.c
new file mode 100644
index 0000000000000000000000000000000000000000..e054fa2a0e515009b801c6f67382d65c9c9cff5a
--- /dev/null
+++ b/compute/pzgenm2.c
@@ -0,0 +1,418 @@
+/**
+ *
+ * @file pzgenm2.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ * @copyright 2016-2020 KAUST. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon parallel algorithm to compute 2-norm estimator.
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @author Dalal Sukkari
+ * @date 2020-10-06
+ * @precisions normal z -> s d c
+ *
+ */
+#include "control/common.h"
+
+#define A(m, n)     A,      m, n
+#define X(m, n)     &X,     m, n
+#define SX(m, n)    &SX,    m, n
+#define NRMX( m, n) &NRMX,  m, n
+#define NRMSX(m, n) &NRMSX, m, n
+#define DROW(m, n)  &DROW,  m, n
+
+void
+chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
+                   RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t  *chamctxt;
+    RUNTIME_option_t options;
+    CHAM_desc_t X, SX, NRMX, NRMSX, DROW;
+    int m, n, k;
+    int myp = A->myrank / A->q;
+    int myq = A->myrank % A->q;
+    int tempmm, tempnn;
+    int cnt, maxiter;
+    double e0, normx, normsx, beta, scl;
+
+    chamctxt = chameleon_context_self();
+    if ( sequence->status != CHAMELEON_SUCCESS ) {
+        return;
+    }
+    RUNTIME_options_init( &options, chamctxt, sequence, request );
+
+    /* Initialize the result */
+    *result = 0.0;
+
+    /* Init workspace handle for the call to dlange but unused */
+    RUNTIME_options_ws_alloc( &options, 1, 0 );
+
+    /**
+     * DROW must be allocated with GLOBAL to avoid destruction bewteen the first
+     * stage and the second one during Flush.
+     * This is the same issue for X and SX to be reused from one iteration to another.
+     */
+    chameleon_desc_init( &DROW, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 1, A->nb, A->nb,
+                         A->p, A->n, 0, 0, A->p, A->n, A->p, A->q,
+                         NULL, NULL, NULL );
+    /**
+     * NRMX must be allocated with GLOBAL to be able to access the norm value
+     * after flushing the descriptor.
+     * This is the same issue for NRMSX.
+     */
+    chameleon_desc_init( &NRMX, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
+                         A->p * 2, A->q, 0, 0, A->p * 2, A->q, A->p, A->q,
+                         NULL, NULL, NULL );
+
+    /**
+     * Start by computing the initial vector of the iterative process, and that
+     * is used to compute the initial norm.
+     *
+     * For j=0,n-1, drow[j] = sum( |A_{i,j}|, i=0..m-1 )
+     * So drow[j] = sum( S_{p,j}, p=0..P-1 ) with S_{p,j} = sum( |A_{i,j}|, i=0..m-1 \ i%P = p )
+     *
+     */
+    for(n = myq; n < A->nt; n += A->q) {
+        tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
+
+        /* Zeroes the local intermediate vector */
+        INSERT_TASK_dlaset(
+            &options,
+            ChamUpperLower, 1, tempnn,
+            0., 0.,
+            DROW( myp, n ) );
+
+        /* Computes the sums of the local tiles into the local vector */
+        for(m = myp; m < A->mt; m += A->p) {
+            tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb;
+            INSERT_TASK_dzasum(
+                &options,
+                ChamColumnwise, ChamUpperLower, tempmm, tempnn,
+                A(m, n), DROW( myp, n ) );
+        }
+
+        /* Reduce on first row of nodes */
+	for(m = 1; m < A->p; m++) {
+	    INSERT_TASK_daxpy(
+                &options, tempnn, 1.,
+                DROW( m, n ), 1,
+                DROW( 0, n ), 1 );
+	}
+    }
+
+    /**
+     * Reduce now by columns on processes with (myp == 0)
+     */
+    if ( myp == 0 )
+    {
+        INSERT_TASK_dlaset(
+            &options,
+            ChamUpperLower, NRMX.mb, NRMX.nb,
+            1., 0.,
+            NRMX( myp, myq ) );
+
+        for( n = myq; n < A->nt; n += A->q ) {
+	    tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+	    INSERT_TASK_dgessq(
+                &options, ChamEltwise, 1, tempnn,
+                DROW( myp, n   ),
+                NRMX( myp, myq ) );
+	}
+
+        /* Reduce on first row of nodes */
+	for(n = 1; n < A->q; n++) {
+	    INSERT_TASK_dplssq(
+                &options, ChamEltwise, 1, 1,
+                NRMX( myp, n ),
+                NRMX( myp, 0 ) );
+	}
+
+        INSERT_TASK_dplssq2(
+            &options, 1, NRMX( myp, 0 ) );
+    }
+
+    /* Bcast norm over processes from node (0,0) */
+    for(m = 0; m < A->p; m++) {
+	for(n = 0; n < A->q; n++) {
+            if ( (m != 0) || (n != 0) ) {
+                INSERT_TASK_dlacpy(
+                    &options,
+                    ChamUpperLower, 1, 1, 1,
+                    NRMX(0, 0),
+                    NRMX(m, n) );
+            }
+        }
+    }
+
+    CHAMELEON_Desc_Flush( &DROW, sequence );
+    CHAMELEON_Desc_Flush( &NRMX, sequence );
+    CHAMELEON_Desc_Flush( A,     sequence );
+    RUNTIME_sequence_wait( chamctxt, sequence );
+    *result = *((double *)(NRMX.get_blkaddr( &NRMX, myp, myq )));
+
+    if ( (*result) == 0.0 ) {
+	chameleon_desc_destroy( &DROW );
+	chameleon_desc_destroy( &NRMX );
+	return;
+    } else {
+	normx = *result;
+    }
+
+    chameleon_desc_init( &NRMSX, CHAMELEON_MAT_ALLOC_GLOBAL, ChamRealDouble, 2, 1, 2,
+                         A->p * 2, A->q, 0, 0, A->p * 2, A->q, A->p, A->q,
+                         NULL, NULL, NULL );
+    chameleon_desc_init( &X,  CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, 1, A->nb, A->nb,
+                         A->p, A->n, 0, 0, A->p, A->n, A->p, A->q,
+                         NULL, NULL, NULL );
+    chameleon_desc_init( &SX, CHAMELEON_MAT_ALLOC_GLOBAL, ChamComplexDouble, A->mb, 1, A->mb,
+                         A->m, A->q, 0, 0, A->m, A->q, A->p, A->q,
+                         NULL, NULL, NULL );
+
+    cnt = 0;
+    e0  = 0.;
+    maxiter = chameleon_min( 100, A->n );
+
+    while ( (cnt < maxiter) &&
+            (fabs(*result - e0) > (tol * (*result))) )
+    {
+        e0 = *result;
+
+        /* Initialization of X in the first loop */
+        if ( cnt == 0 )
+        {
+            for (n = myq; n < A->nt; n += A->q) {
+                tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
+
+                if ( myp == 0 ) {
+#if defined(PRECISION_z) || defined(PRECISION_c)
+                    INSERT_TASK_dlag2z(
+                        &options,
+                        ChamUpperLower, 1, tempnn,
+                        DROW( 0, n ),
+                        X(    0, n ) );
+#else
+                    INSERT_TASK_zlacpy(
+                        &options,
+                        ChamUpperLower, 1, tempnn, tempnn,
+                        DROW( 0, n ),
+                        X(    0, n ) );
+#endif
+                }
+
+                /* Broadcast X */
+                for (m = 1; m < A->p; m++) {
+                    INSERT_TASK_zlacpy(
+                        &options,
+                        ChamUpperLower, 1, tempnn, tempnn,
+                        X( 0, n ),
+                        X( m, n ) );
+                }
+            }
+            CHAMELEON_Desc_Flush( &DROW, sequence );
+        }
+
+        /**
+         * Scale x = x / ||A||
+         *
+         * We scale the P replica of X, so each row of Q processes possess one
+         * copy of the scaled X.
+         */
+        scl = 1. / e0;
+        for (n = myq; n < A->nt; n += A->q) {
+            tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
+
+            INSERT_TASK_zlascal(
+                &options,
+                ChamUpperLower, 1, tempnn, tempnn,
+                scl, X( myp, n ) );
+        }
+
+        /**
+         *  Compute Sx = S * x
+         */
+        for(m = myp; m < A->mt;  m+=A->p) {
+            tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb;
+
+            for (n = myq; n < A->nt; n += A->q ) {
+                tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
+                beta   = n == myq ? 0. : 1.;
+
+                INSERT_TASK_zgemv(
+                    &options,
+                    ChamNoTrans, tempmm, tempnn,
+                    1.,   A(  m,   n ),
+                          X(  myp, n ), 1,
+                    beta, SX( m, myq ), 1 );
+            }
+
+            /* Reduce columns */
+            for (k = 1; k < chameleon_min( A->q, A->nt ); k++) {
+                INSERT_TASK_zaxpy(
+                    &options, tempmm, 1.,
+                    SX( m, k ), 1,
+                    SX( m, 0 ), 1 );
+            }
+            /* Broadcast SX to ease the following gemv */
+            for (k = 1; k < A->q; k++) {
+                INSERT_TASK_zlacpy(
+                    &options,
+                    ChamUpperLower, tempmm, 1, tempmm,
+                    SX( m, 0 ),
+                    SX( m, k ) );
+            }
+        }
+
+        /**
+         *  Compute x = S' * S * x = S' * Sx
+         */
+        for ( n = myq; n < A->nt; n += A->q ) {
+            tempnn = n == A->nt-1 ? A->n - n * A->nb : A->nb;
+
+            for( m = myp; m < A->mt;  m += A->p ) {
+                tempmm = m == A->mt-1 ? A->m - m * A->mb : A->mb;
+                beta   = m == myp ? 0. : 1.;
+
+                INSERT_TASK_zgemv(
+                    &options,
+                    ChamConjTrans, tempmm, tempnn,
+                    1.,   A(  m,   n ),
+                          SX( m, myq ), 1,
+                    beta, X(  myp, n ), 1 );
+            }
+
+            /* Reduce rows */
+            for (k = 1; k < chameleon_min( A->p, A->mt ); k++) {
+                INSERT_TASK_zaxpy(
+                    &options, tempnn, 1.,
+                    X( k, n ), 1,
+                    X( 0, n ), 1 );
+            }
+            /* Broadcast */
+            for (k = 1; k < A->p; k++) {
+                INSERT_TASK_zlacpy(
+                    &options,
+                    ChamUpperLower, 1, tempnn, tempnn,
+                    X( 0, n ),
+                    X( k, n ) );
+            }
+        }
+
+        /**
+         * Compute ||x||
+         *
+         * All rows of Q nodes compute the same thing in parallel due to replication.
+         */
+        {
+            INSERT_TASK_dlaset(
+                &options,
+                ChamUpperLower, NRMX.mb, NRMX.nb,
+                1., 0.,
+                NRMX( myp, myq ) );
+
+            for( n = myq; n < A->nt; n += A->q ) {
+                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+
+                INSERT_TASK_zgessq(
+                    &options, ChamEltwise, 1, tempnn,
+                    X(    myp, n   ),
+                    NRMX( myp, myq ) );
+            }
+
+            /* Reduce columns  */
+            for(n = 1; n < chameleon_min( A->q, A->nt ); n++) {
+                INSERT_TASK_dplssq(
+                    &options, ChamEltwise, 1, 1,
+                    NRMX( myp, n ),
+                    NRMX( myp, 0 ) );
+            }
+
+            INSERT_TASK_dplssq2(
+                &options, 1, NRMX( myp, 0 ) );
+
+            /* Broadcast the results to processes in the same row */
+            for(n = 1; n < A->q; n++) {
+                INSERT_TASK_dlacpy(
+                    &options,
+                    ChamUpperLower, 1, 1, 1,
+                    NRMX( myp, 0 ),
+                    NRMX( myp, n ) );
+            }
+        }
+
+        /**
+         * Compute ||Sx||
+         *
+         * All columns of P nodes compute the same thing in parallel due to replication.
+         */
+        {
+            INSERT_TASK_dlaset(
+                &options,
+                ChamUpperLower, NRMSX.mb, NRMSX.nb,
+                1., 0.,
+                NRMSX( myp, myq ) );
+
+            for( m = myp; m < A->mt; m += A->p ) {
+                tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+                INSERT_TASK_zgessq(
+                    &options, ChamEltwise, tempmm, 1,
+                    SX(    m,   myq ),
+                    NRMSX( myp, myq ) );
+            }
+
+            /* Reduce rows */
+            for( m = 1; m < chameleon_min( A->p, A->mt ); m++ ) {
+                INSERT_TASK_dplssq(
+                    &options, ChamEltwise, 1, 1,
+                    NRMSX( m, myq ),
+                    NRMSX( 0, myq ) );
+            }
+
+            INSERT_TASK_dplssq2(
+                &options, 1, NRMSX( 0, myq ) );
+
+            /* Broadcast the results to processes in the same column */
+            for(m = 1; m < A->p; m++) {
+                INSERT_TASK_dlacpy(
+                    &options,
+                    ChamUpperLower, 1, 1, 1,
+                    NRMSX( 0, myq ),
+                    NRMSX( m, myq ) );
+            }
+        }
+
+        CHAMELEON_Desc_Flush( &NRMX,  sequence );
+        CHAMELEON_Desc_Flush( &NRMSX, sequence );
+        CHAMELEON_Desc_Flush( &X,     sequence );
+        CHAMELEON_Desc_Flush( &SX,    sequence );
+        CHAMELEON_Desc_Flush( A,      sequence );
+        RUNTIME_sequence_wait(chamctxt, sequence);
+        normx  = *((double *)(NRMX.get_blkaddr(  &NRMX,  myp, myq )));
+        normsx = *((double *)(NRMSX.get_blkaddr( &NRMSX, myp, myq )));
+
+        *result = normx / normsx;
+        cnt++;
+    }
+
+    if ( (cnt >= maxiter) &&
+         (fabs((*result) - e0) > (tol * (*result))) ) {
+        *result = INFINITY;
+    }
+
+    RUNTIME_options_ws_free( &options );
+    RUNTIME_options_finalize( &options, chamctxt );
+
+    chameleon_desc_destroy( &DROW );
+    chameleon_desc_destroy( &NRMX );
+    chameleon_desc_destroy( &NRMSX );
+    chameleon_desc_destroy( &X );
+    chameleon_desc_destroy( &SX );
+
+    return;
+}
diff --git a/compute/zgenm2.c b/compute/zgenm2.c
new file mode 100644
index 0000000000000000000000000000000000000000..7f27a44d647a461d7e2e8bd76d4787310ec6d322
--- /dev/null
+++ b/compute/zgenm2.c
@@ -0,0 +1,266 @@
+/**
+ *
+ * @file zgenm2.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ * @copyright 2016-2020 KAUST. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgenm2 wrappers
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-10-06
+ * @precisions normal z -> s d c
+ *
+ */
+#include "control/common.h"
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t
+ *
+ * @brief Returns an estimate of the two-norm of A.
+ *
+ *******************************************************************************
+ *
+ * @param[in] tol
+ *          Specify the tolerance used in the iterative algorithm to converge to
+ *          an estimation. Must be > 0.
+ *
+ * @param[in] M
+ *          The number of rows of the matrix A. M >= 0. When M = 0,
+ *          the returned value is set to zero.
+ *
+ * @param[in] N
+ *          The number of columns of the matrix A. N >= 0. When N = 0,
+ *          the returned value is set to zero.
+ *
+ * @param[in] A
+ *          The M-by-N matrix A.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ *******************************************************************************
+ *
+ * @retval the two-norm estimate.
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgenm2_Tile
+ * @sa CHAMELEON_zgenm2_Tile_Async
+ * @sa CHAMELEON_clange
+ * @sa CHAMELEON_dlange
+ * @sa CHAMELEON_slange
+ *
+ */
+double CHAMELEON_zgenm2( double tol, int M, int N,
+                         CHAMELEON_Complex64_t *A, int LDA )
+{
+    int NB;
+    int status;
+    double value = -1.;
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    CHAM_desc_t descAl, descAt;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgenm2", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    /* Check input arguments */
+    if (M < 0) {
+        chameleon_error("CHAMELEON_zgenm2", "illegal value of M");
+        return -1;
+    }
+    if (N < 0) {
+        chameleon_error("CHAMELEON_zgenm2", "illegal value of N");
+        return -2;
+    }
+    if (LDA < chameleon_max(1, M)) {
+        chameleon_error("CHAMELEON_zgenm2", "illegal value of LDA");
+        return -4;
+    }
+
+    /* Quick return */
+    if (chameleon_min(N, M) == 0) {
+        return (double)0.0;
+    }
+
+    /* Tune NB depending on M, N & NRHS; Set NBNB */
+    status = chameleon_tune(CHAMELEON_FUNC_ZGEMM, M, N, 0);
+    if (status != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_zgenm2", "chameleon_tune() failed");
+        return status;
+    }
+
+    /* Set NT */
+    NB = CHAMELEON_NB;
+
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    /* Submit the matrix conversion */
+    chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpperLower,
+                         A, NB, NB, LDA, N, M, N, sequence, &request );
+
+    /* Call the tile interface */
+    CHAMELEON_zgenm2_Tile_Async( tol, &descAt, &value, sequence, &request );
+
+    /* Submit the matrix conversion back */
+    chameleon_ztile2lap( chamctxt, &descAl, &descAt,
+                         ChamDescInput, ChamUpperLower, sequence, &request );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+
+    /* Cleanup the temporary data */
+    chameleon_ztile2lap_cleanup( chamctxt, &descAl, &descAt );
+
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return value;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile
+ *
+ * @brief Tile equivalent of CHAMELEON_zgenm2().
+ *
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] tol
+ *          Specify the tolerance used in the iterative algorithm to converge to
+ *          an estimation. Must be > 0.
+ *
+ * @param[in] A
+ *          On entry, the input matrix A.
+ *
+ *******************************************************************************
+ *
+ * @retval CHAMELEON_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgenm2
+ * @sa CHAMELEON_zgenm2_Tile_Async
+ * @sa CHAMELEON_clange_Tile
+ * @sa CHAMELEON_dlange_Tile
+ * @sa CHAMELEON_slange_Tile
+ *
+ */
+double CHAMELEON_zgenm2_Tile( double tol, CHAM_desc_t *A )
+{
+    CHAM_context_t *chamctxt;
+    RUNTIME_sequence_t *sequence = NULL;
+    RUNTIME_request_t request = RUNTIME_REQUEST_INITIALIZER;
+    int status;
+    double value = -1.;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgenm2_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    chameleon_sequence_create( chamctxt, &sequence );
+
+    CHAMELEON_zgenm2_Tile_Async( tol, A, &value, sequence, &request );
+
+    CHAMELEON_Desc_Flush( A, sequence );
+
+    chameleon_sequence_wait( chamctxt, sequence );
+    status = sequence->status;
+    chameleon_sequence_destroy( chamctxt, sequence );
+    return ( status == CHAMELEON_SUCCESS ) ? value : (double)status;
+}
+
+/**
+ ********************************************************************************
+ *
+ * @ingroup CHAMELEON_Complex64_t_Tile_Async
+ *
+ * @brief Non-blocking equivalent of CHAMELEON_zgenm2_Tile().
+ *
+ *  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa CHAMELEON_zgenm2
+ * @sa CHAMELEON_zgenm2_Tile
+ * @sa CHAMELEON_clange_Tile_Async
+ * @sa CHAMELEON_dlange_Tile_Async
+ * @sa CHAMELEON_slange_Tile_Async
+ *
+ */
+int CHAMELEON_zgenm2_Tile_Async( double tol, CHAM_desc_t *A, double *value,
+                                 RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+{
+    CHAM_context_t *chamctxt;
+
+    chamctxt = chameleon_context_self();
+    if (chamctxt == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgenm2_Tile", "CHAMELEON not initialized");
+        return CHAMELEON_ERR_NOT_INITIALIZED;
+    }
+    if (sequence == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgenm2_Tile", "NULL sequence");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        chameleon_fatal_error("CHAMELEON_zgenm2_Tile", "NULL request");
+        return CHAMELEON_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if (sequence->status == CHAMELEON_SUCCESS) {
+        request->status = CHAMELEON_SUCCESS;
+    }
+    else {
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_SEQUENCE_FLUSHED);
+    }
+
+    /* Check descriptors for correctness */
+    if (chameleon_desc_check(A) != CHAMELEON_SUCCESS) {
+        chameleon_error("CHAMELEON_zgenm2_Tile_Async", "invalid descriptor");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    /* Check input arguments */
+    if (A->nb != A->mb) {
+        chameleon_error("CHAMELEON_zgenm2_Tile_Async", "only square tiles supported");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+    if ( tol <= 0. ) {
+        chameleon_error("CHAMELEON_zgenm2_Tile_Async", "tol must be > 0");
+        return chameleon_request_fail(sequence, request, CHAMELEON_ERR_ILLEGAL_VALUE);
+    }
+
+    /* Quick return */
+    if (chameleon_min(A->m, A->n) == 0) {
+        *value = 0.0;
+        return CHAMELEON_SUCCESS;
+    }
+
+    chameleon_pzgenm2( tol, A, value, sequence, request );
+
+    return CHAMELEON_SUCCESS;
+}
diff --git a/compute/zlange.c b/compute/zlange.c
index 93f8208e1d85a80d6ac7faa30429346d18f22654..418d44bfbb9f3c6ead899c32f4b9c6e374a167c4 100644
--- a/compute/zlange.c
+++ b/compute/zlange.c
@@ -6,16 +6,16 @@
  *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
+ * @copyright 2016-2020 KAUST. All rights reserved.
  *
  ***
  *
  * @brief Chameleon zlange wrappers
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
- * @date 2020-03-03
+ * @author Hatem Ltaief
+ * @date 2020-10-14
  * @precisions normal z -> s d c
  *
  */
@@ -26,8 +26,9 @@
  *
  * @ingroup CHAMELEON_Complex64_t
  *
- *  CHAMELEON_zlange returns the value
+ * @brief Return the norm of A.
  *
+ * CHAMELEON_zlange returns the value
  *     zlange = ( max(abs(A(i,j))), NORM = ChamMaxNorm
  *              (
  *              ( norm1(A),         NORM = ChamOneNorm
@@ -35,12 +36,17 @@
  *              ( normI(A),         NORM = ChamInfNorm
  *              (
  *              ( normF(A),         NORM = ChamFrobeniusNorm
+ *              (
+ *              ( norm2(A),         NORM = ChamTwoNorm
+ *
+ *  where norm1 denotes the one norm of a matrix (maximum column sum), normI
+ *  denotes the infinity norm of a matrix (maximum row sum), normF denotes the
+ *  Frobenius norm of a matrix (square root of sum of squares) and norm2 denotes
+ *  an estimator of the maximum singular value. Note that max(abs(A(i,j))) is
+ *  not a consistent matrix norm.
  *
- *  where norm1 denotes the one norm of a matrix (maximum column sum),
- *  normI denotes the infinity norm of a matrix (maximum row sum) and
- *  normF denotes the Frobenius norm of a matrix (square root of sum
- *  of squares). Note that max(abs(A(i,j))) is not a consistent matrix
- *  norm.
+ *  Note that ChamTwoNorm returns an estimate as computed by CHAMELEON_zgenm2()
+ *  with a tolerance of 1.e-1.
  *
  *******************************************************************************
  *
@@ -49,6 +55,7 @@
  *          = ChamOneNorm: One norm
  *          = ChamInfNorm: Infinity norm
  *          = ChamFrobeniusNorm: Frobenius norm
+ *          = ChamTwoNorm: Twonorm
  *
  * @param[in] M
  *          The number of rows of the matrix A. M >= 0. When M = 0,
@@ -77,8 +84,8 @@
  * @sa CHAMELEON_slange
  *
  */
-double CHAMELEON_zlange(cham_normtype_t norm, int M, int N,
-                    CHAMELEON_Complex64_t *A, int LDA )
+double CHAMELEON_zlange( cham_normtype_t norm, int M, int N,
+                         CHAMELEON_Complex64_t *A, int LDA )
 {
     int NB;
     int status;
@@ -94,8 +101,12 @@ double CHAMELEON_zlange(cham_normtype_t norm, int M, int N,
         return CHAMELEON_ERR_NOT_INITIALIZED;
     }
     /* Check input arguments */
-    if ( (norm != ChamMaxNorm) && (norm != ChamOneNorm)
-         && (norm != ChamInfNorm) && (norm != ChamFrobeniusNorm) ) {
+    if ( (norm != ChamMaxNorm) &&
+         (norm != ChamOneNorm) &&
+         (norm != ChamInfNorm) &&
+         (norm != ChamFrobeniusNorm) &&
+         (norm != ChamTwoNorm) )
+    {
         chameleon_error("CHAMELEON_zlange", "illegal value of norm");
         return -1;
     }
@@ -130,14 +141,14 @@ double CHAMELEON_zlange(cham_normtype_t norm, int M, int N,
 
     /* Submit the matrix conversion */
     chameleon_zlap2tile( chamctxt, &descAl, &descAt, ChamDescInput, ChamUpperLower,
-                     A, NB, NB, LDA, N, M, N, sequence, &request );
+                         A, NB, NB, LDA, N, M, N, sequence, &request );
 
     /* Call the tile interface */
     CHAMELEON_zlange_Tile_Async( norm, &descAt, &value, sequence, &request );
 
     /* Submit the matrix conversion back */
     chameleon_ztile2lap( chamctxt, &descAl, &descAt,
-                     ChamDescInput, ChamUpperLower, sequence, &request );
+                         ChamDescInput, ChamUpperLower, sequence, &request );
 
     chameleon_sequence_wait( chamctxt, sequence );
 
@@ -153,10 +164,11 @@ double CHAMELEON_zlange(cham_normtype_t norm, int M, int N,
  *
  * @ingroup CHAMELEON_Complex64_t_Tile
  *
- *  CHAMELEON_zlange_Tile - Tile equivalent of CHAMELEON_zlange().
- *  Operates on matrices stored by tiles.
- *  All matrices are passed through descriptors.
- *  All dimensions are taken from the descriptors.
+ * @brief Tile equivalent of CHAMELEON_zlange().
+ *
+ * Operates on matrices stored by tiles.
+ * All matrices are passed through descriptors.
+ * All dimensions are taken from the descriptors.
  *
  *******************************************************************************
  *
@@ -216,9 +228,11 @@ double CHAMELEON_zlange_Tile( cham_normtype_t norm, CHAM_desc_t *A )
  *
  * @ingroup CHAMELEON_Complex64_t_Tile_Async
  *
- *  CHAMELEON_zlange_Tile_Async - Non-blocking equivalent of CHAMELEON_zlange_Tile().
- *  May return before the computation is finished.
- *  Allows for pipelining of operations at runtime.
+ * @brief Non-blocking equivalent of CHAMELEON_zlange_Tile().
+ *
+ * @Warning Note that this algorithm includes a RUNTIME_Sequence_wait to cleanup
+ * workspaces and thus, will return only when the result is computed. It does
+ * not allow to do pipelining.
  *
  *******************************************************************************
  *
@@ -239,7 +253,7 @@ double CHAMELEON_zlange_Tile( cham_normtype_t norm, CHAM_desc_t *A )
  *
  */
 int CHAMELEON_zlange_Tile_Async( cham_normtype_t norm, CHAM_desc_t *A, double *value,
-                             RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
+                                 RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
 {
     CHAM_context_t *chamctxt;
 
@@ -285,7 +299,12 @@ int CHAMELEON_zlange_Tile_Async( cham_normtype_t norm, CHAM_desc_t *A, double *v
         return CHAMELEON_SUCCESS;
     }
 
-    chameleon_pzlange_generic( norm, ChamUpperLower, ChamNonUnit, A, value, sequence, request );
+    if ( norm == ChamTwoNorm ) {
+        chameleon_pzgenm2( 1.e-1, A, value, sequence, request );
+    }
+    else {
+        chameleon_pzlange_generic( norm, ChamUpperLower, ChamNonUnit, A, value, sequence, request );
+    }
 
     return CHAMELEON_SUCCESS;
 }
diff --git a/control/compute_z.h b/control/compute_z.h
index ae8596e16e6462080cac77ecd46e7314e8f1296f..c17553091863da09e097ff25d09f31dc4b63ddab 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -39,6 +39,7 @@ void chameleon_pzgebrd_gb2bd(cham_uplo_t uplo, CHAM_desc_t *A, double *D, double
 void chameleon_pzgebrd_ge2gb( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+void chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 void chameleon_pzgemm(cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t beta, CHAM_desc_t *C, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
diff --git a/control/descriptor.c b/control/descriptor.c
index 51d2687303b026655966e0232ecb7a3f74bacf4f..6c424bfcb8014a1037484abfba12585d7dc330e7 100644
--- a/control/descriptor.c
+++ b/control/descriptor.c
@@ -906,8 +906,8 @@ int CHAMELEON_Desc_Release (CHAM_desc_t  *desc) {
  * @retval CHAMELEON_SUCCESS successful exit
  *
  */
-int CHAMELEON_Desc_Flush( CHAM_desc_t     *desc,
-                          RUNTIME_sequence_t *sequence )
+int CHAMELEON_Desc_Flush( const CHAM_desc_t        *desc,
+                          const RUNTIME_sequence_t *sequence )
 {
     RUNTIME_desc_flush( desc, sequence );
     return CHAMELEON_SUCCESS;
diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt
index b774d7dad0cef277d2d6029963a7974c61089dda..f0d4976c2b52febd93b491e2890e48f62d0a94e2 100644
--- a/coreblas/compute/CMakeLists.txt
+++ b/coreblas/compute/CMakeLists.txt
@@ -30,12 +30,14 @@
 
 set(COREBLAS_SRCS_GENERATED "")
 set(ZSRC
+    core_dlag2z.c
     core_dzasum.c
     core_zaxpy.c
     core_zgeadd.c
     core_zlascal.c
     core_zlatm1.c
     core_zgelqt.c
+    core_zgemv.c
     core_zgemm.c
     core_zgeqrt.c
     core_zgesplit.c
diff --git a/coreblas/compute/core_dlag2z.c b/coreblas/compute/core_dlag2z.c
new file mode 100644
index 0000000000000000000000000000000000000000..498a73c5fbb6f23cca1b74ebe6f3fc8604238e3d
--- /dev/null
+++ b/coreblas/compute/core_dlag2z.c
@@ -0,0 +1,98 @@
+/**
+ *
+ * @file core_dlag2z.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon core_dlag2z CPU kernel
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-10-05
+ * @precisions normal z -> c
+ *
+ */
+#include "coreblas.h"
+
+/**
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ * @brief Converts a m-by-n matrix A from double real to double complex.
+ *
+ *******************************************************************************
+ *
+ * @param[in] M
+ *          The number of rows of the matrix A.
+ *          m >= 0.
+ *
+ * @param[in] N
+ *          The number of columns of the matrix A.
+ *          n >= 0.
+ *
+ * @param[in] A
+ *          The lda-by-n matrix in double complex precision to convert.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the matrix A.
+ *          lda >= max(1,m).
+ *
+ * @param[out] B
+ *          On exit, the converted LDB-by-n matrix in single complex precision.
+ *
+ * @param[in] LDB
+ *          The leading dimension of the matrix As.
+ *          ldas >= max(1,m).
+ *
+ */
+void
+CORE_dlag2z( cham_uplo_t uplo, int M, int N,
+             const double          *A, int lda,
+             CHAMELEON_Complex64_t *B, int ldb )
+{
+    const double          *Aptr;
+    CHAMELEON_Complex64_t *Bptr;
+    int i, j;
+
+    if ( (uplo != ChamUpperLower) &&
+         (uplo != ChamUpper)      &&
+         (uplo != ChamLower))
+    {
+        coreblas_error(1, "illegal value of uplo");
+        return;
+    }
+
+    if (M < 0) {
+        coreblas_error(2, "Illegal value of m");
+        return;
+    }
+    if (N < 0) {
+        coreblas_error(3, "Illegal value of n");
+        return;
+    }
+    if ( (lda < chameleon_max(1,M)) && (M > 0) ) {
+        coreblas_error(5, "Illegal value of lda");
+        return;
+    }
+    if ( (ldb < chameleon_max(1,M)) && (M > 0) ) {
+        coreblas_error(7, "Illegal value of ldb");
+        return;
+    }
+
+    for(j=0; j<N; j++) {
+        int mmin = ( uplo == ChamLower ) ? j                     : 0;
+        int mmax = ( uplo == ChamUpper ) ? chameleon_min(j+1, M) : M;
+
+        Aptr = A + lda * j + mmin;
+        Bptr = B + ldb * j + mmin;
+
+        for(i=mmin; i<mmax; i++, Aptr++, Bptr++) {
+            *Bptr = (CHAMELEON_Complex64_t)(*Aptr);
+        }
+    }
+}
diff --git a/coreblas/compute/core_zgemv.c b/coreblas/compute/core_zgemv.c
new file mode 100644
index 0000000000000000000000000000000000000000..8c25d3dbab4ca76b80f9de2d73045cbf5b93ef56
--- /dev/null
+++ b/coreblas/compute/core_zgemv.c
@@ -0,0 +1,98 @@
+/**
+ *
+ * @file core_zgemv.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon core_zgemv CPU kernel
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-10-06
+ * @precisions normal z -> c d s
+ *
+ */
+#include "coreblas.h"
+
+/**
+ *******************************************************************************
+ *
+ * @ingroup CORE_CHAMELEON_Complex64_t
+ *
+ *  Performs one of the matrix-vector operations
+ *
+ *    \f[ y = \alpha [op( A ) \times x] + \beta y \f],
+ *
+ *  where op( A ) is one of:
+ *    \f[ op( A ) = A,   \f]
+ *    \f[ op( A ) = A^T, \f]
+ *    \f[ op( A ) = A^H, \f]
+ *
+ *  alpha and beta are scalars, op(A) an m-by-n matrix, and x and y are two vectors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] trans
+ *          - ChamNoTrans:   A is not transposed,
+ *          - ChamTrans:     A is transposed,
+ *          - ChamConjTrans: A is conjugate transposed.
+ *
+ * @param[in] M
+ *          The number of rows of the matrix A. M >= 0.
+ *
+ * @param[in] N
+ *          The number of columns of the matrix A. N >= 0.
+ *
+ * @param[in] alpha
+ *          The scalar alpha.
+ *
+ * @param[in] A
+ *          An lda-by-n matrix, where only the m-by-n leading entries are references.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ * @param[in] x
+ *          X is COMPLEX*16 array, dimension at least
+ *          ( 1 + ( n - 1 )*abs( INCX ) ) when TRANS = 'N' or 'n'
+ *          and at least
+ *          ( 1 + ( m - 1 )*abs( INCX ) ) otherwise.
+ *          Before entry, the incremented array X must contain the
+ *          vector x.
+ *
+ * @param[in] incX
+ *          On entry, INCX specifies the increment for the elements of
+ *          X. INCX must not be zero.
+ *
+ * @param[in] beta
+ *          The scalar beta.
+ *
+ * @param[in] y
+ *          Y is COMPLEX*16 array, dimension at least
+ *          ( 1 + ( n - 1 )*abs( INCY ) ) when TRANS = 'N' or 'n'
+ *          and at least
+ *          ( 1 + ( m - 1 )*abs( INCY ) ) otherwise.
+ *          Before entry, the incremented array Y must contain the vector y with
+ *          beta != 0. On exit, Y is overwritten by the updated vector.
+ *
+ * @param[in] incY
+ *          On entry, INCY specifies the increment for the elements of
+ *          Y. INCY must not be zero.
+ *
+ */
+void CORE_zgemv( cham_trans_t trans, int M, int N,
+                 CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA,
+                                              const CHAMELEON_Complex64_t *x, int incX,
+                 CHAMELEON_Complex64_t beta,        CHAMELEON_Complex64_t *y, int incY )
+{
+    cblas_zgemv(
+        CblasColMajor, (CBLAS_TRANSPOSE)trans, M, N,
+        CBLAS_SADDR(alpha), A, LDA,
+                            x, incX,
+        CBLAS_SADDR(beta),  y, incY );
+}
diff --git a/coreblas/compute/core_ztile.c b/coreblas/compute/core_ztile.c
index 173855a3b5bd858e80c0aaec4c823a692f2e3345..f27cebfd2eaa0221e41afeb05ea7c25f6f035a89 100644
--- a/coreblas/compute/core_ztile.c
+++ b/coreblas/compute/core_ztile.c
@@ -18,6 +18,18 @@
 #include "coreblas.h"
 #include "coreblas/coreblas_ztile.h"
 
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+void
+TCORE_dlag2z( cham_uplo_t uplo, int M, int N,
+              const CHAM_tile_t *A,
+              CHAM_tile_t       *B )
+{
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    CORE_dlag2z( uplo, M, N, A->mat, A->ld, B->mat, B->ld );
+}
+#endif
+
 void
 TCORE_dzasum( cham_store_t       storev,
               cham_uplo_t        uplo,
@@ -71,6 +83,19 @@ TCORE_zgelqt( int                    M,
     return CORE_zgelqt( M, N, IB, A->mat, A->ld, T->mat, T->ld, TAU, WORK );
 }
 
+void
+TCORE_zgemv( cham_trans_t trans, int M, int N,
+             CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A,
+                                          const CHAM_tile_t *x, int incX,
+             CHAMELEON_Complex64_t beta,        CHAM_tile_t *y, int incY )
+{
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( x->format & CHAMELEON_TILE_FULLRANK );
+    assert( y->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zgemv(
+        trans, M, N, alpha, A->mat, A->ld, x->mat, incX, beta, y->mat, incY );
+}
+
 void
 TCORE_zgemm( cham_trans_t          transA,
              cham_trans_t          transB,
diff --git a/coreblas/include/coreblas/coreblas_z.h b/coreblas/include/coreblas/coreblas_z.h
index 111e0b544259f045ddd7785379d7f8127ae483f8..9f317d4e589d85a23ef9b24260dbcb8f254e5b8c 100644
--- a/coreblas/include/coreblas/coreblas_z.h
+++ b/coreblas/include/coreblas/coreblas_z.h
@@ -30,6 +30,9 @@
 /**
  *  Declarations of serial kernels - alphabetical order
  */
+void CORE_dlag2z( cham_uplo_t uplo, int M, int N,
+                  const double          *A, int lda,
+                  CHAMELEON_Complex64_t *B, int ldb );
 void CORE_dzasum(cham_store_t storev, cham_uplo_t uplo, int M, int N,
                  const CHAMELEON_Complex64_t *A, int lda, double *work);
 int CORE_zaxpy(int M, CHAMELEON_Complex64_t alpha,
@@ -52,6 +55,10 @@ void CORE_zgemm(cham_trans_t transA, cham_trans_t transB,
                 CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA,
                                           const CHAMELEON_Complex64_t *B, int LDB,
                 CHAMELEON_Complex64_t beta,        CHAMELEON_Complex64_t *C, int LDC);
+void CORE_zgemv(cham_trans_t trans, int M, int N,
+                CHAMELEON_Complex64_t alpha, const CHAMELEON_Complex64_t *A, int LDA,
+                                             const CHAMELEON_Complex64_t *x, int incX,
+                CHAMELEON_Complex64_t beta,        CHAMELEON_Complex64_t *y, int incY);
 int  CORE_zgeqrt(int M, int N, int IB,
                  CHAMELEON_Complex64_t *A, int LDA,
                  CHAMELEON_Complex64_t *T, int LDT,
diff --git a/coreblas/include/coreblas/coreblas_ztile.h b/coreblas/include/coreblas/coreblas_ztile.h
index b5de9f7667d56514c583919f94f52e6a2798ff3a..c754393a0b0e3cef7f906c95a8b051b2864fb43d 100644
--- a/coreblas/include/coreblas/coreblas_ztile.h
+++ b/coreblas/include/coreblas/coreblas_ztile.h
@@ -16,10 +16,14 @@
 #ifndef _coreblas_ztile_h_
 #define _coreblas_ztile_h_
 
+#if defined(PRECISION_z) || defined(PRECISION_c)
+void TCORE_dlag2z( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B );
+#endif
 void TCORE_dzasum( cham_store_t storev, cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, double *work );
 int  TCORE_zaxpy( int M, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, int incA, CHAM_tile_t *B, int incB );
 int  TCORE_zgeadd( cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, CHAMELEON_Complex64_t beta, CHAM_tile_t *B );
 int  TCORE_zgelqt( int M, int N, int IB, CHAM_tile_t *A, CHAM_tile_t *T, CHAMELEON_Complex64_t *TAU, CHAMELEON_Complex64_t *WORK );
+void TCORE_zgemv( cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, const CHAM_tile_t *x, int incx, CHAMELEON_Complex64_t beta, CHAM_tile_t *y, int incy );
 void TCORE_zgemm( cham_trans_t transA, cham_trans_t transB, int M, int N, int K, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, const CHAM_tile_t *B, CHAMELEON_Complex64_t beta, CHAM_tile_t *C );
 int  TCORE_zgeqrt( int M, int N, int IB, CHAM_tile_t *A, CHAM_tile_t *T, CHAMELEON_Complex64_t *TAU, CHAMELEON_Complex64_t *WORK );
 int  TCORE_zgessm( int M, int N, int K, int IB, const int *IPIV, const CHAM_tile_t *L, CHAM_tile_t *A );
@@ -28,18 +32,18 @@ int  TCORE_zgetrf( int M, int N, CHAM_tile_t *A, int *IPIV, int *INFO );
 int  TCORE_zgetrf_incpiv( int M, int N, int IB, CHAM_tile_t *A, int *IPIV, int *INFO );
 int  TCORE_zgetrf_nopiv( int M, int N, int IB, CHAM_tile_t *A, int *INFO );
 void TCORE_zhe2ge( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B );
-#if defined(PRECISION_z ) || defined(PRECISION_c)
+#if defined(PRECISION_z) || defined(PRECISION_c)
 void TCORE_zhemm( cham_side_t side, cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, const CHAM_tile_t *B, CHAMELEON_Complex64_t beta, CHAM_tile_t *C );
 void TCORE_zherk( cham_uplo_t uplo, cham_trans_t trans, int N, int K, double alpha, const CHAM_tile_t *A, double beta, CHAM_tile_t *C );
 void TCORE_zher2k( cham_uplo_t uplo, cham_trans_t trans, int N, int K, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, const CHAM_tile_t *B, double beta, CHAM_tile_t *C );
 #endif
 int  TCORE_zherfb( cham_uplo_t uplo, int N, int K, int IB, int NB, const CHAM_tile_t *A, const CHAM_tile_t *T, CHAM_tile_t *C, CHAMELEON_Complex64_t *WORK, int ldwork );
-#if defined(PRECISION_z ) || defined(PRECISION_c)
+#if defined(PRECISION_z) || defined(PRECISION_c)
 int  TCORE_zhessq( cham_store_t storev, cham_uplo_t uplo, int N, const CHAM_tile_t *A, CHAM_tile_t *sclssq );
 #endif
 void TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B );
 void TCORE_zlange( cham_normtype_t norm, int M, int N, const CHAM_tile_t *A, double *work, double *normA );
-#if defined(PRECISION_z ) || defined(PRECISION_c)
+#if defined(PRECISION_z) || defined(PRECISION_c)
 void TCORE_zlanhe( cham_normtype_t norm, cham_uplo_t uplo, int N, const CHAM_tile_t *A, double *work, double *normA );
 #endif
 void TCORE_zlansy( cham_normtype_t norm, cham_uplo_t uplo, int N, const CHAM_tile_t *A, double *work, double *normA );
@@ -49,7 +53,7 @@ void TCORE_zlaset( cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha
 void TCORE_zlaset2( cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A );
 int  TCORE_zlatro( cham_uplo_t uplo, cham_trans_t trans, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B );
 void TCORE_zlauum( cham_uplo_t uplo, int N, CHAM_tile_t *A );
-#if defined(PRECISION_z ) || defined(PRECISION_c)
+#if defined(PRECISION_z) || defined(PRECISION_c)
 void TCORE_zplghe( double bump, int m, int n, CHAM_tile_t *tileA, int bigM, int m0, int n0, unsigned long long int seed );
 #endif
 void TCORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAM_tile_t *tileA, int bigM, int m0, int n0, unsigned long long int seed );
diff --git a/include/chameleon.h b/include/chameleon.h
index ac4c8a8ca521af9b5d01a9d1a07f61a10a0adf06..76849662c9a572668c84a6fa3d9526e4b348ec1a 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -128,7 +128,8 @@ CHAM_desc_t *CHAMELEON_Desc_SubMatrix( CHAM_desc_t *descA, int i, int j, int m,
 int CHAMELEON_Desc_Destroy( CHAM_desc_t **desc );
 int CHAMELEON_Desc_Acquire( CHAM_desc_t  *desc );
 int CHAMELEON_Desc_Release( CHAM_desc_t  *desc );
-int CHAMELEON_Desc_Flush  ( CHAM_desc_t  *desc, RUNTIME_sequence_t *sequence );
+int CHAMELEON_Desc_Flush  ( const CHAM_desc_t        *desc,
+                            const RUNTIME_sequence_t *sequence );
 
 /* Workspaces */
 int CHAMELEON_Dealloc_Workspace (CHAM_desc_t **desc);
diff --git a/include/chameleon/chameleon_z.h b/include/chameleon/chameleon_z.h
index 1b7ca622e2309503e526520b3a9a2efb1ba8b02a..7ebcff28f37b4027a16421fa1302f37b718dc28e 100644
--- a/include/chameleon/chameleon_z.h
+++ b/include/chameleon/chameleon_z.h
@@ -39,6 +39,7 @@ int CHAMELEON_zgeadd(cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t alp
 int CHAMELEON_zgelqf(int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT);
 int CHAMELEON_zgelqs(int M, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT, CHAMELEON_Complex64_t *B, int LDB);
 int CHAMELEON_zgels(cham_trans_t trans, int M, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT, CHAMELEON_Complex64_t *B, int LDB);
+double CHAMELEON_zgenm2( double tol, int M, int N, CHAMELEON_Complex64_t *A, int LDA );
 int CHAMELEON_zgemm(cham_trans_t transA, cham_trans_t transB, int M, int N, int K, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t *A, int LDA, CHAMELEON_Complex64_t *B, int LDB, CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t *C, int LDC);
 int CHAMELEON_zgeqrf(int M, int N, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT);
 int CHAMELEON_zgeqrs(int M, int N, int NRHS, CHAMELEON_Complex64_t *A, int LDA, CHAM_desc_t *descT, CHAMELEON_Complex64_t *B, int LDB);
@@ -114,6 +115,7 @@ int CHAMELEON_zgeadd_Tile(cham_trans_t trans, CHAMELEON_Complex64_t alpha, CHAM_
 int CHAMELEON_zgelqf_Tile(CHAM_desc_t *A, CHAM_desc_t *T);
 int CHAMELEON_zgelqs_Tile(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B);
 int CHAMELEON_zgels_Tile(cham_trans_t trans, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B);
+double CHAMELEON_zgenm2_Tile( double tol, CHAM_desc_t *A );
 int CHAMELEON_zgemm_Tile(cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t beta, CHAM_desc_t *C);
 int CHAMELEON_zgeqrf_Tile(CHAM_desc_t *A, CHAM_desc_t *T);
 int CHAMELEON_zgeqrs_Tile(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B);
@@ -186,6 +188,7 @@ int CHAMELEON_zgeadd_Tile_Async(cham_trans_t trans, CHAMELEON_Complex64_t alpha,
 int CHAMELEON_zgelqf_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgelqs_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgels_Tile_Async(cham_trans_t trans, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
+int CHAMELEON_zgenm2_Tile_Async( double tol, CHAM_desc_t *A, double *value, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 int CHAMELEON_zgemm_Tile_Async(cham_trans_t transA, cham_trans_t transB, CHAMELEON_Complex64_t alpha, CHAM_desc_t *A, CHAM_desc_t *B, CHAMELEON_Complex64_t beta, CHAM_desc_t *C, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgeqrf_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 int CHAMELEON_zgeqrs_Tile_Async(CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
diff --git a/include/chameleon/constants.h b/include/chameleon/constants.h
index 962e7270f16e9a512b99167c2dc2416f131d4f7a..fce41282c595025d2cf8648c154422c91679f178 100644
--- a/include/chameleon/constants.h
+++ b/include/chameleon/constants.h
@@ -105,7 +105,7 @@ typedef enum chameleon_side_e {
 typedef enum chameleon_normtype_e {
     ChamOneNorm       = 171, /**< One norm:       max_j( sum_i( |a_{ij}| ) )   */
     /* ChamRealOneNorm   = 172, */
-    /* ChamTwoNorm       = 173, */
+    ChamTwoNorm       = 173, /**< Two norm: max( sigma_i )                     */
     ChamFrobeniusNorm = 174, /**< Frobenius norm: sqrt( sum_{i,j} (a_{ij}^2) ) */
     ChamInfNorm       = 175, /**< Inifinite norm: max_i( sum_j( |a_{ij}| ) )   */
     /* ChamRealInfNorm   = 176, */
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index 6df71da4c29ef3c015f39feeace6e403738b064b..a78d30041de15dd80ce8f83d84b78395d007d538 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -30,6 +30,10 @@
 /**
  *  Declarations of QUARK wrappers (called by CHAMELEON) - alphabetical order
  */
+void INSERT_TASK_dlag2z( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int M, int N,
                          const CHAM_desc_t *A, int Am, int An,
@@ -49,6 +53,11 @@ void INSERT_TASK_zgelqt( const RUNTIME_option_t *options,
                          int m, int n, int ib, int nb,
                          const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *T, int Tm, int Tn );
+void INSERT_TASK_zgemv( const RUNTIME_option_t *options,
+                        cham_trans_t trans, int m, int n,
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                     const CHAM_desc_t *X, int Xm, int Xn, int incX,
+                        CHAMELEON_Complex64_t beta,  const CHAM_desc_t *Y, int Ym, int Yn, int incY );
 void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
                         cham_trans_t transA, cham_trans_t transB,
                         int m, int n, int k, int nb,
diff --git a/include/chameleon/types.h b/include/chameleon/types.h
index 1cbedc7bae8fde7f8a0cf2d5bf989df47e6a558a..6f0acad809320d25d44e910caf3cdfc568a941ad 100644
--- a/include/chameleon/types.h
+++ b/include/chameleon/types.h
@@ -22,6 +22,7 @@
 #define _chameleon_types_h_
 
 #include "chameleon/config.h"
+#include <math.h>
 
 /**
  *  System requirements
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index 71724813e89887a857b5d54aa5f8c82146097361..1caa32fef3ec97692aa023fc59547cc44a6f2f61 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -28,12 +28,17 @@
 # List of codelets required by all runtimes
 # -----------------------------------------
 set(CODELETS_ZSRC
+    codelets/codelet_dlag2z.c
     codelets/codelet_dzasum.c
     ##################
     # BLAS 1
     ##################
     codelets/codelet_zaxpy.c
     ##################
+    # BLAS 2
+    ##################
+    codelets/codelet_zgemv.c
+    ##################
     # BLAS 3
     ##################
     codelets/codelet_zgemm.c
diff --git a/runtime/openmp/codelets/codelet_dlag2z.c b/runtime/openmp/codelets/codelet_dlag2z.c
new file mode 100644
index 0000000000000000000000000000000000000000..077f541fef83a22f7e4d1b163002475df254db3a
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_dlag2z.c
@@ -0,0 +1,34 @@
+/**
+ *
+ * @file openmp/codelet_dlag2z.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon dlag2z OpenMP codelet
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-10-05
+ * @precisions normal z -> c
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
+
+void INSERT_TASK_dlag2z( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
+{
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+
+#pragma omp task firstprivate( uplo, m, n, tileA, tileB ) depend( in:tileA[0] ) depend( out:tileB[0] )
+    TCORE_dlag2z( uplo, m, n, tileA, tileB );
+}
diff --git a/runtime/openmp/codelets/codelet_zgemv.c b/runtime/openmp/codelets/codelet_zgemv.c
new file mode 100644
index 0000000000000000000000000000000000000000..c3d70b0b4ab523994c7c946d791fb45efba115d3
--- /dev/null
+++ b/runtime/openmp/codelets/codelet_zgemv.c
@@ -0,0 +1,37 @@
+/**
+ *
+ * @file openmp/codelet_zgemv.c
+ *
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgemv OpenMP codelet
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-10-06
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_openmp.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
+
+void
+INSERT_TASK_zgemv( const RUNTIME_option_t *options,
+                   cham_trans_t trans, int m, int n,
+                   CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                const CHAM_desc_t *X, int Xm, int Xn, int incX,
+                   CHAMELEON_Complex64_t beta,  const CHAM_desc_t *Y, int Ym, int Yn, int incY )
+{
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileX = X->get_blktile( X, Xm, Xn );
+    CHAM_tile_t *tileY = Y->get_blktile( Y, Ym, Yn );
+
+#pragma omp task firstprivate( trans, m, n, alpha, tileA, tileX, incX, beta, tileY, incY ) depend( in:tileA[0], tileX[0] ) depend( inout:tileY[0] )
+    TCORE_zgemv( trans,  m, n,
+                 alpha, tileA, tileX, incX,
+                 beta,  tileY, incY );
+}
diff --git a/runtime/parsec/codelets/codelet_dlag2z.c b/runtime/parsec/codelets/codelet_dlag2z.c
new file mode 100644
index 0000000000000000000000000000000000000000..60868494412940b0d2397daa7c544756c941e4bc
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_dlag2z.c
@@ -0,0 +1,62 @@
+/**
+ *
+ * @file parsec/codelet_dlag2z.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon dlag2z PaRSEC codelet
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-10-05
+ * @precisions normal z -> c
+ *
+ */
+#include "chameleon_parsec.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+static inline int
+CORE_dlag2z_parsec( parsec_execution_stream_t *context,
+                    parsec_task_t             *this_task )
+{
+    cham_uplo_t uplo;
+    int m;
+    int n;
+    double *A;
+    int lda;
+    CHAMELEON_Complex64_t *B;
+    int ldb;
+
+    parsec_dtd_unpack_args( this_task, &uplo, &m, &n, &A, &lda, &B, &ldb );
+    CORE_dlag2z( uplo, m, n, A, lda, B, ldb );
+
+    (void)context;
+    return PARSEC_HOOK_RETURN_DONE;
+}
+
+void INSERT_TASK_dlag2z( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
+{
+    parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+
+    parsec_dtd_taskpool_insert_task(
+        PARSEC_dtd_taskpool, CORE_dlag2z_parsec, options->priority, "dlag2z",
+        sizeof(cham_uplo_t), &uplo,       VALUE,
+        sizeof(int),         &m,          VALUE,
+        sizeof(int),         &n,          VALUE,
+        PASSED_BY_REF,        RTBLKADDR( A, double, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
+        sizeof(int),        &(tileA->ld), VALUE,
+        PASSED_BY_REF,        RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | OUTPUT,
+        sizeof(int),        &(tileB->ld), VALUE,
+        PARSEC_DTD_ARG_END );
+}
diff --git a/runtime/parsec/codelets/codelet_zgemv.c b/runtime/parsec/codelets/codelet_zgemv.c
new file mode 100644
index 0000000000000000000000000000000000000000..0579743d54cdce1cf66ca6c8bbcc40e6fcca9ccc
--- /dev/null
+++ b/runtime/parsec/codelets/codelet_zgemv.c
@@ -0,0 +1,76 @@
+/**
+ *
+ * @file parsec/codelet_zgemv.c
+ *
+ * @copyright 2009-2015 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgemv PaRSEC codelet
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-10-06
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_parsec.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_z.h"
+
+static inline int
+CORE_zgemv_parsec( parsec_execution_stream_t *context,
+                   parsec_task_t             *this_task )
+{
+    cham_trans_t trans;
+    int m;
+    int n;
+    CHAMELEON_Complex64_t alpha;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    CHAMELEON_Complex64_t *X;
+    int incX;
+    CHAMELEON_Complex64_t beta;
+    CHAMELEON_Complex64_t *Y;
+    int incY;
+
+    parsec_dtd_unpack_args(
+        this_task, &trans, &m, &n, &alpha, &A, &lda, &X, &incX, &beta, &Y, &incY );
+
+    CORE_zgemv( trans, m, n,
+                alpha, A, lda,
+                       X, incX,
+                beta,  Y, incY );
+
+    (void)context;
+    return PARSEC_HOOK_RETURN_DONE;
+}
+
+void
+INSERT_TASK_zgemv( const RUNTIME_option_t *options,
+                   cham_trans_t trans, int m, int n,
+                   CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                const CHAM_desc_t *X, int Xm, int Xn, int incX,
+                   CHAMELEON_Complex64_t beta,  const CHAM_desc_t *Y, int Ym, int Yn, int incY )
+{
+    parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+
+    parsec_dtd_taskpool_insert_task(
+        PARSEC_dtd_taskpool, CORE_zgemv_parsec, options->priority, "zgemv",
+        sizeof(cham_trans_t),          &trans,       VALUE,
+        sizeof(int),                   &m,           VALUE,
+        sizeof(int),                   &n,           VALUE,
+        sizeof(CHAMELEON_Complex64_t), &alpha,       VALUE,
+        PASSED_BY_REF, RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
+        sizeof(int),                   &(tileA->ld), VALUE,
+        PASSED_BY_REF, RTBLKADDR( X, CHAMELEON_Complex64_t, Xm, Xn ), chameleon_parsec_get_arena_index( X ) | INPUT,
+        sizeof(int),                   &incX,        VALUE,
+        sizeof(CHAMELEON_Complex64_t), &beta,        VALUE,
+        PASSED_BY_REF, RTBLKADDR( Y, CHAMELEON_Complex64_t, Ym, Yn ), chameleon_parsec_get_arena_index( Y ) | INOUT | AFFINITY,
+        sizeof(int),                   &incY,        VALUE,
+        PARSEC_DTD_ARG_END );
+}
diff --git a/runtime/quark/codelets/codelet_dlag2z.c b/runtime/quark/codelets/codelet_dlag2z.c
new file mode 100644
index 0000000000000000000000000000000000000000..86e214347b028782f7aee4cda9dc426a78fe1b3c
--- /dev/null
+++ b/runtime/quark/codelets/codelet_dlag2z.c
@@ -0,0 +1,53 @@
+/**
+ *
+ * @file quark/codelet_dlag2z.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon dlag2z Quark codelet
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-10-05
+ * @precisions normal z -> c
+ *
+ */
+#include "chameleon_quark.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
+
+static inline void
+CORE_dlag2z_quark( Quark *quark )
+{
+    cham_uplo_t uplo;
+    int m;
+    int n;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+
+    quark_unpack_args_5( quark, uplo, m, n, tileA, tileB );
+    TCORE_dlag2z( uplo, m, n, tileA, tileB );
+}
+
+void INSERT_TASK_dlag2z( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+
+    //DAG_CORE_DLAG2Z;
+    QUARK_Insert_Task(
+        opt->quark, CORE_dlag2z_quark, (Quark_Task_Flags*)opt,
+        sizeof(cham_uplo_t), &uplo,    VALUE,
+        sizeof(int),         &m,         VALUE,
+        sizeof(int),         &n,         VALUE,
+        sizeof(void*), RTBLKADDR(A, double, Am, An),                INPUT,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), OUTPUT,
+        0);
+}
diff --git a/runtime/quark/codelets/codelet_zgemv.c b/runtime/quark/codelets/codelet_zgemv.c
new file mode 100644
index 0000000000000000000000000000000000000000..4dcde9bf5a5bdb1be9da1592e80ac576b528ca2a
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zgemv.c
@@ -0,0 +1,66 @@
+/**
+ *
+ * @file quark/codelet_zgemv.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgemv Quark codelet
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-10-06
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_quark.h"
+#include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
+
+void CORE_zgemv_quark(Quark *quark)
+{
+    cham_trans_t trans;
+    int m;
+    int n;
+    CHAMELEON_Complex64_t alpha;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileX;
+    int incX;
+    CHAMELEON_Complex64_t beta;
+    CHAM_tile_t *tileY;
+    int incY;
+
+    quark_unpack_args_10( quark, trans, m, n, alpha, tileA, tileX, incX, beta, tileY, incY );
+    TCORE_zgemv( trans, m, n,
+                 alpha, tileA, tileX, incX,
+                 beta,  tileY, incY );
+}
+
+void INSERT_TASK_zgemv( const RUNTIME_option_t *options,
+                        cham_trans_t trans, int m, int n,
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                     const CHAM_desc_t *X, int Xm, int Xn, int incX,
+                        CHAMELEON_Complex64_t beta,  const CHAM_desc_t *Y, int Ym, int Yn, int incY )
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    int accessY = ( beta == 0. ) ? OUTPUT : INOUT;
+
+    /* DAG_CORE_GEMV; */
+    QUARK_Insert_Task(
+        opt->quark, CORE_zgemv_quark, (Quark_Task_Flags*)opt,
+        sizeof(cham_trans_t),          &trans, VALUE,
+        sizeof(int),                   &m,     VALUE,
+        sizeof(int),                   &n,     VALUE,
+        sizeof(CHAMELEON_Complex64_t), &alpha, VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
+        sizeof(void*), RTBLKADDR(X, CHAMELEON_Complex64_t, Xm, Xn), INPUT,
+        sizeof(int),                   &incX,  VALUE,
+        sizeof(CHAMELEON_Complex64_t), &beta,  VALUE,
+        sizeof(void*), RTBLKADDR(Y, CHAMELEON_Complex64_t, Ym, Yn), accessY,
+        sizeof(int),                   &incY,  VALUE,
+        0);
+}
diff --git a/runtime/starpu/codelets/codelet_dlag2z.c b/runtime/starpu/codelets/codelet_dlag2z.c
new file mode 100644
index 0000000000000000000000000000000000000000..b7953cd977431f2b0599d5470ce4fbd6d7834ccb
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_dlag2z.c
@@ -0,0 +1,79 @@
+/**
+ *
+ * @file starpu/codelet_dlag2z.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon dlag2z StarPU codelet
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-10-05
+ * @precisions normal z -> c
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelet_z.h"
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_dlag2z_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    int m;
+    int n;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+
+    starpu_codelet_unpack_args(cl_arg, &uplo, &m, &n);
+    TCORE_dlag2z( uplo, m, n, tileA, tileB );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(dlag2z, cl_dlag2z_cpu_func)
+
+/**
+ *
+ * @ingroup INSERT_TASK_Complex64_t
+ *
+ */
+void INSERT_TASK_dlag2z( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
+{
+    struct starpu_codelet *codelet = &cl_dlag2z;
+    void (*callback)(void*) = options->profiling ? cl_dlag2z_callback : NULL;
+    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
+    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_W(B, Bm, Bn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &uplo,              sizeof(uplo),
+        STARPU_VALUE,    &m,                 sizeof(int),
+        STARPU_VALUE,    &n,                 sizeof(int),
+        STARPU_R,         RTBLKADDR(A, double, Am, An),
+        STARPU_W,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback,
+        STARPU_EXECUTE_ON_WORKER, workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "dlag2z",
+#endif
+        0);
+}
diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index f810ab0f07d92c320459072022496d68906f1c30..cb381cfc153f21234fbc7b3fbea21f5663669627 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -22,11 +22,15 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if defined(PRECISION_z) || defined(PRECISION_c)
+CHAMELEON_CL_CB(dlag2z,        cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0,                                      M*N)
+#endif
 CHAMELEON_CL_CB(dzasum,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
 CHAMELEON_CL_CB(zaxpy,         cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[1]), 0,                                      M)
 CHAMELEON_CL_CB(zgeadd,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
 CHAMELEON_CL_CB(zlascal,       cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
 CHAMELEON_CL_CB(zgelqt,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      (4./3.)*M*N*K)
+CHAMELEON_CL_CB(zgemv,         cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      2. *M*N  )
 CHAMELEON_CL_CB(zgemm,         cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), cti_handle_get_n(task->handles[0]),     2. *M*N*K) /* If A^t, computation is wrong */
 CHAMELEON_CL_CB(zgeqrt,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      (4./3.)*M*M*N)
 CHAMELEON_CL_CB(zgessm,        cti_handle_get_m(task->handles[2]), cti_handle_get_m(task->handles[2]), cti_handle_get_m(task->handles[2]),     2. *M*N*K)
diff --git a/runtime/starpu/codelets/codelet_zgemv.c b/runtime/starpu/codelets/codelet_zgemv.c
new file mode 100644
index 0000000000000000000000000000000000000000..62b887247e58661abda06fcd9dc63238fe1e4c9d
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zgemv.c
@@ -0,0 +1,133 @@
+/**
+ *
+ * @file starpu/codelet_zgemv.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgemv StarPU codelet
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2020-10-06
+ * @precisions normal z -> c d s
+ *
+ */
+#include "chameleon_starpu.h"
+#include "runtime_codelet_z.h"
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgemv_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_trans_t trans;
+    int m;
+    int n;
+    CHAMELEON_Complex64_t alpha;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileX;
+    int incX;
+    CHAMELEON_Complex64_t beta;
+    CHAM_tile_t *tileY;
+    int incY;
+
+    tileA = cti_interface_get(descr[0]);
+    tileX = cti_interface_get(descr[1]);
+    tileY = cti_interface_get(descr[2]);
+
+    starpu_codelet_unpack_args(cl_arg, &trans, &m, &n, &alpha, &incX, &beta, &incY );
+    TCORE_zgemv( trans, m, n,
+                 alpha, tileA, tileX, incX,
+                 beta,  tileY, incY );
+}
+
+#if defined(CHAMELEON_USE_CUDA) & 0
+static void cl_zgemv_cuda_func(void *descr[], void *cl_arg)
+{
+    cham_trans_t transA;
+    cham_trans_t transB;
+    int m;
+    int n;
+    int k;
+    cuDoubleComplex alpha;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    cuDoubleComplex beta;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+
+    starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta);
+
+    RUNTIME_getStream( stream );
+
+    CUDA_zgemv(
+        transA, transB,
+        m, n, k,
+        &alpha, tileA->mat, tileA->ld,
+                tileB->mat, tileB->ld,
+        &beta,  tileC->mat, tileC->ld,
+        stream);
+
+#ifndef STARPU_CUDA_ASYNC
+    cudaStreamSynchronize( stream );
+#endif
+
+    return;
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgemv, cl_zgemv_cpu_func)
+
+/**
+ *
+ * @ingroup INSERT_TASK_Complex64_t
+ *
+ */
+void INSERT_TASK_zgemv( const RUNTIME_option_t *options,
+                        cham_trans_t trans, int m, int n,
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                     const CHAM_desc_t *X, int Xm, int Xn, int incX,
+                        CHAMELEON_Complex64_t beta,  const CHAM_desc_t *Y, int Ym, int Yn, int incY )
+{
+    struct starpu_codelet *codelet = &cl_zgemv;
+    void (*callback)(void*) = options->profiling ? cl_zgemv_callback : NULL;
+    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
+    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessY = ( beta == 0. ) ? STARPU_W : STARPU_RW;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(X, Xm, Xn);
+    CHAMELEON_ACCESS_RW(Y, Ym, Yn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &trans,             sizeof(cham_trans_t),
+        STARPU_VALUE,    &m,                 sizeof(int),
+        STARPU_VALUE,    &n,                 sizeof(int),
+        STARPU_VALUE,    &alpha,             sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_R,         RTBLKADDR(X, CHAMELEON_Complex64_t, Xm, Xn),
+        STARPU_VALUE,    &incX,              sizeof(int),
+        STARPU_VALUE,    &beta,              sizeof(CHAMELEON_Complex64_t),
+        accessY,          RTBLKADDR(Y, CHAMELEON_Complex64_t, Ym, Yn),
+        STARPU_VALUE,    &incY,              sizeof(int),
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback,
+        STARPU_EXECUTE_ON_WORKER, workerid,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zgemv",
+#endif
+        0);
+}
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index 25a14085fb7579fd40f8bb1800b3029d98cf548d..e5ed6704c090d8efc3409e57c2f29ebbb8ac8455 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -39,6 +39,11 @@
  */
 CODELETS_HEADER(zaxpy);
 
+/*
+ * BLAS 2 functions
+ */
+CODELETS_HEADER(zgemv);
+
 /*
  * BLAS 3 functions
  */
@@ -106,6 +111,9 @@ CODELETS_HEADER(zlag2c);
 /*
  * DZ functions
  */
+#if defined(PRECISION_z) || defined(PRECISION_c)
+CODELETS_HEADER(dlag2z);
+#endif
 CODELETS_HEADER(dzasum);
 
 /*
diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt
index f6591433f46729ac378b23e979cb3d9ba2feb8c6..a233e6d909f773a50ce159e6990ed8b0516fb69c 100644
--- a/testing/CMakeLists.txt
+++ b/testing/CMakeLists.txt
@@ -74,6 +74,7 @@ set(ZSRC
   testing_zsytrf.c
   testing_zsytrs.c
   testing_zsysv.c
+  testing_zgenm2.c
   testing_zgetrf.c
   testing_zgetrs.c
   testing_zgesv.c
diff --git a/testing/CTestLists.cmake b/testing/CTestLists.cmake
index 986fd4faf5974bd2be4e62e8c80f632465f266af..82bef26df95796b6a2c9aa2708126666adf0d362 100644
--- a/testing/CTestLists.cmake
+++ b/testing/CTestLists.cmake
@@ -64,6 +64,8 @@ if (NOT CHAMELEON_SIMULATION)
       #geqrs_hqr gelqs_hqr
       gels
       gels_hqr )
+    set( TESTS ${TESTS}
+      genm2 )
 
     foreach(cat ${TEST_CATEGORIES})
       foreach(gpus ${N_GPUS})
diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c
index 0d6d6e5d93d4ff1c1251f1994ad07ffcb04e28d3..82e3969202896be9c23196fc722c83c6ea182c24 100644
--- a/testing/chameleon_ztesting.c
+++ b/testing/chameleon_ztesting.c
@@ -69,6 +69,11 @@ static parameter_t parameters[] = {
     { "seedB", "Seed for the matrix B random generation", 'Y', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 11, TestValInt, {0}, NULL, pread_int, sprint_int },
     { "seedC", "Seed for the matrix C random generation", 'Z', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 11, TestValInt, {0}, NULL, pread_int, sprint_int },
 
+    { NULL, "Matrix generation numerical parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
+    { "bump",  "Bump value to make a matrix diagonal dominant",           'z', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
+    { "mode",  "Mode that specifies the eigen/singular values in xlatms", -30, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2,  4, TestValInt,       {0}, NULL, pread_int,       sprint_int    },
+    { "cond",  "Conditional number of the matrix used by xlatms",         -31, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValDouble,    {0}, NULL, pread_double,    sprint_double    },
+
     { NULL, "Operation specific parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
     { "trans",  "Value of the trans parameter",  -11, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 9, TestTrans,    {0}, NULL, pread_trans, sprint_trans },
     { "transA", "Value of the transA parameter", -12, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 9, TestTrans,    {0}, NULL, pread_trans, sprint_trans },
@@ -79,9 +84,8 @@ static parameter_t parameters[] = {
     { "norm",   "Value of the norm parameter",   -17, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 4, TestNormtype, {0}, NULL, pread_norm,  sprint_norm  },
 
     { NULL, "Operation specific scalar", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
-    { "alpha", "Value of the scalar alpha",                     'x', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
-    { "beta",  "Value of the scalar beta",                      'y', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
-    { "bump",  "Bump value to make a matrix diagonal dominant", 'z', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
+    { "alpha", "Value of the scalar alpha",                       'x', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
+    { "beta",  "Value of the scalar beta",                        'y', PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 13, TestValComplex64, {0}, NULL, pread_complex64, sprint_complex64 },
 
     { NULL, "QR/LQ parameters", 0, PARAM_OPTION, 0, 0, 0, {0}, NULL, NULL, NULL },
     { "qra",    "Size of TS domain (=RH for householder trees)",           -20, PARAM_OPTION | PARAM_INPUT | PARAM_OUTPUT, 2, 3, TestValInt, {0}, NULL, pread_int, sprint_int },
diff --git a/testing/input/genm2.in b/testing/input/genm2.in
new file mode 100644
index 0000000000000000000000000000000000000000..dc29aa220c8af8dc0bb4a4e0b8e31c05d298f395
--- /dev/null
+++ b/testing/input/genm2.in
@@ -0,0 +1,20 @@
+# You can enumerate each parameter's values as an explicit list separated by commas or by a range start:end[:step]
+# Not given parameters will receive default values
+
+# GENM2
+# mtxfmt
+# nb: Tile size
+# M: Number of rows of matrix A
+# N: Number of columns of matrix A
+# LDA: Leading dimension of matrix A
+# cond: The condition number
+# mode: the mode values for latms
+
+op = genm2
+mtxfmt = 0, 1
+nb = 16, 17
+m = 15, 25, 37
+n = 13, 23, 35
+lda = 41
+cond = 1., 1.e13
+mode = 1:6
diff --git a/testing/testing_zcheck.c b/testing/testing_zcheck.c
index 65b0ec3dddb7631085af86949e48840b237135a2..9263fe639f1139acbe945d6433846d276556ca4a 100644
--- a/testing/testing_zcheck.c
+++ b/testing/testing_zcheck.c
@@ -214,9 +214,6 @@ int check_znorm( run_arg_list_t *args, cham_mtxtype_t matrix_type, cham_normtype
         result = fabs( norm_cham - norm_lapack ) / ( norm_lapack * eps );
 
         switch(norm_type) {
-        case ChamMaxNorm:
-            /* result should be perfectly equal */
-            break;
         case ChamInfNorm:
             /* Sum order on the line can differ */
             result = result / (double)N;
@@ -229,6 +226,10 @@ int check_znorm( run_arg_list_t *args, cham_mtxtype_t matrix_type, cham_normtype
             /* Sum order on every element can differ */
             result = result / ((double)M * (double)N);
             break;
+        case ChamMaxNorm:
+        default:
+            /* result should be perfectly equal */
+            ;
         }
 
         info_solution = ( result < 1 ) ? 0 : 1;
diff --git a/testing/testing_zgenm2.c b/testing/testing_zgenm2.c
new file mode 100644
index 0000000000000000000000000000000000000000..b396d9fee6ee2a4c51542b3bc2a80102b9d72710
--- /dev/null
+++ b/testing/testing_zgenm2.c
@@ -0,0 +1,140 @@
+/**
+ *
+ * @file testing_zgenm2.c
+ *
+ * @copyright 2019-2020 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon zgenm2 testing
+ *
+ * @version 1.0.0
+ * @author Lucas Barros de Assis
+ * @author Mathieu Faverge
+ * @date 2020-10-05
+ * @precisions normal z -> c d s
+ *
+ */
+#include <chameleon.h>
+#if !defined(CHAMELEON_SIMULATION)
+#include <coreblas/lapacke.h>
+#include <coreblas/cblas.h>
+#include <coreblas.h>
+#endif
+#include "testings.h"
+#include "testing_zcheck.h"
+#include "flops.h"
+
+static cham_fixdbl_t
+flops_zgenm2( int M, int N )
+{
+    double coefabs = 1.;
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+    coefabs = 3.;
+#endif
+
+    return coefabs * M * N;
+}
+
+int
+testing_zgenm2( run_arg_list_t *args, int check )
+{
+    int          hres   = 0;
+    double       norm;
+    CHAM_desc_t *descA;
+    double      *D, dmax = 1.;
+
+    /* Reads arguments */
+    intptr_t        mtxfmt = parameters_getvalue_int( "mtxfmt" );
+    int             nb     = run_arg_get_int( args, "nb", 320 );
+    int             P      = parameters_getvalue_int( "P" );
+    int             N      = run_arg_get_int( args, "N", 1000 );
+    int             M      = run_arg_get_int( args, "M", N );
+    int             LDA    = run_arg_get_int( args, "LDA", M );
+    int             seedA  = run_arg_get_int( args, "seedA", random() );
+    int             Q      = parameters_compute_q( P );
+    int             minMN  = chameleon_min( M, N );
+    double          cond   = run_arg_get_double( args, "cond", 1.e16 );
+    int             mode   = run_arg_get_int( args, "mode", 4 );
+    double          tol    = 1.e-1;
+    cham_fixdbl_t t, gflops;
+    cham_fixdbl_t flops = flops_zgenm2( M, N );
+
+    CHAMELEON_Set( CHAMELEON_TILE_SIZE, nb );
+
+    /* Generate the diagonal of eigen/singular values */
+    D = malloc( minMN * sizeof(double) );
+#if !defined(CHAMELEON_SIMULATION)
+    hres = CORE_dlatm1( mode, cond, 0, ChamDistUniform, seedA, D, minMN );
+    if ( hres != 0 ) {
+        free( D );
+        return hres;
+    }
+
+    /* Save the largest absolute value */
+    hres = cblas_idamax( minMN, D, 1 );
+    dmax = fabs( D[hres] );
+#endif
+
+    /* Creates the matrix */
+    CHAMELEON_Desc_Create(
+        &descA, (void*)(-mtxfmt), ChamComplexDouble, nb, nb, nb * nb, LDA, N, 0, 0, M, N, P, Q );
+
+    /* Fills the matrix with random values */
+    hres = CHAMELEON_zlatms_Tile(
+        ChamDistUniform, seedA, ChamNonsymPosv, D, 0, cond, 0., descA );
+    free( D );
+    if ( hres != 0 ) {
+        return hres;
+    }
+
+    /* Calculates the norm */
+    START_TIMING( t );
+    norm = CHAMELEON_zgenm2_Tile( tol, descA );
+    STOP_TIMING( t );
+    gflops = flops * 1.e-9 / t;
+    run_arg_add_fixdbl( args, "time", t );
+    run_arg_add_fixdbl( args, "gflops", ( norm >= 0. ) ? gflops : -1. );
+
+    /* Checks the solution */
+    hres = 0;
+    if ( check ) {
+        double res = fabs(dmax - norm) / (dmax * tol);
+
+        run_arg_add_double( args, "||A||", dmax );
+        run_arg_add_double( args, "||B||", norm );
+        run_arg_add_double( args, "||R||", res );
+
+        if ( isnan(res) || isinf(res) || (res > 10.0) ) {
+            hres = 1;
+        }
+    }
+
+    CHAMELEON_Desc_Destroy( &descA );
+
+    return hres;
+}
+
+testing_t   test_zgenm2;
+const char *zgenm2_params[] = { "mtxfmt", "nb", "m", "n", "lda", "seedA", "cond", "mode", NULL };
+const char *zgenm2_output[] = { NULL };
+const char *zgenm2_outchk[] = { "||A||", "||B||", "||R||", "RETURN", NULL };
+
+/**
+ * @brief Testing registration function
+ */
+void testing_zgenm2_init( void ) __attribute__( ( constructor ) );
+void
+testing_zgenm2_init( void )
+{
+    test_zgenm2.name        = "zgenm2";
+    test_zgenm2.helper      = "General matrix two-norm estimator";
+    test_zgenm2.params      = zgenm2_params;
+    test_zgenm2.output      = zgenm2_output;
+    test_zgenm2.outchk      = zgenm2_outchk;
+    test_zgenm2.fptr        = testing_zgenm2;
+    test_zgenm2.next        = NULL;
+
+    testing_register( &test_zgenm2 );
+}