diff --git a/compute/CMakeLists.txt b/compute/CMakeLists.txt
index 8d8fe14aedff319d6c3473ea0ae614d4372f7fea..1d9b97f0432585e291caf7406c07c5d349f4335e 100644
--- a/compute/CMakeLists.txt
+++ b/compute/CMakeLists.txt
@@ -179,16 +179,16 @@ set(ZSRC
     ##################
     # OTHERS
     ##################
-    #pzgebrd_ge2tb.c
-    #pzgebrd_tb2bd.c
+    pztile2band.c
+    #pzgebrd_gb2bd.c
+    pzgebrd_ge2gb.c
     #pzgetmi2.c
     #pzgetrf_reclap.c
     #pzgetrf_rectil.c
-    #pzhbcpy_t2bl.c
     #pzhegst.c
     #pzherbt.c
-    #pzhetrd_hb2st.c
-    #pzhetrd_he2hb.c
+    #pzhetrd_hb2ht.c
+    pzhetrd_he2hb.c
     #pzlarft_blgtrd.c
     #pzlaswp.c
     #pzlaswpc.c
@@ -198,16 +198,16 @@ set(ZSRC
     #zgebrd.c
     #zgecfi.c
     #zgecfi2.c
-    #zgesvd.c
+    zgesvd.c
     #zgetmi.c
     #zgetri.c
     #zgetrs.c
     #zheev.c
-    #zheevd.c
+    zheevd.c
     #zhegst.c
     #zhegv.c
     #zhegvd.c
-    #zhetrd.c
+    zhetrd.c
     #zlaswp.c
     #zlaswpc.c
     #ztrsmrv.c
diff --git a/compute/pzgebrd_ge2gb.c b/compute/pzgebrd_ge2gb.c
new file mode 100644
index 0000000000000000000000000000000000000000..710eb1604a0eaf34b90e858d7ef5a6f85efd9904
--- /dev/null
+++ b/compute/pzgebrd_ge2gb.c
@@ -0,0 +1,102 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file pzgebrd_ge2gb.c
+ *
+ *  PLASMA auxiliary routines
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Hatem Ltaief
+ * @author Azzam Haidar
+ * @date 2010-11-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "control/common.h"
+
+void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T,
+                         MORSE_sequence_t *sequence, MORSE_request_t *request)
+{
+    int k;
+    int tempkm, tempkn;
+    if (A.m >= A.n){
+       for (k = 0; k < A.nt; k++) {
+           tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
+           tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
+
+           morse_pzgeqrf(
+               morse_desc_submatrix(&A, k*A.mb, k*A.nb, A.m-k*A.mb, tempkn),
+               morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.m-k*T.mb, tempkn),
+               sequence, request);
+
+           morse_pzunmqr(
+               MorseLeft,
+               MorseConjTrans,
+               morse_desc_submatrix(&A, k*A.mb,     k*A.nb, A.m-k*A.mb, tempkn),
+               morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, A.m-k*A.mb, A.n-(k+1)*A.nb),
+               morse_desc_submatrix(&T, k*T.mb,     k*T.nb, T.m-k*T.mb, tempkn),
+               sequence, request);
+
+           if (k+1 < A.nt){
+              tempkn = k+1 == A.nt-1 ? A.n-(k+1)*A.nb : A.nb;
+
+              morse_pzgelqf(
+                  morse_desc_submatrix(&A, k*A.mb, (k+1)*A.nb, tempkm, A.n-(k+1)*A.nb),
+                  morse_desc_submatrix(&T, k*T.mb, (k+1)*T.nb, T.mb,   T.n-(k+1)*T.nb),
+                  sequence, request);
+
+              morse_pzunmlq(
+                  MorseRight, MorseConjTrans,
+                  morse_desc_submatrix(&A,     k*A.mb, (k+1)*A.nb, tempkm,         A.n-(k+1)*A.nb),
+                  morse_desc_submatrix(&A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb),
+                  morse_desc_submatrix(&T,     k*T.mb, (k+1)*T.nb, T.mb,           T.n-(k+1)*T.nb),
+                  sequence, request);
+           }
+       }
+    }
+    else{
+       for (k = 0; k < A.mt; k++) {
+           tempkm = k == A.mt-1 ? A.m-k*A.mb : A.mb;
+           tempkn = k == A.nt-1 ? A.n-k*A.nb : A.nb;
+
+           morse_pzgelqf(
+               morse_desc_submatrix(&A, k*A.mb, k*A.nb, tempkm, A.n-k*A.nb),
+               morse_desc_submatrix(&T, k*T.mb, k*T.nb, T.mb,   T.n-k*T.nb),
+               sequence, request);
+
+           morse_pzunmlq(
+               MorseRight, MorseConjTrans,
+               morse_desc_submatrix(&A,     k*A.mb, k*A.nb, tempkm,         A.n-k*A.nb),
+               morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, A.n-k*A.nb),
+               morse_desc_submatrix(&T,     k*T.mb, k*T.nb, T.mb,           T.n-k*T.nb),
+               sequence, request);
+
+           if (k+1 < A.mt){
+              tempkm = k+1 == A.mt-1 ? A.m-(k+1)*A.mb : A.mb;
+
+              morse_pzgeqrf(
+                   morse_desc_submatrix(&A, (k+1)*A.mb, k*A.nb, A.m-(k+1)*A.mb, tempkn),
+                   morse_desc_submatrix(&T, (k+1)*T.mb, k*T.nb, T.m-(k+1)*T.mb, tempkn),
+                   sequence, request);
+
+              morse_pzunmqr(
+                  MorseLeft, MorseConjTrans,
+                  morse_desc_submatrix(&A, (k+1)*A.mb,     k*A.nb, A.m-(k+1)*A.mb, tempkn),
+                  morse_desc_submatrix(&A, (k+1)*A.mb, (k+1)*A.nb, A.m-(k+1)*A.mb, A.n-(k+1)*A.nb),
+                  morse_desc_submatrix(&T, (k+1)*T.mb,     k*T.nb, T.m-(k+1)*T.mb, tempkn),
+                  sequence, request);
+           }
+       }
+    }
+}
diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c
index cbbb8e44a210af323ee1ddbd538a100a19416a0b..7d59288a91657a6923f6612e09596cc8fd06a909 100644
--- a/compute/pzgelqf.c
+++ b/compute/pzgelqf.c
@@ -53,7 +53,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
     int k, m, n;
     int ldak, ldam;
     int tempkm, tempkn, tempmm, tempnn;
-    int ib, minMT;
+    int ib, minMNT;
 
     morse = morse_context_self();
     if (sequence->status != MORSE_SUCCESS)
@@ -63,9 +63,9 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
     ib = MORSE_IB;
 
     if (A->m > A->n) {
-        minMT = A->nt;
+        minMNT = A->nt;
     } else {
-        minMT = A->mt;
+        minMNT = A->mt;
     }
 
     /*
@@ -114,7 +114,7 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
     morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q);
 #endif
 
-    for (k = 0; k < min(A->mt, A->nt); k++) {
+    for (k = 0; k < minMNT; k++) {
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
         ldak = BLKLDD(A, k);
@@ -181,4 +181,5 @@ void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T,
     morse_desc_mat_free(DIAG);
     free(DIAG);
 #endif
+    (void)DIAG;
 }
diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c
index d108869da821b4e8c7fbebbab0a783d71bca93fa..c358e06c9c2845768d3a97e9f934d6fdc1e486b3 100644
--- a/compute/pzgelqfrh.c
+++ b/compute/pzgelqfrh.c
@@ -59,7 +59,6 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
     int ldak, ldam;
     int tempkmin, tempkm, tempNn, tempnn, tempmm, tempNRDn;
     int ib;
-    int nblk;
 
     morse = morse_context_self();
     if (sequence->status != MORSE_SUCCESS)
@@ -112,9 +111,11 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
 
 #if defined(CHAMELEON_COPY_DIAG)
     /* necessary to avoid dependencies between tasks regarding the diag tile */
-    nblk = ( A->nt + BS -1 ) / BS;
-    DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-    morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
+    {
+        int nblk = ( A->nt + BS -1 ) / BS;
+        DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
+        morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
+    }
 #endif
 
     for (k = 0; k < min(A->mt, A->nt); k++) {
@@ -212,4 +213,5 @@ void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
     morse_desc_mat_free(DIAG);
     free(DIAG);
 #endif
+    (void)DIAG;
 }
diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c
index 9ab9cc1f3d8b9afa51c3ba67ef5141f7c59c4158..ad141a97b1adb407457161e82c0889ee7e4759fd 100644
--- a/compute/pzgeqrf.c
+++ b/compute/pzgeqrf.c
@@ -175,4 +175,5 @@ void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T,
     morse_desc_mat_free(DIAG);
     free(DIAG);
 #endif
+    (void)DIAG;
 }
diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c
index 22c21f5bb3545b43713880864c0abf04dfd12bae..c95013f89f661bf18f3a8ac88cd5fe7beeea1c2f 100644
--- a/compute/pzgeqrfrh.c
+++ b/compute/pzgeqrfrh.c
@@ -57,7 +57,6 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
     int ldaM, ldam, ldaMRD;
     int tempkmin, tempkn, tempMm, tempnn, tempmm, tempMRDm;
     int ib;
-    int nblk;
 
     morse = morse_context_self();
     if (sequence->status != MORSE_SUCCESS)
@@ -109,10 +108,12 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
 #if defined(CHAMELEON_COPY_DIAG)
-    /* necessary to avoid dependencies between tasks regarding the diag tile */
-    nblk = ( A->mt + BS -1 ) / BS;
-    DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-    morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
+    {
+        /* necessary to avoid dependencies between tasks regarding the diag tile */
+        int nblk = ( A->mt + BS -1 ) / BS;
+        DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
+        morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
+    }
 #endif
 
     K = min(A->mt, A->nt);
@@ -211,4 +212,5 @@ void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS,
     morse_desc_mat_free(DIAG);
     free(DIAG);
 #endif
+    (void)DIAG;
 }
diff --git a/compute/pzhetrd_he2hb.c b/compute/pzhetrd_he2hb.c
new file mode 100644
index 0000000000000000000000000000000000000000..88e673c59e76c7f962e142c644cf26ccfe1d05e4
--- /dev/null
+++ b/compute/pzhetrd_he2hb.c
@@ -0,0 +1,469 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file pzhetrd_he2hb.c
+ *
+ *  PLASMA auxiliary routines
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.7.1
+ * @author Hatem Ltaief
+ * @author Azzam Haidar
+ * @date 2010-11-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "control/common.h"
+
+#define A(m, n) A,  m,  n
+#define T(m, n) T,  m,  n
+#define D(k) D, (k)-1, 0
+
+#define AT(k) AT, k, 0
+
+#if defined(CHAMELEON_COPY_DIAG)
+#define E(m, n) E, m, 0
+#else
+#define E(m, n) A, m, n
+#endif
+
+/***************************************************************************//**
+ *  Parallel tile BAND Tridiagonal Reduction - dynamic scheduler
+ **/
+void morse_pzhetrd_he2hb(MORSE_enum uplo,
+                         MORSE_desc_t *A, MORSE_desc_t *T,
+                         MORSE_sequence_t *sequence, MORSE_request_t *request)
+{
+    MORSE_context_t *morse;
+    MORSE_option_t options;
+    MORSE_desc_t *E  = NULL;
+    MORSE_desc_t *D  = NULL;
+    MORSE_desc_t *AT = NULL;
+    size_t ws_worker = 0;
+    size_t ws_host = 0;
+
+    int k, m, n, i, j;
+    int ldak, ldak1, ldam, ldan, ldaj, ldai;
+    int tempkm, tempkn, tempmm, tempnn, tempjj;
+    int ib;
+
+    morse = morse_context_self();
+    if (sequence->status != MORSE_SUCCESS)
+        return;
+
+    RUNTIME_options_init(&options, morse, sequence, request);
+    ib = MORSE_IB;
+
+    /*
+     * zgeqrt        = A->nb * (ib+1)
+     * zunmqr        = A->nb * ib
+     * ztsqrt        = A->nb * (ib+1)
+     * ztsmqr        = A->nb * ib
+     * zherfb        = A->nb * ib
+     * ztsmqr_hetra1 = A->nb * ib
+     */
+    ws_worker = A->nb * (ib+1);
+
+    /* Allocation of temporary (scratch) working space */
+#if defined(CHAMELEON_USE_CUDA)
+    /* Worker space
+     *
+     * zunmqr = A->nb * ib
+     * ztsmqr = 2 * A->nb * ib
+     * zherfb = A->nb * ib
+     */
+    ws_worker = max( ws_worker, ib * A->nb * 2 );
+#endif
+
+#if defined(CHAMELEON_USE_MAGMA)
+    /* Worker space
+     *
+     * zgeqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
+     * ztsqrt = max( A->nb * (ib+1), ib * (ib + A->nb) )
+     */
+    ws_worker = max( ws_worker, ib * (ib + A->nb) );
+
+    /* Host space
+     *
+     * zgeqrt = ib * (A->mb+3*ib) + A->mb )
+     * ztsqrt = 2 * ib * (A->nb+ib) + A->nb
+     */
+    ws_host = max( ws_host, ib * (A->mb + 3 * ib) + A->mb );
+    ws_host = max( ws_host,  2 * ib * (A->nb + ib) + A->nb );
+#endif
+
+    ws_worker *= sizeof(MORSE_Complex64_t);
+    ws_host   *= sizeof(MORSE_Complex64_t);
+
+    RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
+
+#if defined(CHAMELEON_COPY_DIAG)
+    /* Copy of the extra-diagonal to generate more parallelism by releasing anti-dependencies on UNMQR/TSMQR triangle conflict */
+    E = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
+    morse_zdesc_alloc_diag(*E, A->mb, A->nb, min(A->m, A->n), A->nb, 0, 0, min(A->m, A->n), A->nb, A->p, A->q);
+#endif
+
+    /* Copy of the diagonal tiles to keep the general version of the tile all along the computation */
+    D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
+    morse_zdesc_alloc_diag(*D, A->mb, A->nb, min(A->m, A->n) - A->mb, A->nb, 0, 0, min(A->m, A->n) - A->mb, A->nb, A->p, A->q);
+
+    AT = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
+    *AT = morse_desc_init(
+        MorseComplexDouble, A->mb, A->nb, (A->mb*A->nb),
+        min(A->mt, A->nt) * A->mb, A->nb, 0, 0, min(A->mt, A->nt) * A->mb, A->nb, 1, 1);
+    morse_desc_mat_alloc( AT );
+
+    /* Let's extract the diagonal in a temporary copy that contains A and A' */
+    for (k = 1; k < A->nt; k++){
+        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
+        ldak = BLKLDD(A, k);
+
+        MORSE_TASK_zhe2ge(&options,
+                          uplo,
+                          tempkn, tempkn, ldak,
+                          A(k, k), ldak,
+                          D(k),    ldak);
+    }
+
+    if (uplo == MorseLower) {
+       for (k = 0; k < A->nt-1; k++){
+           tempkm = k+1 == A->mt-1 ? A->m-(k+1)*A->mb : A->mb;
+           tempkn = k   == A->nt-1 ? A->n- k   *A->nb : A->nb;
+           ldak1 = BLKLDD(A, k+1);
+
+           MORSE_TASK_zgeqrt(
+               &options,
+               tempkm, tempkn, ib, A->nb,
+               A(k+1, k), ldak1,
+               T(k+1, k), T->mb);
+
+#if defined(CHAMELEON_COPY_DIAG)
+           MORSE_TASK_zlacpy(
+               &options,
+               MorseLower, tempkm, tempkn, A->nb,
+               A(k+1, k), ldak1,
+               E(k+1, k), ldak1 );
+#if defined(CHAMELEON_USE_CUDA)
+           MORSE_TASK_zlaset(
+               &options,
+               MorseUpper, tempkm, tempkn,
+               0., 1.,
+               E(k+1, k), ldak1 );
+#endif
+#endif
+
+           /* LEFT and RIGHT on the symmetric diagonal block */
+           MORSE_TASK_zherfb(
+               &options,
+               MorseLower,
+               tempkm, tempkm, ib, A->nb,
+               E(k+1, k), ldak1,
+               T(k+1, k), T->mb,
+               D(k+1),    ldak1);
+
+           /* RIGHT on the remaining tiles until the bottom */
+           for (m = k+2; m < A->mt ; m++) {
+               tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+               ldam = BLKLDD(A, m);
+               MORSE_TASK_zunmqr(
+                   &options,
+                   MorseRight, MorseNoTrans,
+                   tempmm, A->nb, tempkm, ib, A->nb,
+                   E(k+1, k),   ldak1,
+                   T(k+1, k),   T->mb,
+                   A(m,   k+1), ldam);
+           }
+
+           for (m = k+2; m < A->mt; m++) {
+               tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+               ldam = BLKLDD(A, m);
+
+               options.priority = 1;
+               MORSE_TASK_ztsqrt(
+                   &options,
+                   tempmm, A->nb, ib, A->nb,
+                   A(k+1, k), ldak1,
+                   A(m  , k), ldam,
+                   T(m  , k), T->mb);
+               options.priority = 0;
+
+               /* LEFT */
+               for (i = k+2; i < m; i++) {
+                   ldai = BLKLDD(A, i);
+                   MORSE_TASK_ztsmqr_hetra1(
+                       &options,
+                       MorseLeft, MorseConjTrans,
+                       A->mb, A->nb, tempmm, A->nb, A->nb, ib, A->nb,
+                       A(i, k+1), ldai,
+                       A(m,   i), ldam,
+                       A(m,   k), ldam,
+                       T(m,   k), T->mb);
+               }
+
+               /* RIGHT */
+               for (j = m+1; j < A->mt ; j++) {
+                   tempjj = j == A->mt-1 ? A->m-j*A->mb : A->mb;
+                   ldaj = BLKLDD(A, j);
+                   MORSE_TASK_ztsmqr(
+                       &options,
+                       MorseRight, MorseNoTrans,
+                       tempjj, A->nb, tempjj, tempmm, A->nb, ib, A->nb,
+                       A(j, k+1), ldaj,
+                       A(j,   m), ldaj,
+                       A(m,   k), ldam,
+                       T(m,   k), T->mb);
+               }
+
+               /* LEFT->RIGHT */
+               options.priority = 1;
+               /**
+                * Compute the update of the following:
+                *
+                *     A1   A2'
+                *     A2   A3
+                *
+                * with A1 and A3 two diagonal tiles. This is the tsmqr_corner
+                * from plasma split in 4 tasks
+                */
+               /*  Copy the transpose of A2 (m, k+1): AT(k) <- A2' = A2(k+1, m) */
+               MORSE_TASK_zlatro(
+                   &options,
+                   MorseUpperLower, MorseConjTrans,
+                   tempmm, A->nb, A->nb,
+                   A(m, k+1), ldam,
+                   AT(m),  ldak1);
+
+               /*  Left application on |A1| */
+               /*                      |A2| */
+               MORSE_TASK_ztsmqr(
+                   &options,
+                   MorseLeft, MorseConjTrans,
+                   A->mb, A->nb, tempmm, A->nb, A->nb, ib, A->nb,
+                   D(k+1), ldak1,
+                   A(m, k+1), ldam,
+                   A(m,   k), ldam,
+                   T(m,   k), T->mb);
+
+               /*  Left application on | A2'| */
+               /*                      | A3 | */
+               MORSE_TASK_ztsmqr(
+                   &options,
+                   MorseLeft, MorseConjTrans,
+                   A->mb, tempmm, tempmm, tempmm, A->nb, ib, A->nb,
+                   AT(m), ldak1,
+                   D(m) , ldam,
+                   A(m,  k), ldam,
+                   T(m,  k), T->mb);
+
+               /*  Right application on | A1 A2' | */
+               MORSE_TASK_ztsmqr(
+                   &options,
+                   MorseRight, MorseNoTrans,
+                   A->mb, A->nb, A->mb, tempmm, A->nb, ib, A->nb,
+                   D(k+1), ldak1,
+                   AT(m) , ldak1,
+                   A(m,   k), ldam,
+                   T(m,   k), T->mb);
+
+               /*  Right application on | A2 A3 | */
+               MORSE_TASK_ztsmqr(
+                   &options,
+                   MorseRight, MorseNoTrans,
+                   tempmm, A->nb, tempmm, tempmm, A->nb, ib, A->nb,
+                   A(m, k+1), ldam,
+                   D(m)  , ldam,
+                   A(m,   k), ldam,
+                   T(m,   k), T->mb);
+               options.priority = 0;
+           }
+       }
+    }
+    else {
+       for (k = 0; k < A->nt-1; k++){
+           tempkn = k+1 == A->nt-1 ? A->n-(k+1)*A->nb : A->nb;
+           tempkm = k   == A->mt-1 ? A->m- k   *A->mb : A->mb;
+           ldak  = BLKLDD(A, k);
+           ldak1 = BLKLDD(A, k+1);
+           MORSE_TASK_zgelqt(
+               &options,
+               tempkm, tempkn, ib, A->nb,
+               A(k, k+1), ldak,
+               T(k, k+1), T->mb);
+
+#if defined(CHAMELEON_COPY_DIAG)
+           MORSE_TASK_zlacpy(
+               &options,
+               MorseUpper, tempkm, tempkn, A->nb,
+               A(k, k+1), ldak,
+               E(k, k+1), ldak );
+#if defined(CHAMELEON_USE_CUDA)
+           MORSE_TASK_zlaset(
+               &options,
+               MorseLower, tempkm, tempkn,
+               0., 1.,
+               E(k, k+1), ldak );
+#endif
+#endif
+
+           /* RIGHT and LEFT on the symmetric diagonal block */
+           MORSE_TASK_zherfb(
+               &options,
+               MorseUpper,
+               tempkn, tempkn, ib, A->nb,
+               E(k, k+1), ldak,
+               T(k, k+1), T->mb,
+               D(k+1),    ldak1);
+
+           /* LEFT on the remaining tiles until the left side */
+           for (n = k+2; n < A->nt ; n++) {
+               tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+               MORSE_TASK_zunmlq(
+                   &options,
+                   MorseLeft, MorseNoTrans,
+                   A->mb, tempnn, tempkn, ib, A->nb,
+                   E(k,   k+1), ldak,
+                   T(k,   k+1), T->mb,
+                   A(k+1, n  ), ldak1);
+           }
+
+           for (n = k+2; n < A->nt; n++) {
+               tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+               ldan = BLKLDD(A, n);
+               options.priority = 1;
+               MORSE_TASK_ztslqt(
+                   &options,
+                   A->mb, tempnn, ib, A->nb,
+                   A(k, k+1), ldak,
+                   A(k, n  ), ldak,
+                   T(k, n  ), T->mb);
+               options.priority = 0;
+
+               /* RIGHT */
+               for (i = k+2; i < n; i++) {
+                   ldai = BLKLDD(A, i);
+                   MORSE_TASK_ztsmlq_hetra1(
+                       &options,
+                       MorseRight, MorseConjTrans,
+                       A->mb, A->nb, A->nb, tempnn, A->nb, ib, A->nb,
+                       A(k+1, i), ldak1,
+                       A(i,   n), ldai,
+                       A(k,   n), ldak,
+                       T(k,   n), T->mb);
+               }
+
+               /* LEFT */
+               for (j = n+1; j < A->nt ; j++) {
+                   tempjj = j == A->nt-1 ? A->n-j*A->nb : A->nb;
+                   MORSE_TASK_ztsmlq(
+                       &options,
+                       MorseLeft, MorseNoTrans,
+                       A->nb, tempjj, tempnn, tempjj, A->nb, ib, A->nb,
+                       A(k+1, j), ldak1,
+                       A(n,   j), ldan,
+                       A(k,   n), ldak,
+                       T(k,   n), T->mb);
+               }
+
+               /* RIGHT->LEFT */
+               options.priority = 1;
+               /**
+                * Compute the update of the following:
+                *
+                *     A1   A2
+                *     A2'  A3
+                *
+                * with A1 and A3 two diagonal tiles. This is the tsmqr_corner
+                * from plasma split in 4 tasks
+                */
+               /*  Copy the transpose of A2: AT(k) <- A2' */
+               MORSE_TASK_zlatro(
+                   &options,
+                   MorseUpperLower, MorseConjTrans,
+                   A->mb, tempnn, A->nb,
+                   A(k+1, n), ldak1,
+                   AT(n),     A->mb);
+
+               /*  Right application on | A1 A2 | */
+               MORSE_TASK_ztsmlq(
+                   &options,
+                   MorseRight, MorseConjTrans,
+                   A->mb, A->nb, A->mb, tempnn, A->nb, ib, A->nb,
+                   D(k+1),    ldak1,
+                   A(k+1, n), ldak1,
+                   A(k,   n), ldak,
+                   T(k,   n), T->mb);
+
+               /*  Right application on | A2' A3 | */
+               MORSE_TASK_ztsmlq(
+                   &options,
+                   MorseRight, MorseConjTrans,
+                   tempnn, A->nb, tempnn, tempnn, A->nb, ib, A->nb,
+                   AT(n),    A->mb,
+                   D(n),     ldan,
+                   A(k,  n), ldak,
+                   T(k,  n), T->mb);
+
+               /*  Left application on |A1 | */
+               /*                      |A2'| */
+               MORSE_TASK_ztsmlq(
+                   &options,
+                   MorseLeft, MorseNoTrans,
+                   A->mb, A->nb, tempnn, A->nb, A->nb, ib, A->nb,
+                   D(k+1),  ldak1,
+                   AT(n),   A->mb,
+                   A(k, n), ldak,
+                   T(k, n), T->mb);
+
+               /*  Left application on | A2 | */
+               /*                      | A3 | */
+               MORSE_TASK_ztsmlq(
+                   &options,
+                   MorseLeft, MorseNoTrans,
+                   A->mb, tempnn, tempnn, tempnn, A->nb, ib, A->nb,
+                   A(k+1, n), ldak1,
+                   D(n),      ldan,
+                   A(k,   n), ldak,
+                   T(k,   n), T->mb);
+           }
+           options.priority = 0;
+       }
+    }
+
+    /* Copy-back into A */
+    for (k = 1; k < A->nt; k++){
+        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
+        ldak = BLKLDD(A, k);
+        MORSE_TASK_zlacpy(&options,
+                          uplo,
+                          tempkn, tempkn, ldak,
+                          D(k), ldak,
+                          A(k, k), ldak);
+    }
+
+    RUNTIME_options_ws_free(&options);
+    RUNTIME_options_finalize(&options, morse);
+    MORSE_TASK_dataflush_all();
+
+    MORSE_Sequence_Wait(sequence);
+    morse_desc_mat_free(D);
+    free(D);
+
+    morse_desc_mat_free(AT);
+    free(AT);
+
+#if defined(CHAMELEON_COPY_DIAG)
+    morse_desc_mat_free(E);
+    free(E);
+#endif
+    (void)E;
+}
diff --git a/compute/pztile2band.c b/compute/pztile2band.c
new file mode 100644
index 0000000000000000000000000000000000000000..4611d2adce9a7e2679e82d8ec83d3b59aff33063
--- /dev/null
+++ b/compute/pztile2band.c
@@ -0,0 +1,124 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+
+/**
+ *
+ * @file pztile2band.c
+ *
+ *  PLASMA auxiliary routines
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Azzam Haidar
+ * @author Gregoire Pichon
+ * @author Mathieu Faverge
+ * @date 2010-11-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include "control/common.h"
+
+#define A(m,n)   A,  m, n
+#define B(m, n)  B, m, n
+
+/***************************************************************************//**
+ *  Parallel copy of a band matrix from full NxN tile storage to band storage (LDABxN).
+ **/
+void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B,
+                       MORSE_sequence_t *sequence, MORSE_request_t *request)
+{
+    MORSE_context_t *morse;
+    MORSE_option_t options;
+
+    int j;
+    int ldaj, ldx;
+    int tempjm, tempjn;
+    int minmnt = min(A->mt, A->nt);
+
+    morse = morse_context_self();
+    if (sequence->status != MORSE_SUCCESS)
+        return;
+    RUNTIME_options_init(&options, morse, sequence, request);
+
+    ldx = B->mb-1;
+
+    /*
+     *  MorseLower => Lower Band
+     */
+    if ( uplo == MorseLower ) {
+       for (j = 0; j < minmnt; j++){
+           /* Compute dimension on N with B since it is dimensioned with min(A->m, A->n) */
+           assert( A->m == B->n );
+           assert( A->n >= B->n );
+
+           tempjm = j == A->mt-1 ? A->m - j * A->mb : A->mb;
+           tempjn = j == B->nt-1 ? B->n - j * B->nb : B->nb;
+           ldaj = BLKLDD(A, j);
+
+           MORSE_TASK_zlaset(
+               &options,
+               MorseUpperLower, B->mb, tempjn,
+               0., 0.,
+               B(0, j), B->mb );
+
+           MORSE_TASK_zlacpy(
+               &options,
+               MorseLower, tempjm, tempjn, A->nb,
+               A(j, j), ldaj,
+               B(0, j), ldx );
+
+           if( j<minmnt-1 ){
+               tempjm = (j+1) == A->mt-1 ? A->m-(j+1)*A->mb : A->mb;
+               ldaj = BLKLDD(A, j+1);
+               MORSE_TASK_zlacpyx(
+                   &options,
+                   MorseUpper, tempjm, tempjn, A->nb,
+                   0,     A(j+1, j), ldaj,
+                   A->nb, B(0,   j), ldx);
+           }
+       }
+    }
+    else if ( uplo == MorseUpper ) {
+       for (j = 0; j < minmnt; j++){
+           /* Compute dimension on M with B since it is dimensioned with min(A->m, A->n) */
+           assert( A->n == B->n );
+           assert( A->m >= B->n );
+           tempjn = j == A->nt-1 ? A->n - j * A->nb : A->nb;
+           ldaj = BLKLDD(A, j);
+
+           MORSE_TASK_zlaset(
+               &options,
+               MorseUpperLower, B->mb, tempjn,
+               0., 0.,
+               B(0, j), B->mb );
+
+           if(j > 0){
+               MORSE_TASK_zlacpy(
+                   &options,
+                   MorseLower, A->mb, tempjn, A->nb,
+                   A(j-1, j), BLKLDD(A, j-1),
+                   B(0,   j), ldx);
+           }
+
+           tempjm = j == B->nt-1 ? B->n - j * B->nb : B->nb;
+           MORSE_TASK_zlacpyx(
+               &options,
+               MorseUpper, tempjm, tempjn, A->nb,
+               0,     A(j, j), ldaj,
+               A->nb, B(0, j), ldx);
+       }
+    }
+    RUNTIME_options_finalize(&options, morse);
+    MORSE_TASK_dataflush_all();
+}
+#undef B
+#undef A
diff --git a/compute/pzunglq.c b/compute/pzunglq.c
index 7b1343866534893e900f092ddc29470c1dc96dad..2356ff6707dde66993eefb9fd228f9f2ecb0dd1a 100644
--- a/compute/pzunglq.c
+++ b/compute/pzunglq.c
@@ -153,4 +153,5 @@ void morse_pzunglq(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
     morse_desc_mat_free(DIAG);
     free(DIAG);
 #endif
+    (void)DIAG;
 }
diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c
index cd4e8abc014496e0076f68a05e2720dcd6f81195..30288e5d255567859ead0c4c41592dd112739408 100644
--- a/compute/pzunglqrh.c
+++ b/compute/pzunglqrh.c
@@ -59,7 +59,6 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
     int ldqm;
     int tempkm, tempkmin, tempNn, tempnn, tempmm, tempNRDn;
     int ib;
-    int nblk;
 
     morse = morse_context_self();
     if (sequence->status != MORSE_SUCCESS)
@@ -90,10 +89,12 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
 #if defined(CHAMELEON_COPY_DIAG)
-    /* necessary to avoid dependencies between tasks regarding the diag tile */
-    nblk = ( A->nt + BS -1 ) / BS;
-    DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-    morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
+    {
+        /* necessary to avoid dependencies between tasks regarding the diag tile */
+        int nblk = ( A->nt + BS -1 ) / BS;
+        DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
+        morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
+    }
 #endif
 
     K = min(A->mt, A->nt);
@@ -178,4 +179,5 @@ void morse_pzunglqrh(MORSE_desc_t *A, MORSE_desc_t *Q,
     morse_desc_mat_free(DIAG);
     free(DIAG);
 #endif
+    (void)DIAG;
 }
diff --git a/compute/pzungqr.c b/compute/pzungqr.c
index 8226c8477bea418ccc59b2e908b78f8e4a4897a7..80e7fd82bc6140056ecfe8e4774d34d998e55fa9 100644
--- a/compute/pzungqr.c
+++ b/compute/pzungqr.c
@@ -154,4 +154,5 @@ void morse_pzungqr(MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *T,
     morse_desc_mat_free(DIAG);
     free(DIAG);
 #endif
+    (void)DIAG;
 }
diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c
index 8c2f64ddb809336b45cdacb293639e995adcfef9..bd08779f2631654e54b7adbb72416d44469dd845 100644
--- a/compute/pzungqrrh.c
+++ b/compute/pzungqrrh.c
@@ -61,7 +61,6 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
     int ldqM, ldqm, ldqMRD;
     int tempkn, tempMm, tempnn, tempmm, tempMRDm, tempkmin;
     int ib;
-    int nblk;
 
     morse = morse_context_self();
     if (sequence->status != MORSE_SUCCESS)
@@ -92,10 +91,12 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
 #if defined(CHAMELEON_COPY_DIAG)
-    /* necessary to avoid dependencies between tasks regarding the diag tile */
-    nblk = ( A->mt + BS -1 ) / BS;
-    DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-    morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
+    {
+        /* necessary to avoid dependencies between tasks regarding the diag tile */
+        int nblk = ( A->mt + BS -1 ) / BS;
+        DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
+        morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
+    }
 #endif
 
     K = min(A->mt, A->nt);
@@ -183,4 +184,5 @@ void morse_pzungqrrh(MORSE_desc_t *A, MORSE_desc_t *Q,
     morse_desc_mat_free(DIAG);
     free(DIAG);
 #endif
+    (void)DIAG;
 }
diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c
index f157fb241b2237671735212a9e793e9b90357dfb..46e35e596e5468e7a12e03129478fee54a9fb92b 100644
--- a/compute/pzunmlq.c
+++ b/compute/pzunmlq.c
@@ -312,4 +312,5 @@ void morse_pzunmlq(MORSE_enum side, MORSE_enum trans,
     morse_desc_mat_free(DIAG);
     free(DIAG);
 #endif
+    (void)DIAG;
 }
diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c
index ce3cbd1dba54d21a3834cac5308c42d6c8bec30a..e9a5b3689dd2a9d9bf4b3ea48f36eb0044727618 100644
--- a/compute/pzunmlqrh.c
+++ b/compute/pzunmlqrh.c
@@ -57,11 +57,9 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
 
     int k, m, n;
     int K, N, RD, lastRD;
-    int ldaN, ldak;
-    int ldbN, ldbm, ldbNRD;
+    int ldak, ldbN, ldbm, ldbNRD;
     int tempNn, tempkm, tempnn, tempmm, tempNRDn, tempkmin;
     int ib;
-    int nblk;
 
     morse = morse_context_self();
     if (sequence->status != MORSE_SUCCESS)
@@ -91,11 +89,13 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
 
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
-    /* necessary to avoid dependencies between tasks regarding the diag tile */
 #if defined(CHAMELEON_COPY_DIAG)
-    nblk = ( A->nt + BS -1 ) / BS;
-    DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-    morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
+    /* necessary to avoid dependencies between tasks regarding the diag tile */
+    {
+        int nblk = ( A->nt + BS -1 ) / BS;
+        DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
+        morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
+    }
 #endif
 
     K = min(A->mt, A->nt);
@@ -110,7 +110,6 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                 for (N = k; N < A->nt; N += BS) {
                     tempNn   = N == A->nt-1 ? A->n-N*A->nb : A->nb;
                     tempkmin = min(tempkm,tempNn);
-                    ldaN = BLKLDD(A, N);
                     ldbN = BLKLDD(B, N);
 #if defined(CHAMELEON_COPY_DIAG)
                     MORSE_TASK_zlacpy(
@@ -206,7 +205,6 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                 for (N = k; N < A->nt; N += BS) {
                     tempNn   = N == A->nt-1 ? A->n-N*A->nb : A->nb;
                     tempkmin = min(tempkm,tempNn);
-                    ldaN = BLKLDD(A, N);
                     ldbN = BLKLDD(B, N);
                     for (m = min(N+BS, A->nt)-1; m > N; m--) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
@@ -339,7 +337,6 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
                 for (N = k; N < A->nt; N += BS) {
                     tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb;
                     tempkmin = min(tempkm,tempNn);
-                    ldaN = BLKLDD(A, N);
 #if defined(CHAMELEON_COPY_DIAG)
                     MORSE_TASK_zlacpy(
                         &options,
@@ -413,4 +410,5 @@ void morse_pzunmlqrh(MORSE_enum side, MORSE_enum trans,
     morse_desc_mat_free(DIAG);
     free(DIAG);
 #endif
+    (void)DIAG;
 }
diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c
index 3d53e459cd7d4ddbad667ff88944306796d118b0..7d359f059546d4853771e4bd808a5d7101ea6aad 100644
--- a/compute/pzunmqr.c
+++ b/compute/pzunmqr.c
@@ -317,4 +317,5 @@ void morse_pzunmqr(MORSE_enum side, MORSE_enum trans,
     morse_desc_mat_free(DIAG);
     free(DIAG);
 #endif
+    (void)DIAG;
 }
diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c
index aee79ee4a90c1129bdaa6ab89ecea83e431c8b5d..71bb3d6bd60cb2b8c78fa077b201adcc054c208a 100644
--- a/compute/pzunmqrrh.c
+++ b/compute/pzunmqrrh.c
@@ -61,7 +61,6 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans,
     int ldbM, ldbm, ldbMRD;
     int tempMm, tempkn, tempnn, tempmm, tempMRDm, tempkmin;
     int ib;
-    int nblk;
 
     morse = morse_context_self();
     if (sequence->status != MORSE_SUCCESS)
@@ -91,11 +90,14 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans,
 
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
-    /* necessary to avoid dependencies between tasks regarding the diag tile */
 #if defined(CHAMELEON_COPY_DIAG)
-    nblk = ( A->mt + BS -1 ) / BS;
-    DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-    morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
+    /* necessary to avoid dependencies between tasks regarding the diag tile */
+    {
+        int nblk = ( A->mt + BS -1 ) / BS;
+        nblk = ( A->mt + BS -1 ) / BS;
+        DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
+        morse_zdesc_alloc_diag(*DIAG, A->mb, A->nb, nblk * A->mb, A->nb, 0, 0, nblk * A->mb, A->nb, A->p, A->q);
+    }
 #endif
 
     K = min(A->mt, A->nt);
@@ -418,4 +420,5 @@ void morse_pzunmqrrh(MORSE_enum side, MORSE_enum trans,
     morse_desc_mat_free(DIAG);
     free(DIAG);
 #endif
+    (void)DIAG;
 }
diff --git a/compute/zgesvd.c b/compute/zgesvd.c
new file mode 100644
index 0000000000000000000000000000000000000000..6c5263e4c27e2a008d375da266abb54dec0d80bf
--- /dev/null
+++ b/compute/zgesvd.c
@@ -0,0 +1,611 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file zgesvd.c
+ *
+ *  PLASMA computational routines
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Gregoire Pichon
+ * @author Mathieu Faverge
+ * @date 2010-11-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include <coreblas/include/lapacke.h>
+#include "control/common.h"
+
+/***************************************************************************//**
+ *
+ * @ingroup MORSE_Complex64_t
+ *
+ *  MORSE_zgesvd - computes the singular value decomposition (SVD) of a complex
+ *  M-by-N matrix A, optionally computing the left and/or right singular
+ *  vectors. The SVD is written
+ *
+ *       A = U * SIGMA * transpose(V)
+ *
+ *  where SIGMA is an M-by-N matrix which is zero except for its
+ *  min(m,n) diagonal elements, U is an M-by-M orthogonal matrix, and
+ *  V is an N-by-N orthogonal matrix.  The diagonal elements of SIGMA
+ *  are the singular values of A; they are real and non-negative, and
+ *  are returned in descending order.  The first min(m,n) columns of
+ *  U and V are the left and right singular vectors of A.
+ *
+ *  Note that the routine returns V**T, not V.
+ *******************************************************************************
+ *
+ * @param[in] jobu
+ *          Specifies options for computing all or part of the matrix U.
+ *          Intended usage:
+ *          = MorseVec   = 'A'(lapack):  all M columns of U are returned
+ *                        in array U;
+ *          = MorseNoVec = 'N':  no columns of U (no left singular vectors)
+ *                        are computed.
+ *          = MorseSVec  = 'S': the first min(m,n) columns of U (the left
+ *                        singular vectors) are returned in the array U;
+ *                        NOT SUPPORTTED YET
+ *          = MorseOVec  = 'O': the first min(m,n) columns of U (the left
+ *                        singular vectors) are overwritten on the array A;
+ *                        NOT SUPPORTTED YET
+ *
+ * @param[in] jobvt
+ *          Specifies options for computing all or part of the matrix V**H.
+ *          Intended usage:
+ *          = MorseVec   = 'A'(lapack): all N rows of V**H are returned
+ *                        in the array VT;
+ *          = MorseNoVec = 'N': no rows of V**H (no right singular vectors)
+ *                        are computed.
+ *          = MorseSVec  = 'S': the first min(m,n) rows of V**H (the right
+ *                        singular vectors) are returned in the array VT;
+ *                        NOT SUPPORTTED YET
+ *          = MorseOVec  = 'O': the first min(m,n) rows of V**H (the right
+ *                        singular vectors) are overwritten on the array A;
+ *                        NOT SUPPORTTED YET
+ *
+ *          Note: jobu and jobvt cannot both be MorseOVec.
+ *
+ * @param[in] M
+ *          The number of rows of the matrix A. M >= 0.
+ *
+ * @param[in] N
+ *          The number of columns of the matrix A. N >= 0.
+ *
+ * @param[in,out] A
+ *          On entry, the M-by-N matrix A.
+ *          On exit,
+ *          if JOBU = 'O',  A is overwritten with the first min(m,n)
+ *                          columns of U (the left singular vectors,
+ *                          stored columnwise);
+ *          if JOBVT = 'O', A is overwritten with the first min(m,n)
+ *                          rows of V**H (the right singular vectors,
+ *                          stored rowwise);
+ *          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
+ *                          are destroyed.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,M).
+ *
+ * @param[out] S
+ *          The double precision singular values of A, sorted so that S(i) >= S(i+1).
+ *
+ * @param[in, out] descT
+ *          On entry, descriptor as return by MORSE_Alloc_Workspace_zgesvd
+ *          On exit, contains auxiliary factorization data.
+ *
+ * @param[out] U
+ *          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
+ *          If JOBU = 'A', U contains the M-by-M unitary matrix U;
+ *          if JOBU = 'S', U contains the first min(m,n) columns of U
+ *          (the left singular vectors, stored columnwise);
+ *          if JOBU = 'N' or 'O', U is not referenced.
+ *
+ * @param[in] LDU
+ *          The leading dimension of the array U.  LDU >= 1; if
+ *          JOBU = 'S' or 'A', LDU >= M.
+ *
+ * @param[out] VT
+ *         If JOBVT = 'A', VT contains the N-by-N unitary matrix
+ *         V**H;
+ *         if JOBVT = 'S', VT contains the first min(m,n) rows of
+ *         V**H (the right singular vectors, stored rowwise);
+ *         if JOBVT = 'N' or 'O', VT is not referenced.
+ *
+ * @param[in] LDVT
+ *         The leading dimension of the array VT.  LDVT >= 1; if
+ *         JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *          \retval <0 if -i, the i-th argument had an illegal value
+ *
+ *******************************************************************************
+ *
+ * @sa MORSE_zgesvd_Tile
+ * @sa MORSE_zgesvd_Tile_Async
+ * @sa MORSE_cgesvd
+ * @sa MORSE_dgesvd
+ * @sa MORSE_sgesvd
+ *
+ ******************************************************************************/
+int MORSE_zgesvd(MORSE_enum jobu, MORSE_enum jobvt,
+                  int M, int N,
+                  MORSE_Complex64_t *A, int LDA,
+                  double *S,
+                  MORSE_desc_t *descT,
+                  MORSE_Complex64_t *U, int LDU,
+                  MORSE_Complex64_t *VT, int LDVT)
+{
+    int NB;
+    int status;
+    MORSE_context_t  *morse;
+    MORSE_sequence_t *sequence = NULL;
+    MORSE_request_t   request = MORSE_REQUEST_INITIALIZER;
+    MORSE_desc_t descA;
+
+    morse = morse_context_self();
+    if (morse == NULL) {
+        morse_fatal_error("MORSE_zgesvd", "MORSE not initialized");
+        return MORSE_ERR_NOT_INITIALIZED;
+    }
+
+    /* Check input arguments */
+    if (jobu != MorseNoVec  && jobu != MorseVec) {
+        morse_error("MORSE_zgesvd", "illegal value of jobu");
+        return -1;
+    }
+    if (jobvt != MorseNoVec && jobvt != MorseVec) {
+        morse_error("MORSE_zgesvd", "illegal value of jobvt");
+        return -2;
+    }
+    if (M < 0) {
+        morse_error("MORSE_zgesvd", "illegal value of M");
+        return -3;
+    }
+    if (N < 0) {
+        morse_error("MORSE_zgesvd", "illegal value of N");
+        return -4;
+    }
+    if (LDA < max(1, M)) {
+        morse_error("MORSE_zgesvd", "illegal value of LDA");
+        return -6;
+    }
+    if (LDU < 1) {
+        morse_error("MORSE_zgesvd", "illegal value of LDU");
+        return -9;
+    }
+    if (LDVT < 1) {
+        morse_error("MORSE_zgesvd", "illegal value of LDVT");
+        return -11;
+    }
+    /* Quick return */
+    if (min(M, N) == 0) {
+        return MORSE_SUCCESS;
+    }
+
+    /* Tune NB & IB depending on M & N; Set NBNB */
+    status = morse_tune(MORSE_FUNC_ZGESVD, M, N, 0);
+    if (status != MORSE_SUCCESS) {
+        morse_error("MORSE_zgesvd", "morse_tune() failed");
+        return status;
+    }
+
+    /* Set MT, NT */
+    NB = MORSE_NB;
+
+    morse_sequence_create(morse, &sequence);
+
+    /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) { */
+    morse_zooplap2tile( descA, A, NB, NB,  LDA, N, 0, 0, M, N, sequence, &request,
+                        morse_desc_mat_free(&(descA)) );
+    /* } else { */
+    /*     morse_ziplap2tile( descA,   A, NB, NB,  LDA, N, 0, 0, M, N, */
+    /*                         sequence, &request); */
+    /* } */
+
+    /* Call the tile interface */
+    MORSE_zgesvd_Tile_Async(jobu, jobvt, &descA, S, descT, U, LDU, VT, LDVT, sequence, &request);
+
+    /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) { */
+    morse_zooptile2lap( descA, A, NB, NB, LDA, N,  sequence, &request);
+    morse_sequence_wait(morse, sequence);
+    morse_desc_mat_free(&descA);
+    /* } else { */
+    /*     morse_ziptile2lap( descA, A, NB, NB, LDA, N,  sequence, &request); */
+    /*     morse_sequence_wait(morse, sequence); */
+    /* } */
+
+    status = sequence->status;
+    morse_sequence_destroy(morse, sequence);
+    return status;
+}
+
+/***************************************************************************//**
+ *
+ * @ingroup MORSE_Complex64_t_Tile
+ *
+ *  MORSE_zgesvd_Tile - computes the singular value decomposition (SVD) of a complex
+ *  M-by-N matrix A, optionally computing the left and/or right singular
+ *  vectors.
+ *  Tile equivalent of MORSE_zgesvd().
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] jobu
+ *          Specifies options for computing all or part of the matrix U.
+ *          Intended usage:
+ *          = MorseVec   = 'A'(lapack):  all M columns of U are returned
+ *                        in array U;
+ *          = MorseNoVec = 'N':  no columns of U (no left singular vectors)
+ *                        are computed.
+ *          = MorseSVec  = 'S': the first min(m,n) columns of U (the left
+ *                        singular vectors) are returned in the array U;
+ *                        NOT SUPPORTTED YET
+ *          = MorseOVec  = 'O': the first min(m,n) columns of U (the left
+ *                        singular vectors) are overwritten on the array A;
+ *                        NOT SUPPORTTED YET
+ *
+ * @param[in] jobvt
+ *          Specifies options for computing all or part of the matrix V**H.
+ *          Intended usage:
+ *          = MorseVec   = 'A'(lapack): all N rows of V**H are returned
+ *                        in the array VT;
+ *          = MorseNoVec = 'N': no rows of V**H (no right singular vectors)
+ *                        are computed.
+ *          = MorseSVec  = 'S': the first min(m,n) rows of V**H (the right
+ *                        singular vectors) are returned in the array VT;
+ *                        NOT SUPPORTTED YET
+ *          = MorseOVec  = 'O': the first min(m,n) rows of V**H (the right
+ *                        singular vectors) are overwritten on the array A;
+ *                        NOT SUPPORTTED YET
+ *
+ *          Note: jobu and jobvt cannot both be MorseOVec.
+ *
+ * @param[in,out] A
+ *          On entry, the M-by-N matrix A.
+ *          On exit,
+ *          if JOBU = 'O',  A is overwritten with the first min(m,n)
+ *                          columns of U (the left singular vectors,
+ *                          stored columnwise);
+ *          if JOBVT = 'O', A is overwritten with the first min(m,n)
+ *                          rows of V**H (the right singular vectors,
+ *                          stored rowwise);
+ *          if JOBU .ne. 'O' and JOBVT .ne. 'O', the contents of A
+ *                          are destroyed.
+ *
+ * @param[out] S
+ *          The singular values of A, sorted so that S(i) >= S(i+1).
+ *
+ * @param[in, out] T
+ *          On entry, descriptor as return by MORSE_Alloc_Workspace_zgesvd
+ *          On exit, contains auxiliary factorization data.
+ *
+ * @param[out] U
+ *          (LDU,M) if JOBU = 'A' or (LDU,min(M,N)) if JOBU = 'S'.
+ *          If JOBU = 'A', U contains the M-by-M unitary matrix U;
+ *          if JOBU = 'S', U contains the first min(m,n) columns of U
+ *          (the left singular vectors, stored columnwise);
+ *          if JOBU = 'N' or 'O', U is not referenced.
+ *
+ * @param[in] LDU
+ *          The leading dimension of the array U.  LDU >= 1; if
+ *          JOBU = 'S' or 'A', LDU >= M.
+ *
+ * @param[out] VT
+ *         If JOBVT = 'A', VT contains the N-by-N unitary matrix
+ *         V**H;
+ *         if JOBVT = 'S', VT contains the first min(m,n) rows of
+ *         V**H (the right singular vectors, stored rowwise);
+ *         if JOBVT = 'N' or 'O', VT is not referenced.
+ *
+ * @param[in] LDVT
+ *         The leading dimension of the array VT.  LDVT >= 1; if
+ *         JOBVT = 'A', LDVT >= N; if JOBVT = 'S', LDVT >= min(M,N).
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \return MORSE_SUCCESS successful exit
+ *
+ *******************************************************************************
+ *
+ * @sa MORSE_zgesvd
+ * @sa MORSE_zgesvd_Tile_Async
+ * @sa MORSE_cgesvd_Tile
+ * @sa MORSE_dgesvd_Tile
+ * @sa MORSE_sgesvd_Tile
+ *
+ ******************************************************************************/
+int MORSE_zgesvd_Tile(MORSE_enum jobu, MORSE_enum jobvt,
+                      MORSE_desc_t *A,
+                      double *S,
+                      MORSE_desc_t *T,
+                      MORSE_Complex64_t *U, int LDU,
+                      MORSE_Complex64_t *VT, int LDVT)
+{
+    MORSE_context_t  *morse;
+    MORSE_sequence_t *sequence = NULL;
+    MORSE_request_t   request = MORSE_REQUEST_INITIALIZER;
+    int status;
+
+    morse = morse_context_self();
+    if (morse == NULL) {
+        morse_fatal_error("MORSE_zgesvd_Tile", "MORSE not initialized");
+        return MORSE_ERR_NOT_INITIALIZED;
+    }
+    morse_sequence_create(morse, &sequence);
+    MORSE_zgesvd_Tile_Async(jobu, jobvt, A, S, T, U, LDU, VT, LDVT, sequence, &request);
+    morse_sequence_wait(morse, sequence);
+    status = sequence->status;
+    morse_sequence_destroy(morse, sequence);
+    return status;
+}
+
+/***************************************************************************//**
+ *
+ * @ingroup MORSE_Complex64_t_Tile_Async
+ *
+ *  MORSE_zgesvd_Tile_Async - computes the singular value decomposition (SVD) of a complex
+ *  M-by-N matrix A, optionally computing the left and/or right singular
+ *  vectors.
+ *  Non-blocking equivalent of MORSE_zgesvd_Tile().
+ *  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa MORSE_zgesvd
+ * @sa MORSE_zgesvd_Tile
+ * @sa MORSE_cgesvd_Tile_Async
+ * @sa MORSE_dgesvd_Tile_Async
+ * @sa MORSE_sgesvd_Tile_Async
+ *
+ ******************************************************************************/
+int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt,
+                            MORSE_desc_t *A,
+                            double *S,
+                            MORSE_desc_t *T,
+                            MORSE_Complex64_t *U, int LDU,
+                            MORSE_Complex64_t *VT, int LDVT,
+                            MORSE_sequence_t *sequence, MORSE_request_t *request)
+{
+    MORSE_desc_t descA;
+    MORSE_desc_t descT;
+    MORSE_desc_t descU, descVT;
+    MORSE_desc_t descAB;
+    MORSE_desc_t *subA, *subT, *subUVT;
+    double *E;
+    int M, N, MINMN, NB, LDAB;
+    int KL, KU, uplo, nru, ncvt;
+    int info = 0;
+    char gbbrd_vect;
+
+    MORSE_context_t *morse;
+    morse = morse_context_self();
+
+    if (morse == NULL) {
+        morse_fatal_error("MORSE_zgesvd_Tile_Async", "MORSE not initialized");
+        return MORSE_ERR_NOT_INITIALIZED;
+    }
+    if (sequence == NULL) {
+        morse_fatal_error("MORSE_zgesvd_Tile_Async", "NULL sequence");
+        return MORSE_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        morse_fatal_error("MORSE_zgesvd_Tile_Async", "NULL request");
+        return MORSE_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if (sequence->status == MORSE_SUCCESS)
+        request->status = MORSE_SUCCESS;
+    else
+        return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED);
+
+    /* Check descriptors for correctness */
+    if (morse_desc_check(A) != MORSE_SUCCESS) {
+        morse_error("MORSE_zgesvd_Tile_Async", "invalid first descriptor");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    } else {
+        descA = *A;
+    }
+    if (morse_desc_check(T) != MORSE_SUCCESS) {
+        morse_error("MORSE_zgesvd_Tile_Async", "invalid fourth descriptor");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    } else {
+        descT = *T;
+    }
+    /* Check input arguments */
+    if (jobu != MorseNoVec  && jobu != MorseVec) {
+        morse_error("MORSE_zgesvd_Tile_Async", "illegal value of jobu");
+        return MORSE_ERR_NOT_SUPPORTED;
+    }
+    if (jobvt != MorseNoVec && jobvt != MorseVec) {
+        morse_error("MORSE_zgesvd_Tile_Async", "illegal value of jobvt");
+        return MORSE_ERR_NOT_SUPPORTED;
+    }
+    if (descA.nb != descA.mb) {
+        morse_error("MORSE_zgesvd_Tile_Async", "only square tiles supported");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+
+    M     = descA.m;
+    N     = descA.n;
+    MINMN = min(M, N);
+    NB    = descA.mb;
+    LDAB  = NB + 1;
+    uplo  = M >= N ? MorseUpper : MorseLower;
+
+    /* Reduction to band */
+    morse_pzgebrd_ge2gb( descA, descT,
+                         sequence, request );
+
+    /* Allocate band structure */
+    morse_zdesc_alloc_diag( descAB,
+                            LDAB, NB,
+                            LDAB, MINMN,
+                            0, 0,
+                            LDAB, MINMN,
+                            1, 1 );
+
+    /* Convert matrix to band form */
+    morse_pztile2band( uplo,
+                       &descA, &descAB,
+                       sequence, request );
+
+    E = malloc( MINMN * sizeof(double) );
+    if (E == NULL) {
+        morse_error("MORSE_zheevd_Tile_Async", "malloc(E) failed");
+        free(E);
+        return MORSE_ERR_OUT_OF_RESOURCES;
+    }
+    memset(E, 0, MINMN * sizeof(double) );
+
+    /* NCC = 0, C = NULL, we do not update any matrix with new singular vectors */
+    /* On exit, AB = U (S +~ E) VT */
+    if (uplo == MorseUpper){
+        KL = 0;
+        KU = NB;
+    }
+    else{
+        KL = NB;
+        KU = 0;
+    }
+
+    /* Manage the case where only singular values are required */
+    if (jobu == MorseNoVec) {
+        nru = 0;
+        if (jobvt == MorseNoVec) {
+            gbbrd_vect = 'N';
+            ncvt = 0;
+        }
+        else {
+            gbbrd_vect = 'P';
+            ncvt = N;
+        }
+    }
+    else {
+        nru = M;
+        if (jobvt == MorseNoVec) {
+            gbbrd_vect = 'Q';
+            ncvt = 0;
+        }
+        else {
+            gbbrd_vect = 'B';
+            ncvt = N;
+        }
+    }
+
+    morse_sequence_wait(morse, sequence);
+    info = LAPACKE_zgbbrd( LAPACK_COL_MAJOR,
+                           gbbrd_vect,
+                           M, N,
+                           0, KL, KU,
+                           (MORSE_Complex64_t *) descAB.mat, LDAB,
+                           S, E,
+                           U, LDU,
+                           VT, LDVT,
+                           NULL, 1 );
+    if (info != 0) {
+        fprintf(stderr, "MORSE_zgesvd_Tile_Async: LAPACKE_zgbbrd = %d\n", info );
+    }
+    morse_desc_mat_free(&descAB);
+
+    /* Transform U and Vt into tile format */
+    if ( jobu != MorseNoVec ) {
+        morse_zooplap2tile( descU, U, NB, NB, LDU, M, 0, 0, M, M, sequence, request, morse_desc_mat_free(&(descU)) );
+    }
+    if ( jobvt != MorseNoVec ) {
+        morse_zooplap2tile( descVT, VT, NB, NB, LDVT, N, 0, 0, N, N, sequence, request, morse_desc_mat_free(&(descVT)) );
+    }
+    morse_sequence_wait(morse, sequence);
+
+    subA = NULL;
+    subT = NULL;
+    subUVT = NULL;
+    if ( jobu != MorseNoVec ) {
+        if ( M < N ){
+            subA   = morse_desc_submatrix(&descA, descA.mb, 0, descA.m-descA.mb, descA.n-descA.nb);
+            subUVT = morse_desc_submatrix(&descU, descU.mb, 0, descU.m-descU.mb, descU.n);
+            subT   = morse_desc_submatrix(&descT, descT.mb, 0, descT.m-descT.mb, descT.n-descT.nb);
+            morse_pzunmqr( MorseLeft, MorseNoTrans,
+                           subA, subUVT, subT,
+                           sequence, request );
+        }
+        else {
+            morse_pzunmqr( MorseLeft, MorseNoTrans,
+                           &descA, &descU, &descT,
+                           sequence, request );
+        }
+    }
+
+    if ( jobvt != MorseNoVec ) {
+        if ( M < N ){
+            morse_pzunmlq( MorseRight, MorseNoTrans,
+                           &descA, &descVT, &descT,
+                           sequence, request );
+        }
+        else {
+            subA   = morse_desc_submatrix(&descA,  0, descA.nb,  descA.m-descA.mb, descA.n -descA.nb);
+            subUVT = morse_desc_submatrix(&descVT, 0, descVT.nb, descVT.m,         descVT.n-descVT.nb);
+            subT   = morse_desc_submatrix(&descT,  0, descT.nb,  descT.m-descT.mb, descT.n -descT.nb);
+            morse_pzunmlq( MorseRight, MorseNoTrans,
+                           subA, subUVT, subT,
+                           sequence, request );
+        }
+    }
+
+    /* Transform U and VT into lapack layout */
+    if ( jobu != MorseNoVec ) {
+        morse_zooptile2lap( descU,  U,  NB, NB, LDU,  M, sequence, request );
+    }
+    if ( jobvt != MorseNoVec ) {
+        morse_zooptile2lap( descVT, VT, NB, NB, LDVT, N, sequence, request );
+    }
+
+    morse_sequence_wait(morse, sequence);
+    if (subA) {
+        free(subA); free(subUVT); free(subT);
+    }
+
+    /* Solve the bidiagonal SVD problem */
+    /* On exit, U and VT are updated with bidiagonal matrix singular vectors */
+    info = LAPACKE_zbdsqr( LAPACK_COL_MAJOR, 'U',
+                           MINMN, ncvt, nru, 0,
+                           S, E,
+                           VT, LDVT, U, LDU, NULL, 1 );
+    if (info != 0) {
+        fprintf(stderr, "MORSE_zgesvd_Tile_Async: LAPACKE_zbdsqr = %d\n", info );
+    }
+
+    if (jobu != MorseNoVec)
+        morse_desc_mat_free( &descU );
+    if (jobvt != MorseNoVec)
+        morse_desc_mat_free( &descVT );
+    free(E);
+    return MORSE_SUCCESS;
+}
diff --git a/compute/zheevd.c b/compute/zheevd.c
new file mode 100644
index 0000000000000000000000000000000000000000..7272b0c243065b70712c0f5a64b27a3626479764
--- /dev/null
+++ b/compute/zheevd.c
@@ -0,0 +1,509 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ * @file zheevd.c
+ *
+ *  PLASMA computational routines
+ *  Release Date: November, 15th 2009
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.7.1
+ * @author Azzam Haidar
+ * @author Hatem Ltaief
+ * @date 2010-11-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include <coreblas/include/lapacke.h>
+#include "control/common.h"
+
+/***************************************************************************//**
+ *
+ * @ingroup MORSE_Complex64_t
+ *
+ *  MORSE_zheevd - Computes all eigenvalues and, optionally,
+ *  eigenvectors of a complex Hermitian matrix A. The matrix A is
+ *  preliminary reduced to tridiagonal form using a two-stage
+ *  approach:
+ *  First stage: reduction to band tridiagonal form;
+ *  Second stage: reduction from band to tridiagonal form.
+ *
+ *******************************************************************************
+ *
+ * @param[in] jobz
+ *          Intended usage:
+ *          = MorseNoVec: computes eigenvalues only;
+ *          = MorseVec: computes eigenvalues and eigenvectors.
+ *
+ * @param[in] uplo
+ *          Specifies whether the matrix A is upper triangular or
+ *          lower triangular:
+ *          = MorseUpper: Upper triangle of A is stored;
+ *          = MorseLower: Lower triangle of A is stored.
+ *
+ * @param[in] N
+ *          The order of the matrix A. N >= 0.
+ *
+ * @param[in,out] A
+ *          On entry, the symmetric (or Hermitian) matrix A.
+ *          If uplo = MorseUpper, the leading N-by-N upper triangular
+ *          part of A contains the upper triangular part of the matrix
+ *          A, and the strictly lower triangular part of A is not
+ *          referenced.
+ *          If uplo = MorseLower, the leading N-by-N lower triangular
+ *          part of A contains the lower triangular part of the matrix
+ *          A, and the strictly upper triangular part of A is not
+ *          referenced.
+ *          On exit, the lower triangle (if uplo = MorseLower) or the
+ *          upper triangle (if uplo = MorseUpper) of A, including the
+ *          diagonal, is destroyed.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,N).
+ *
+ * @param[out] W
+ *          On exit, if info = 0, the eigenvalues.
+ *
+ * @param[in, out] descT
+ *          On entry, descriptor as return by MORSE_Alloc_Workspace_zheevd
+ *          On exit, contains auxiliary factorization data.
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *          \retval <0 if -i, the i-th argument had an illegal value
+ *          \retval >0 if INFO = i, the algorithm failed to converge; i
+ *               off-diagonal elements of an intermediate tridiagonal
+ *               form did not converge to zero.
+ *
+ *******************************************************************************
+ *
+ * @sa MORSE_zheevd_Tile
+ * @sa MORSE_zheevd_Tile_Async
+ * @sa MORSE_cheevd
+ * @sa MORSE_dsyev
+ * @sa MORSE_ssyev
+ *
+ ******************************************************************************/
+int MORSE_zheevd(MORSE_enum jobz, MORSE_enum uplo, int N,
+                 MORSE_Complex64_t *A, int LDA,
+                 double *W,
+                 MORSE_desc_t *descT)
+{
+    int NB;
+    int status;
+    MORSE_context_t  *morse;
+    MORSE_sequence_t *sequence = NULL;
+    MORSE_request_t   request = MORSE_REQUEST_INITIALIZER;
+    MORSE_desc_t      descA;
+
+    morse = morse_context_self();
+    if (morse == NULL) {
+        morse_fatal_error("MORSE_zheevd", "MORSE not initialized");
+        return MORSE_ERR_NOT_INITIALIZED;
+    }
+
+    /* Check input arguments */
+    if (jobz != MorseNoVec && jobz != MorseVec) {
+        morse_error("MORSE_zheevd", "illegal value of jobz");
+        return -1;
+    }
+    if (uplo != MorseLower && uplo != MorseUpper) {
+        morse_error("MORSE_zheevd", "illegal value of uplo");
+        return -2;
+    }
+    if (N < 0) {
+        morse_error("MORSE_zheevd", "illegal value of N");
+        return -3;
+    }
+    if (LDA < max(1, N)) {
+        morse_error("MORSE_zheevd", "illegal value of LDA");
+        return -5;
+    }
+
+    /* Quick return */
+    if (N == 0)
+        return MORSE_SUCCESS;
+
+    /* Tune NB & IB depending on N; Set NBNB */
+    status = morse_tune(MORSE_FUNC_ZHEEVD, N, N, 0);
+    if (status != MORSE_SUCCESS) {
+        morse_error("MORSE_zheevd", "morse_tune() failed");
+        return status;
+    }
+
+    /* Set NT */
+    NB = MORSE_NB;
+
+    morse_sequence_create(morse, &sequence);
+
+    /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) { */
+    morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, sequence, &request,
+                        morse_desc_mat_free(&(descA)) );
+    /* } else { */
+    /*     morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, */
+    /*                         sequence, &requoest); */
+    /* } */
+
+    /* Call the tile interface */
+    MORSE_zheevd_Tile_Async(jobz, uplo, &descA, W, descT, sequence, &request);
+
+    /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) { */
+    morse_zooptile2lap( descA, A, NB, NB, LDA, N,  sequence, &request);
+    morse_sequence_wait(morse, sequence);
+    morse_desc_mat_free(&descA);
+    /* } else { */
+    /*     morse_ziptile2lap( descA, A, NB, NB, LDA, N,  sequence, &request); */
+    /*     morse_sequence_wait(morse, sequence); */
+    /* } */
+
+    status = sequence->status;
+    morse_sequence_destroy(morse, sequence);
+    return status;
+}
+/***************************************************************************//**
+ *
+ * @ingroup MORSE_Complex64_t_Tile
+ *
+ *  MORSE_zheevd_Tile - Computes all eigenvalues and, optionally, eigenvectors of a
+ *  complex Hermitian matrix A using a two-stage approach:
+ *  First stage: reduction to band tridiagonal form;
+ *  Second stage: reduction from band to tridiagonal form.
+ *
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] jobz
+ *          Intended usage:
+ *          = MorseNoVec: computes eigenvalues only;
+ *          = MorseVec: computes eigenvalues and eigenvectors.
+ *
+ * @param[in] uplo
+ *          Specifies whether the matrix A is upper triangular or
+ *          lower triangular:
+ *          = MorseUpper: Upper triangle of A is stored;
+ *          = MorseLower: Lower triangle of A is stored.
+ *
+ * @param[in,out] A
+ *          On entry, the symmetric (or Hermitian) matrix A.
+ *          If uplo = MorseUpper, the leading N-by-N upper triangular
+ *          part of A contains the upper triangular part of the matrix
+ *          A, and the strictly lower triangular part of A is not
+ *          referenced.
+ *          If UPLO = 'L', the leading N-by-N lower triangular part of
+ *          A contains the lower triangular part of the matrix A, and
+ *          the strictly upper triangular part of A is not referenced.
+ *          On exit, if jobz = MorseVec, then if return value = 0, A
+ *          contains the orthonormal eigenvectors of the matrix A.
+ *          If jobz = MorseNoVec, then on exit the lower triangle (if
+ *          uplo = MorseLower) or the upper triangle (if uplo =
+ *          MorseUpper) of A, including the diagonal, is destroyed.*
+ *
+ * @param[out] W
+ *          On exit, if info = 0, the eigenvalues.
+ *
+ * @param[in,out] T
+ *          On entry, descriptor as return by
+ *          MORSE_Alloc_Workspace_zheevd
+ *          On exit, contains auxiliary factorization data.
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *          \retval <0 if -i, the i-th argument had an illegal value
+ *          \retval >0 if INFO = i, the algorithm failed to converge; i
+ *               off-diagonal elements of an intermediate tridiagonal
+ *               form did not converge to zero.
+ *
+ *******************************************************************************
+ *
+ * @sa MORSE_zheevd_Tile
+ * @sa MORSE_zheevd_Tile_Async
+ * @sa MORSE_cheevd
+ * @sa MORSE_dsyev
+ * @sa MORSE_ssyev
+ *
+ ******************************************************************************/
+int MORSE_zheevd_Tile(MORSE_enum jobz, MORSE_enum uplo,
+                      MORSE_desc_t *A, double *W, MORSE_desc_t *T)
+{
+    MORSE_context_t  *morse;
+    MORSE_sequence_t *sequence = NULL;
+    MORSE_request_t   request = MORSE_REQUEST_INITIALIZER;
+    int status;
+
+    morse = morse_context_self();
+    if (morse == NULL) {
+        morse_fatal_error("MORSE_zheevd_Tile", "MORSE not initialized");
+        return MORSE_ERR_NOT_INITIALIZED;
+    }
+    morse_sequence_create(morse, &sequence);
+    MORSE_zheevd_Tile_Async(jobz, uplo, A, W, T, sequence, &request);
+    morse_sequence_wait(morse, sequence);
+
+    RUNTIME_desc_getoncpu(A);
+    RUNTIME_desc_getoncpu(T);
+
+    status = sequence->status;
+    morse_sequence_destroy(morse, sequence);
+    return status;
+}
+
+/***************************************************************************//**
+ *
+ * @ingroup MORSE_Complex64_t_Tile_Async
+ *
+ *  MORSE_zheevd_Tile_Async - Computes all eigenvalues and,
+ *  optionally, eigenvectors of a complex Hermitian matrix A using a
+ *  two-stage approach:
+ *  First stage: reduction to band tridiagonal form;
+ *  Second stage: reduction from band to tridiagonal form.
+ *
+ *  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] jobz
+ *          Intended usage:
+ *          = MorseNoVec: computes eigenvalues only;
+ *          = MorseVec: computes eigenvalues and eigenvectors.
+ *
+ * @param[in] uplo
+ *          Specifies whether the matrix A is upper triangular or
+ *          lower triangular:
+ *          = MorseUpper: Upper triangle of A is stored;
+ *          = MorseLower: Lower triangle of A is stored.
+ *
+ * @param[in,out] A
+ *          On entry, the symmetric (or Hermitian) matrix A.
+ *          If uplo = MorseUpper, the leading N-by-N upper triangular
+ *          part of A contains the upper triangular part of the matrix
+ *          A, and the strictly lower triangular part of A is not
+ *          referenced.
+ *          If UPLO = 'L', the leading N-by-N lower triangular part of
+ *          A contains the lower triangular part of the matrix A, and
+ *          the strictly upper triangular part of A is not referenced.
+ *          On exit, if jobz = MorseVec, then if return value = 0, A
+ *          contains the orthonormal eigenvectors of the matrix A.
+ *          If jobz = MorseNoVec, then on exit the lower triangle (if
+ *          uplo = MorseLower) or the upper triangle (if uplo =
+ *          MorseUpper) of A, including the diagonal, is destroyed.*
+ *
+ * @param[out] W
+ *          On exit, if info = 0, the eigenvalues.
+ *
+ * @param[in,out] T
+ *          On entry, descriptor as return by
+ *          MORSE_Alloc_Workspace_zheevd
+ *          On exit, contains auxiliary factorization data.
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa MORSE_zheevd
+ * @sa MORSE_zheevd_Tile
+ * @sa MORSE_cheevd_Tile_Async
+ * @sa MORSE_dsyev_Tile_Async
+ * @sa MORSE_ssyev_Tile_Async
+ *
+ ******************************************************************************/
+int MORSE_zheevd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo,
+                            MORSE_desc_t *A, double *W, MORSE_desc_t *T,
+                            MORSE_sequence_t *sequence, MORSE_request_t *request)
+{
+    MORSE_context_t *morse;
+    MORSE_desc_t descA;
+    MORSE_desc_t descT;
+    MORSE_Complex64_t *Q2;
+    int N, NB, status;
+    double *E;
+    MORSE_Complex64_t *V;
+    MORSE_desc_t descQ2, descV;
+    MORSE_desc_t *subA, *subQ, *subT;
+
+    morse = morse_context_self();
+    if (morse == NULL) {
+        morse_fatal_error("MORSE_zheevd_Tile_Async", "MORSE not initialized");
+        return MORSE_ERR_NOT_INITIALIZED;
+    }
+    if (sequence == NULL) {
+        morse_fatal_error("MORSE_zheevd_Tile_Async", "NULL sequence");
+        return MORSE_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        morse_fatal_error("MORSE_zheevd_Tile_Async", "NULL request");
+        return MORSE_ERR_UNALLOCATED;
+    }
+    /* Check sequence status */
+    if (sequence->status == MORSE_SUCCESS)
+        request->status = MORSE_SUCCESS;
+    else
+        return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED);
+
+    /* Check descriptors for correctness */
+    if (morse_desc_check(A) != MORSE_SUCCESS) {
+        morse_error("MORSE_zheevd_Tile_Async", "invalid descriptor");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    } else {
+        descA = *A;
+    }
+    if (morse_desc_check(T) != MORSE_SUCCESS) {
+        morse_error("MORSE_zheevd_Tile_Async", "invalid descriptor");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    } else {
+        descT = *T;
+    }
+    /* Check input arguments */
+    if (jobz != MorseNoVec && jobz != MorseVec) {
+        morse_error("MORSE_zheevd_Tile_Async", "illegal value of jobz");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+    if (uplo != MorseLower && uplo != MorseUpper) {
+        morse_error("MORSE_zheevd_Tile_Async", "illegal value of uplo");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+    if (descA.m != descA.n) {
+        morse_error("MORSE_zheevd_Tile_Async", "matrix need to be square");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+    if (descA.nb != descA.mb) {
+        morse_error("MORSE_zheevd_Tile_Async", "only square tiles supported");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+
+    N   = descA.m;
+    NB  = min(descA.mb,descA.m);
+
+    /* Allocate data structures for reduction to tridiagonal form */
+    E = malloc( (N - 1) * sizeof(double) );
+    if (E == NULL) {
+        morse_error("MORSE_zheevd_Tile_Async", "malloc(E) failed");
+        free(E);
+        return MORSE_ERR_OUT_OF_RESOURCES;
+    }
+
+    if (jobz == MorseVec){
+        /* Have to synchrone right now */
+        Q2 = malloc( N * N * sizeof(MORSE_Complex64_t));
+        /* For bug in lapacke */
+        memset( Q2, 0, N * N * sizeof(MORSE_Complex64_t));
+    }
+
+    status = MORSE_zhetrd_Tile_Async( jobz, uplo,
+                                      A, W, E, T,
+                                      Q2, N,
+                                      sequence, request );
+    if (status != 0) {
+        morse_error("MORSE_zheevd_Tile", "MORSE_zhetrd failed");
+    }
+
+
+    if (jobz == MorseNoVec){
+        /* Tridiagonal eigensolver */
+        status = LAPACKE_zstedc( LAPACK_COL_MAJOR,
+                                 morse_lapack_const(jobz),
+                                 N, W, E,
+                                 NULL, N );
+        if (status != 0) {
+            morse_error("MORSE_zheevd_Tile_Async", "LAPACKE_zstedc failed");
+        }
+        free(E);
+        return MORSE_SUCCESS;
+    }
+
+    V = malloc( N * N * sizeof(MORSE_Complex64_t) );
+    if (V == NULL) {
+        morse_error("MORSE_zheevd_Tile_Async", "malloc(V) failed");
+        free(V);
+        return MORSE_ERR_OUT_OF_RESOURCES;
+    }
+    /* For bug in lapacke */
+    memset(V, 0, N * N * sizeof(MORSE_Complex64_t));
+
+    /* Tridiagonal eigensolver */
+    /* V contains the eigenvectors of the tridiagonal system on exit */
+    status = LAPACKE_zstedc( LAPACK_COL_MAJOR,
+                             morse_lapack_const(MorseIvec),
+                             N, W, E,
+                             V, N );
+    if (status != 0) {
+        morse_error("MORSE_zheevd_Tile_Async", "LAPACKE_zstedc failed");
+    }
+
+    /* Convert matrices in tile format */
+    /* A/T from MORSE_zhetrd   refer  to Q1 (tile   layout) */
+    /* Q   from MORSE_zhetrd   refers to Q2 (lapack layout) */
+    /* V   from LAPACKE_zstedc refers to V  (lapack layout) */
+    /* The final eigenvectors are (Q1 Q2 V) or (Q1^h Q2 V)  */
+    morse_zooplap2tile( descQ2, Q2, NB, NB, N, N, 0, 0, N, N, sequence, request,
+                        morse_desc_mat_free(&(descQ2)) );
+    morse_zooplap2tile( descV,  V,  NB, NB, N, N, 0, 0, N, N, sequence, request,
+                        morse_desc_mat_free(&(descQ2)); morse_desc_mat_free(&(descV)) );
+    if (uplo == MorseLower)
+    {
+        subA = morse_desc_submatrix(&descA,  descA.mb,  0, descA.m -descA.mb,  descA.n-descA.nb);
+        subQ = morse_desc_submatrix(&descQ2, descQ2.mb, 0, descQ2.m-descQ2.mb, descQ2.n        );
+        subT = morse_desc_submatrix(&descT,  descT.mb,  0, descT.m -descT.mb,  descT.n-descT.nb);
+
+        /* Compute Q2 = Q1 * Q2 */
+        morse_pzunmqr( MorseLeft, MorseNoTrans,
+                       subA, subQ, subT,
+                       sequence, request );
+
+        /* Compute the final eigenvectors A = (Q1 * Q2) * V */
+        morse_pzgemm( MorseNoTrans, MorseNoTrans,
+                      1.0, &descQ2, &descV,
+                      0.0, &descA,
+                      sequence, request);
+
+    }
+    else {
+        subA = morse_desc_submatrix(&descA,  0, descA.nb,  descA.m -descA.mb,  descA.n -descA.nb );
+        subQ = morse_desc_submatrix(&descQ2, descQ2.mb, 0, descQ2.m-descQ2.mb, descQ2.n          );
+        subT = morse_desc_submatrix(&descT,  0, descT.nb,  descT.m -descT.mb,  descT.n -descT.nb );
+
+        /* Compute Q2 = Q1^h * Q2 */
+        morse_pzunmlq( MorseLeft, MorseConjTrans,
+                       subA, subQ, subT,
+                       sequence, request );
+
+        /* Compute the final eigenvectors A =  (Q1^h * Q2) * V */
+        morse_pzgemm( MorseNoTrans, MorseNoTrans,
+                      1.0, &descQ2, &descV,
+                      0.0, &descA,
+                      sequence, request);
+    }
+
+    morse_sequence_wait(morse, sequence);
+
+    free(subA); free(subQ); free(subT);
+    morse_desc_mat_free(&descQ2);
+    free(Q2);
+
+    morse_desc_mat_free(&descV);
+    free(V);
+
+    free(E);
+    return MORSE_SUCCESS;
+}
diff --git a/compute/zhetrd.c b/compute/zhetrd.c
new file mode 100644
index 0000000000000000000000000000000000000000..6e11245aa99f5e73c98dfc28cf30647c26c6dde2
--- /dev/null
+++ b/compute/zhetrd.c
@@ -0,0 +1,423 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ * @file zhetrd.c
+ *
+ *  PLASMA computational routines
+ *  Release Date: November, 15th 2009
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.7.1
+ * @author Azzam Haidar
+ * @author Hatem Ltaief
+ * @author Gregoire Pichon
+ * @author Mathieu Faverge
+ * @date 2010-11-15
+ * @precisions normal z -> s d c
+ *
+ **/
+#include <coreblas/include/lapacke.h>
+#include "control/common.h"
+
+/***************************************************************************//**
+ *
+ * @ingroup MORSE_Complex64_t
+ *
+ *  MORSE_zhetrd - reduces a complex Hermitian matrix A to real symmetric
+ *  tridiagonal form S using a two-stage approach
+ *  First stage: reduction to band tridiagonal form (unitary Q1);
+ *  Second stage: reduction from band to tridiagonal form (unitary
+ *  Q2).  Let Q = Q1 * Q2 be the global unitary transformation; Q**H *
+ *  A * Q = S.
+ *
+ *******************************************************************************
+ *
+ * @param[in] jobz
+ *          Intended usage:
+ *          = MorseNoVec: computes tridiagonal only;
+ *          = MorseVec: computes tridiagonal and generate the orthogonal matrix Q.
+ *
+ * @param[in] uplo
+ *          Specifies whether the matrix A is upper triangular or
+ *          lower triangular:
+ *          = MorseUpper:: Upper triangle of A is stored;
+ *          = MorseLower: Lower triangle of A is stored.
+ *
+ * @param[in] N
+ *          The order of the matrix A. N >= 0.
+ *
+ * @param[in,out] A
+ *          On entry, the symmetric (or Hermitian) matrix A.
+ *          If uplo = MorseUpper, the leading N-by-N upper triangular
+ *          part of A contains the upper triangular part of the matrix
+ *          A, and the strictly lower triangular part of A is not
+ *          referenced.
+ *          If uplo = MorseLower, the leading N-by-N lower triangular
+ *          part of A contains the lower triangular part of the matrix
+ *          A, and the strictly upper triangular part of A is not
+ *          referenced.
+ *          On exit, the lower triangle (if uplo = MorseLower) or the
+ *          upper triangle (if uplo = MorseUpper) of A, including the
+ *          diagonal, is destroyed.
+ *
+ * @param[in] LDA
+ *          The leading dimension of the array A. LDA >= max(1,N).
+ *
+ * @param[out] D
+ *          On exit, the diagonal elements of the tridiagonal matrix:
+ *          D(i) = A(i,i).
+ *
+ * @param[out] E
+ *          On exit, he off-diagonal elements of the tridiagonal matrix:
+ *          E(i) = A(i,i+1) if uplo = MorseUpper, E(i) = A(i+1,i) if uplo = MorseLower.
+ *
+ * @param[out] descT
+ *          On entry, descriptor as return by MORSE_Alloc_Workspace_zhetrd
+ *          On exit, contains auxiliary factorization data.
+ *
+ * @param[out] Q
+ *          On exit, if jobz = MorseVec, then if return value = 0, Q
+ *          contains the N-by-N unitary matrix Q.
+ *          If jobz = MorseNoVec, then it is not referenced.
+ *
+ * @param[in] LDQ
+ *          The leading dimension of the array Q. LDQ >= N.
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *          \retval <0 if -i, the i-th argument had an illegal value
+ *          \retval >0 if INFO = i, the algorithm failed to converge; i
+ *               off-diagonal elements of an intermediate tridiagonal
+ *               form did not converge to zero.
+ *
+ *******************************************************************************
+ *
+ * @sa MORSE_zhetrd_Tile
+ * @sa MORSE_zhetrd_Tile_Async
+ * @sa MORSE_chetrd
+ * @sa MORSE_dsytrd
+ * @sa MORSE_ssytrd
+ *
+ ******************************************************************************/
+int MORSE_zhetrd(MORSE_enum jobz, MORSE_enum uplo, int N,
+                 MORSE_Complex64_t *A, int LDA,
+                 double *D,
+                 double *E,
+                 MORSE_desc_t *descT,
+                 MORSE_Complex64_t *Q, int LDQ)
+{
+    int NB;
+    int status;
+    MORSE_context_t *morse;
+    MORSE_sequence_t *sequence = NULL;
+    MORSE_request_t request = MORSE_REQUEST_INITIALIZER;
+    MORSE_desc_t descA;
+
+    morse = morse_context_self();
+    if (morse == NULL) {
+        morse_error("MORSE_zhetrd", "MORSE not initialized");
+        return MORSE_ERR_NOT_INITIALIZED;
+    }
+
+    /* Check input arguments */
+    if (jobz != MorseNoVec && jobz != MorseVec) {
+        morse_error("MORSE_zhetrd", "illegal value of jobz");
+        return -1;
+    }
+    if (uplo != MorseLower && uplo != MorseUpper) {
+        morse_error("MORSE_zhetrd", "illegal value of uplo");
+        return -1;
+    }
+    if (N < 0) {
+        morse_error("MORSE_zhetrd", "illegal value of N");
+        return -2;
+    }
+    if (LDA < max(1, N)) {
+        morse_error("MORSE_zhetrd", "illegal value of LDA");
+        return -4;
+    }
+
+    /* Quick return */
+    if (N == 0)
+        return MORSE_SUCCESS;
+
+    /* Tune NB & IB depending on N; Set NBNB */
+    status = morse_tune(MORSE_FUNC_ZHETRD, N, N, 0);
+    if (status != MORSE_SUCCESS) {
+        morse_error("MORSE_zhetrd", "morse_tune() failed");
+        return status;
+    }
+    /* Set NT */
+    NB = MORSE_NB;
+
+    morse_sequence_create(morse, &sequence);
+
+    /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) { */
+    morse_zooplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, sequence, &request,
+                        morse_desc_mat_free(&(descA)) );
+    /* } else { */
+    /*     morse_ziplap2tile( descA, A, NB, NB, LDA, N, 0, 0, N, N, */
+    /*                         sequence, &request); */
+    /* } */
+
+    /* Call the tile interface */
+    MORSE_zhetrd_Tile_Async(jobz, uplo, &descA, D, E, descT, Q, LDQ, sequence, &request);
+
+    /* if ( MORSE_TRANSLATION == MORSE_OUTOFPLACE ) { */
+    morse_zooptile2lap( descA, A, NB, NB, LDA, N,  sequence, &request);
+    morse_sequence_wait(morse, sequence);
+    morse_desc_mat_free(&descA);
+    /* } else { */
+    /*     morse_ziptile2lap( descA, A, NB, NB, LDA, N,  sequence, &request); */
+    /* morse_sequence_wait(morse, sequence); */
+    /* } */
+
+    status = sequence->status;
+    morse_sequence_destroy(morse, sequence);
+    return status;
+}
+/***************************************************************************//**
+ *
+ * @ingroup MORSE_Complex64_t_Tile
+ *
+ *  MORSE_zhetrd_Tile - reduces a complex Hermitian matrix A to real symmetric
+ *  tridiagonal form S using a two-stage approach
+ *  First stage: reduction to band tridiagonal form (unitary Q1);
+ *  Second stage: reduction from band to tridiagonal form (unitary Q2).
+ *  Let Q = Q1 * Q2 be the global unitary transformation;
+ *  Q**H * A * Q = S.
+ *  Tile equivalent of MORSE_zhetrd().
+ *  Operates on matrices stored by tiles.
+ *  All matrices are passed through descriptors.
+ *  All dimensions are taken from the descriptors.
+ *
+ *******************************************************************************
+ *
+ * @param[in] jobz
+ *          Intended usage:
+ *          = MorseNoVec: computes tridiagonal only;
+ *          = MorseVec: computes tridiagonal and generate the orthogonal matrix Q.
+ *
+ * @param[in] uplo
+ *          Specifies whether the matrix A is upper triangular or lower triangular:
+ *          = MorseUpper: Upper triangle of A is stored;
+ *          = MorseLower: Lower triangle of A is stored.
+ *
+ * @param[in,out] A
+ *          On entry, the symmetric (or Hermitian) matrix A.  If uplo
+ *          = MorseUpper, the leading N-by-N upper triangular part of
+ *          A contains the upper triangular part of the matrix A, and
+ *          the strictly lower triangular part of A is not referenced.
+ *          If UPLO = 'L', the leading N-by-N lower triangular part of
+ *          A contains the lower triangular part of the matrix A, and
+ *          the strictly upper triangular part of A is not referenced.
+ *          On exit, if jobz = MorseVec, then if return value = 0, A
+ *          contains the orthonormal eigenvectors of the matrix A.
+ *          If jobz = MorseNoVec, then on exit the lower triangle (if
+ *          uplo = MorseLower) or the upper triangle (if uplo =
+ *          MorseUpper) of A, including the diagonal, is destroyed.*
+ *
+ * @param[out] D
+ *          On exit, the diagonal elements of the tridiagonal matrix:
+ *          D(i) = A(i,i).
+ *
+ * @param[out] E
+ *          On exit, he off-diagonal elements of the tridiagonal matrix:
+ *          E(i) = A(i,i+1) if uplo = MorseUpper,
+ *          E(i) = A(i+1,i) if uplo = MorseLower.
+ *
+ * @param[out] T
+ *          On exit, auxiliary factorization data.
+ *
+ * @param[out] Q
+ *          On exit, if jobz = MorseVec, then if return value = 0, Q
+ *          contains the N-by-N unitary matrix Q.
+ *          If jobz = MorseNoVec, then it is not referenced.
+ *
+ * @param[in] LDQ
+ *          The leading dimension of the array Q. LDQ >= N.
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *          \retval <0 if -i, the i-th argument had an illegal value
+ *          \retval >0 if INFO = i, the algorithm failed to converge; i
+ *               off-diagonal elements of an intermediate tridiagonal
+ *               form did not converge to zero.
+ *
+ *******************************************************************************
+ *
+ * @sa MORSE_zhetrd
+ * @sa MORSE_zhetrd_Tile_Async
+ * @sa MORSE_chetrd_Tile
+ * @sa MORSE_dsytrd_Tile
+ * @sa MORSE_ssytrd_Tile
+ * @sa MORSE_zhetrd_Tile
+ *
+ ******************************************************************************/
+int MORSE_zhetrd_Tile(MORSE_enum jobz, MORSE_enum uplo,
+                      MORSE_desc_t *A, double *D, double *E,
+                      MORSE_desc_t *T, MORSE_Complex64_t *Q, int LDQ)
+{
+    MORSE_context_t *morse;
+    MORSE_sequence_t *sequence = NULL;
+    MORSE_request_t request = MORSE_REQUEST_INITIALIZER;
+    int status;
+
+    morse = morse_context_self();
+    if (morse == NULL) {
+        morse_fatal_error("MORSE_zhetrd_Tile", "MORSE not initialized");
+        return MORSE_ERR_NOT_INITIALIZED;
+    }
+    morse_sequence_create(morse, &sequence);
+    MORSE_zhetrd_Tile_Async(jobz, uplo, A, D, E, T, Q, LDQ, sequence, &request);
+    morse_sequence_wait(morse, sequence);
+    status = sequence->status;
+    morse_sequence_destroy(morse, sequence);
+    return status;
+}
+
+/***************************************************************************//**
+ *
+ * @ingroup MORSE_Complex64_t_Tile_Async
+ *
+ *  MORSE_zhetrd_Tile_Async - Computes all eigenvalues and,
+ *  optionally, eigenvectors of a complex Hermitian matrix A using a
+ *  two-stage approach:
+ *  First stage: reduction to band tridiagonal form;
+ *  Second stage: reduction from band to tridiagonal form.
+ *
+ *  May return before the computation is finished.
+ *  Allows for pipelining of operations at runtime.
+ *
+ *******************************************************************************
+ *
+ * @param[in] sequence
+ *          Identifies the sequence of function calls that this call belongs to
+ *          (for completion checks and exception handling purposes).
+ *
+ * @param[out] request
+ *          Identifies this function call (for exception handling purposes).
+ *
+ *******************************************************************************
+ *
+ * @sa MORSE_zhetrd
+ * @sa MORSE_zhetrd_Tile
+ * @sa MORSE_chetrd_Tile_Async
+ * @sa MORSE_dsytrd_Tile_Async
+ * @sa MORSE_ssytrd_Tile_Async
+ *
+ ******************************************************************************/
+int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
+                            MORSE_enum uplo,
+                            MORSE_desc_t *A,
+                            double *W,
+                            double *E,
+                            MORSE_desc_t *T,
+                            MORSE_Complex64_t *Q, int LDQ,
+                            MORSE_sequence_t *sequence, MORSE_request_t *request)
+{
+    MORSE_context_t *morse;
+    MORSE_desc_t descA;
+    MORSE_desc_t descAB;
+    int N, NB, LDAB;
+    int status;
+
+    morse = morse_context_self();
+    if (morse == NULL) {
+        morse_fatal_error("MORSE_zhetrd_Tile_Async", "MORSE not initialized");
+        return MORSE_ERR_NOT_INITIALIZED;
+    }
+    if (sequence == NULL) {
+        morse_fatal_error("MORSE_zhetrd_Tile_Async", "NULL sequence");
+        return MORSE_ERR_UNALLOCATED;
+    }
+    if (request == NULL) {
+        morse_fatal_error("MORSE_zhetrd_Tile_Async", "NULL request");
+        return MORSE_ERR_UNALLOCATED;
+    }
+
+    /* Check sequence status */
+    if (sequence->status == MORSE_SUCCESS)
+        request->status = MORSE_SUCCESS;
+    else
+        return morse_request_fail(sequence, request, MORSE_ERR_SEQUENCE_FLUSHED);
+
+    /* Check descriptors for correctness */
+    if (morse_desc_check(A) != MORSE_SUCCESS) {
+        morse_error("MORSE_zhetrd_Tile_Async", "invalid descriptor");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    } else {
+        descA = *A;
+    }
+    if (morse_desc_check(T) != MORSE_SUCCESS) {
+        morse_error("MORSE_zhetrd_Tile_Async", "invalid descriptor");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+
+    /* Check input arguments */
+    if (jobz != MorseNoVec && jobz != MorseVec) {
+        morse_error("MORSE_zhetrd_Tile_Async", "illegal value of jobz");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+    if (uplo != MorseLower && uplo != MorseUpper) {
+        morse_error("MORSE_zhetrd_Tile_Async", "illegal value of uplo");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+    if (descA.m != descA.n) {
+        morse_error("MORSE_zhetrd_Tile_Async", "matrix need to be square");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+    if (descA.nb != descA.mb) {
+        morse_error("MORSE_zhetrd_Tile_Async", "only square tiles supported");
+        return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
+    }
+
+    N  = descA.m;
+    NB = descA.mb;
+
+    /* Reduction to band. On exit, T contains reflectors */
+    morse_pzhetrd_he2hb( uplo, A, T,
+                         sequence, request );
+
+    LDAB = NB+1;
+
+    /* Allocate band structure */
+    morse_zdesc_alloc_diag( descAB,
+                            LDAB, NB, /* mb, nb */
+                            LDAB, N,  /* lm, ln */
+                            0, 0,     /* i, j */
+                            LDAB, N,  /* m, n */
+                            1, 1 );
+
+    /* Copy data into band structure */
+    morse_pztile2band( uplo, A, &descAB,
+                       sequence, request );
+    morse_sequence_wait(morse, sequence);
+
+    /* Reduce band matrix to tridiagonal matrix */
+    status = LAPACKE_zhbtrd( LAPACK_COL_MAJOR,
+                             morse_lapack_const(jobz),
+                             morse_lapack_const(uplo),
+                             N, NB,
+                             (MORSE_Complex64_t *) descAB.mat, LDAB,
+                             W, E, Q, LDQ );
+    if (status != 0) {
+        morse_error("MORSE_zhetrd_Tile_Async", "LAPACKE_zhbtrd failed");
+    }
+
+    morse_desc_mat_free(&descAB);
+    return MORSE_SUCCESS;
+}
diff --git a/control/compute_z.h b/control/compute_z.h
index 53ee78e874412b9e0f977f44aec14fbc8f32ae41..fd6051a4966326ae600e9904bf7536f4d0dff4c8 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -84,25 +84,22 @@ int morse_zshift(MORSE_context_t *morse, int m, int n, MORSE_Complex64_t *A,
 /***************************************************************************//**
  *  Declarations of parallel functions (dynamic scheduling) - alphabetical order
  **/
-void morse_pzbarrier_tl2pnl(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzbarrier_pnl2tl(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
-void morse_pzbarrier_tl2row(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzbarrier_row2tl(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
-void morse_pzgebrd_tb2bd(MORSE_enum uplo, MORSE_desc_t *A, double *D, double *E, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
-void morse_pzgebrd_ge2tb(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
+void morse_pzbarrier_tl2pnl(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
+void morse_pzbarrier_tl2row(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
+void morse_pzgebrd_gb2bd(MORSE_enum uplo, MORSE_desc_t *A, double *D, double *E, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
+void morse_pzgebrd_ge2gb(MORSE_desc_t A, MORSE_desc_t T, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgelqf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgelqfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgemm(MORSE_enum transA, MORSE_enum transB, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgeqrf(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgeqrfrh(MORSE_desc_t *A, MORSE_desc_t *T, int BS, MORSE_sequence_t *sequence, MORSE_request_t *request);
-void morse_pzgerbh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
-void morse_pzgerbbrh(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgetmi2(MORSE_enum idep, MORSE_enum odep, MORSE_enum storev, int m, int n, int mb, int nb, MORSE_Complex64_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgetrf_incpiv(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgetrf_nopiv(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgetrf_reclap(MORSE_desc_t *A, int *IPIV, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzgetrf_rectil(MORSE_desc_t *A, int *IPIV, MORSE_sequence_t *sequence, MORSE_request_t *request);
-void morse_pzhbcpy_t2bl(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *AB, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzhegst(MORSE_enum itype, MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
 #ifdef COMPLEX
 void morse_pzhemm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
@@ -136,6 +133,7 @@ void morse_pzsymm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MOR
 void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta,  MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
+void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pztrsm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt
index a48dfdddc1f9c7a9bb93a30567ee658a14d3d270..82813231b0cd3e147e1e676142137ed312504136 100644
--- a/coreblas/compute/CMakeLists.txt
+++ b/coreblas/compute/CMakeLists.txt
@@ -44,6 +44,8 @@ set(ZSRC
     core_zgetrf.c
     core_zgetrf_incpiv.c
     core_zgetrf_nopiv.c
+    core_zhe2ge.c
+    core_zherfb.c
     core_zhemm.c
     core_zher2k.c
     core_zherk.c
@@ -56,6 +58,7 @@ set(ZSRC
     core_zlantr.c
     core_zlaset2.c
     core_zlaset.c
+    core_zlatro.c
     core_zlauum.c
     core_zpamm.c
     core_zparfb.c
@@ -79,6 +82,8 @@ set(ZSRC
     core_ztslqt.c
     core_ztsmlq.c
     core_ztsmqr.c
+    core_ztsmlq_hetra1.c
+    core_ztsmqr_hetra1.c
     core_ztsqrt.c
     core_ztstrf.c
     core_zttlqt.c
diff --git a/coreblas/compute/core_zhe2ge.c b/coreblas/compute/core_zhe2ge.c
new file mode 100644
index 0000000000000000000000000000000000000000..53b0b24ddf15637459866b578abfae6ee64c6d08
--- /dev/null
+++ b/coreblas/compute/core_zhe2ge.c
@@ -0,0 +1,95 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file core_zhe2ge.c
+ *
+ *  PLASMA core_blas kernel
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.5.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for MORSE 1.0.0
+ * @author Julien Langou
+ * @author Henricus Bouwmeester
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "coreblas/include/lapacke.h"
+#include "coreblas/include/coreblas.h"
+
+/***************************************************************************//**
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ **/
+
+void CORE_zhe2ge(MORSE_enum uplo, int M, int N,
+                 const MORSE_Complex64_t *A, int LDA,
+                 MORSE_Complex64_t *B, int LDB)
+{
+    const MORSE_Complex64_t *Aptr;
+    MORSE_Complex64_t *Bptr, *BTptr;
+    int i, j;
+
+    Aptr  = A;
+    Bptr  = B;
+    BTptr = B;
+
+    if (uplo == MorseLower){
+        for (j = 0; j < N; j++){
+            /* Diagonal element */
+            *Bptr = *Aptr;
+            Bptr++; Aptr++;
+
+            /* Outside the diagonal */
+            BTptr = B + j + (j+1) * LDB;
+            for (i = j+1; i < M; i++, Bptr++, Aptr++, BTptr += LDB) {
+                *Bptr  = *Aptr;
+                *BTptr = conj( *Aptr );
+            }
+            Aptr += (LDA - i + j + 1);
+            Bptr += (LDB - i + j + 1);
+
+            /* Bptr[ j * LDB + j ] = A[ j * LDA + j ]; */
+
+            /* for (i = j+1; i < M; i++) { */
+            /*     Bptr [ j * LDB + i ] = A[ j * LDA + i ]; */
+            /*     BTptr[ i * LDB + j ] = conj(A[ j * LDA + i ]); */
+            /* } */
+        }
+    }
+    else{
+        for (j = 0; j < N; j++){
+            /* for (i = 0; i < j; i++) { */
+            /*     Bptr [ j * LDB + i ] = A[ j * LDA + i ]; */
+            /*     BTptr[ i * LDB + j ] = conj(A[ j * LDA + i ]); */
+            /* } */
+            /* Bptr[ j * LDB + j ] = A[ j * LDA + j ]; */
+
+            BTptr = B + j;
+            for (i = 0; i < j; i++, Bptr++, Aptr++, BTptr += LDB) {
+                *Bptr  = *Aptr;
+                *BTptr = conj( *Aptr );
+            }
+            *Bptr = *A;
+
+            Aptr += (LDA - i);
+            Bptr += (LDB - i);
+        }
+    }
+}
+
diff --git a/coreblas/compute/core_zherfb.c b/coreblas/compute/core_zherfb.c
new file mode 100644
index 0000000000000000000000000000000000000000..3f2079c20b711e2a3fea1fd05918cf80448a833c
--- /dev/null
+++ b/coreblas/compute/core_zherfb.c
@@ -0,0 +1,174 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file core_zherfb.c
+ *
+ *  PLASMA core_blas kernel
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Hatem Ltaief
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "coreblas/include/coreblas.h"
+#include "coreblas/include/lapacke.h"
+
+/**
+ *******************************************************************************
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ *  CORE_zherfb overwrites the symmetric complex N-by-N tile C with
+ *
+ *    Q**T*C*Q
+ *
+ *  where Q is a complex unitary matrix defined as the product of k
+ *  elementary reflectors
+ *
+ *    Q = H(1) H(2) . . . H(k)
+ *
+ *  as returned by CORE_zgeqrt. Only MorseLower supported!
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *         TODO comment
+ *
+ * @param[in] n
+ *         The number of rows/columns of the tile C.  N >= 0.
+ *
+ * @param[in] k
+ *         The number of elementary reflectors whose product defines
+ *         the matrix Q. K >= 0.
+ *
+ * @param[in] ib
+ *         The inner-blocking size.  IB >= 0.
+ *
+ * @param[in] nb
+ *         The blocking size.  NB >= 0.
+ *
+ * @param[in] A
+ *         The i-th column must contain the vector which defines the
+ *         elementary reflector H(i), for i = 1,2,...,k, as returned by
+ *         CORE_zgeqrt in the first k columns of its array argument A.
+ *
+ * @param[in] lda
+ *         The leading dimension of the array A. LDA >= max(1,N).
+ *
+ * @param[in] T
+ *         The IB-by-K triangular factor T of the block reflector.
+ *         T is upper triangular by block (economic storage);
+ *         The rest of the array is not referenced.
+ *
+ * @param[in] ldt
+ *         The leading dimension of the array T. LDT >= IB.
+ *
+ * @param[in,out] C
+ *         On entry, the symmetric N-by-N tile C.
+ *         On exit, C is overwritten by Q**T*C*Q.
+ *
+ * @param[in] ldc
+ *         The leading dimension of the array C. LDC >= max(1,M).
+ *
+ * @param[in,out] WORK
+ *         On exit, if INFO = 0, WORK(1) returns the optimal LDWORK.
+ *
+ * @param[in] ldwork
+ *         The dimension of the array WORK. LDWORK >= max(1,N);
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *          \retval <0 if -i, the i-th argument had an illegal value
+ *
+ ******************************************************************************/
+#if defined(MORSE_HAVE_WEAK)
+#pragma weak CORE_zherfb = PCORE_zherfb
+#define CORE_zherfb PCORE_zherfb
+#define CORE_zunmlq PCORE_zunmlq
+#define CORE_zunmqr PCORE_zunmqr
+int  CORE_zunmlq(MORSE_enum side, MORSE_enum trans,
+                 int M, int N, int IB, int K,
+                 const MORSE_Complex64_t *V, int LDV,
+                 const MORSE_Complex64_t *T, int LDT,
+                 MORSE_Complex64_t *C, int LDC,
+                 MORSE_Complex64_t *WORK, int LDWORK);
+int  CORE_zunmqr(MORSE_enum side, MORSE_enum trans,
+                 int M, int N, int K, int IB,
+                 const MORSE_Complex64_t *V, int LDV,
+                 const MORSE_Complex64_t *T, int LDT,
+                 MORSE_Complex64_t *C, int LDC,
+                 MORSE_Complex64_t *WORK, int LDWORK);
+#endif
+int CORE_zherfb( MORSE_enum uplo, int n,
+                 int k, int ib, int nb,
+                 const MORSE_Complex64_t *A, int lda,
+                 const MORSE_Complex64_t *T, int ldt,
+                 MORSE_Complex64_t *C, int ldc,
+                 MORSE_Complex64_t *WORK, int ldwork )
+{
+    /* Check input arguments */
+    if ((uplo != MorseUpper) && (uplo != MorseLower)) {
+        coreblas_error(1, "Illegal value of uplo");
+        return -1;
+    }
+    if (n < 0) {
+        coreblas_error(2, "Illegal value of n");
+        return -2;
+    }
+    if (k < 0) {
+        coreblas_error(3, "Illegal value of k");
+        return -3;
+    }
+    if (ib < 0) {
+        coreblas_error(4, "Illegal value of ib");
+        return -4;
+    }
+    if (nb < 0) {
+        coreblas_error(5, "Illegal value of nb");
+        return -5;
+    }
+    if ( (lda < max(1,n)) && (n > 0) ) {
+        coreblas_error(7, "Illegal value of lda");
+        return -7;
+    }
+    if ( (ldt < max(1,ib)) && (ib > 0) ) {
+        coreblas_error(9, "Illegal value of ldt");
+        return -9;
+    }
+    if ( (ldc < max(1,n)) && (n > 0) ) {
+        coreblas_error(11, "Illegal value of ldc");
+        return -11;
+    }
+
+    if (uplo == MorseLower) {
+        /* Left */
+        CORE_zunmqr(MorseLeft, MorseConjTrans, n, n, k, ib,
+                    A, lda, T, ldt, C, ldc, WORK, ldwork);
+        /* Right */
+        CORE_zunmqr(MorseRight, MorseNoTrans, n, n, k, ib,
+                    A, lda, T, ldt, C, ldc, WORK, ldwork);
+    }
+    else {
+        /* Right */
+        CORE_zunmlq(MorseRight, MorseConjTrans, n, n, k, ib,
+                    A, lda, T, ldt, C, ldc, WORK, ldwork);
+        /* Left */
+        CORE_zunmlq(MorseLeft, MorseNoTrans, n, n, k, ib,
+                    A, lda, T, ldt, C, ldc, WORK, ldwork);
+    }
+    return 0;
+}
diff --git a/coreblas/compute/core_zlatro.c b/coreblas/compute/core_zlatro.c
new file mode 100644
index 0000000000000000000000000000000000000000..e7449e0ab8a1861283a60bbc9fc08fcd7dace45a
--- /dev/null
+++ b/coreblas/compute/core_zlatro.c
@@ -0,0 +1,164 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file core_zlatro.c
+ *
+ *  PLASMA core_blas kernel
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.7.1
+ * @author Azzam Haidar
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "coreblas/include/coreblas.h"
+#include "coreblas/include/lapacke.h"
+
+/***************************************************************************//**
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ *  CORE_zlatro transposes a m-by-n matrix out of place.
+ *
+ *******************************************************************************
+ *
+ * @param[in] uplo
+ *          Specifies whether the matrix A is upper triangular or lower
+ *          triangular:
+ *          = MorseUpper: the upper triangle of A and the lower triangle of B
+ *          are referenced.
+ *          = MorseLower: the lower triangle of A and the upper triangle of B
+ *          are referenced.
+ *          = MorseUpperLower: All A and B are referenced.
+ *
+ * @param[in] trans
+ *          Specifies whether the matrix A is transposed, not transposed or
+ *          conjugate transposed:
+ *          = MorseNoTrans:   B is a copy of A (equivalent to zlacpy);
+ *          = MorseTrans:     B is the transpose of A;
+ *          = MorseConjTrans: B is the conjugate transpose of A.
+ *
+ * @param[in] M
+ *         Number of rows of the matrix A and number of columns of the matrix B, if trans == Pasma[Conj]Trans.
+ *         Number of rows of the matrix A and the matrix B, if trans == PasmaNoTrans.
+ *
+ * @param[in] N
+ *         Number of columns of the matrix A and number of rows of the matrix B, if trans == Pasma[Conj]Trans.
+ *         Number of columns of the matrix A and of the matrix B, if trans == MorseNoTrans.
+ *
+ * @param[in] A
+ *         Matrix of size LDA-by-N, if trans == Pasma[Conj]Trans.
+ *         Matrix of size LDA-by-M, if trans == PasmaNoTrans.
+ *
+ * @param[in] LDA
+ *         The leading dimension of the array A.
+ *         LDA >= max(1,M), if trans == Pasma[Conj]Trans.
+ *         LDA >= max(1,N), if trans == PasmaNoTrans.
+ *
+ * @param[out] B
+ *         Matrix of size LDB-by-M, if trans == Pasma[Conj]Trans.
+ *         Matrix of size LDB-by-N, if trans == PasmaNoTrans.
+ *
+ * @param[in] LDB
+ *         The leading dimension of the array B.
+ *         LDB >= max(1,N), if trans == Pasma[Conj]Trans.
+ *         LDB >= max(1,M), if trans == PasmaNoTrans.
+ *
+ *
+ *******************************************************************************
+ *
+ * @return
+ *         \retval MORSE_SUCCESS successful exit
+ *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ *
+ ******************************************************************************/
+#if defined(MORSE_HAVE_WEAK)
+#pragma weak CORE_zlatro = PCORE_zlatro
+#define CORE_zlatro PCORE_zlatro
+#endif
+int CORE_zlatro(MORSE_enum uplo, MORSE_enum trans,
+                int M, int N,
+                const MORSE_Complex64_t *A, int LDA,
+                MORSE_Complex64_t *B, int LDB)
+{
+    int i, j;
+
+    /* Check input arguments */
+    if ((uplo != MorseUpper) && (uplo != MorseLower) && (uplo != MorseUpperLower) ) {
+        coreblas_error(1, "Illegal value of uplo");
+        return -1;
+    }
+    if ((trans != MorseConjTrans) && (trans != MorseNoTrans) && (trans != MorseTrans) ) {
+        coreblas_error(2, "Illegal value of trans");
+        return -2;
+    }
+    if (M < 0) {
+        coreblas_error(3, "Illegal value of M");
+        return -3;
+    }
+    if (N < 0) {
+        coreblas_error(4, "Illegal value of N");
+        return -4;
+    }
+    if ( (LDA < max(1,M)) && (M > 0) ) {
+        coreblas_error(6, "Illegal value of LDA");
+        return -6;
+    }
+    if ( (LDB < max(1,N)) && (N > 0) ) {
+        coreblas_error(8, "Illegal value of LDB");
+        return -8;
+    }
+
+    if (trans == MorseNoTrans) {
+        CORE_zlacpy(uplo, M, N, A, LDA, B, LDB);
+    }
+    else {
+        if (trans == MorseConjTrans) {
+            if(uplo == MorseUpper) {
+                for(j=0; j<N; j++)
+                    for(i=0; i<min(j+1,M); i++)
+                        B[j+i*LDB] = conj(A[i+j*LDA]);
+            }
+            else if(uplo == MorseLower) {
+                for(j=0;j<N;j++)
+                    for(i=j;i<M;i++)
+                        B[j+i*LDB] = conj(A[i+j*LDA]);
+            }
+            else {
+                for(j=0;j<N;j++)
+                    for(i=0;i<M;i++)
+                        B[j+i*LDB] = conj(A[i+j*LDA]);
+            }
+        }
+        else {
+            if(uplo==MorseUpper) {
+                for(j=0;j<N;j++)
+                    for(i=0;i<min(j+1,M);i++)
+                        B[j+i*LDB] = A[i+j*LDA];
+            }
+            else if(uplo==MorseLower) {
+                for(j=0;j<N;j++)
+                    for(i=j;i<M;i++)
+                        B[j+i*LDB] = A[i+j*LDA];
+            }
+            else {
+                for(j=0;j<N;j++)
+                    for(i=0;i<M;i++)
+                        B[j+i*LDB] = A[i+j*LDA];
+            }
+        }
+    }
+
+    return MORSE_SUCCESS;
+}
diff --git a/coreblas/compute/core_ztsmlq_hetra1.c b/coreblas/compute/core_ztsmlq_hetra1.c
new file mode 100644
index 0000000000000000000000000000000000000000..579ff9fdb4973372f1f9861fcae94fe081205154
--- /dev/null
+++ b/coreblas/compute/core_ztsmlq_hetra1.c
@@ -0,0 +1,177 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file core_ztsmlq_hetra1.c
+ *
+ *  PLASMA core_blas kernel
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.7.1
+ * @author Hatem Ltaief
+ * @author Mathieu Faverge
+ * @author Azzam Haidar
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include <coreblas/include/lapacke.h>
+#include "coreblas/include/coreblas.h"
+
+/***************************************************************************//**
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ * CORE_ztsmlq_hetra1: see CORE_ztsmlq
+ *
+ * This kernel applies a Right transformation on | A1' A2 |
+ * and does not handle the transpose of A1.
+ * Needs therefore to make the explicit transpose of A1 before
+ * and after the application of the block of reflectors
+ * Can be further optimized by changing accordingly the underneath
+ * kernel ztsrfb!
+ *
+ *******************************************************************************
+ *
+ * @param[in] side
+ *         @arg MorseLeft  : apply Q or Q**H from the Left;
+ *         @arg MorseRight : apply Q or Q**H from the Right.
+ *
+ * @param[in] trans
+ *         @arg MorseNoTrans   :  No transpose, apply Q;
+ *         @arg MorseConjTrans :  ConjTranspose, apply Q**H.
+ *
+ * @param[in] m1
+ *         The number of rows of the tile A1. m1 >= 0.
+ *
+ * @param[in] n1
+ *         The number of columns of the tile A1. n1 >= 0.
+ *
+ * @param[in] m2
+ *         The number of rows of the tile A2. m2 >= 0.
+ *         m2 = m1 if side == MorseRight.
+ *
+ * @param[in] n2
+ *         The number of columns of the tile A2. n2 >= 0.
+ *         n2 = n1 if side == MorseLeft.
+ *
+ * @param[in] k
+ *         The number of elementary reflectors whose product defines
+ *         the matrix Q.
+ *
+ * @param[in] ib
+ *         The inner-blocking size.  ib >= 0.
+ *
+ * @param[in,out] A1
+ *         On entry, the m1-by-n1 tile A1.
+ *         On exit, A1 is overwritten by the application of Q.
+ *
+ * @param[in] lda1
+ *         The leading dimension of the array A1. lda1 >= max(1,m1).
+ *
+ * @param[in,out] A2
+ *         On entry, the m2-by-n2 tile A2.
+ *         On exit, A2 is overwritten by the application of Q.
+ *
+ * @param[in] lda2
+ *         The leading dimension of the tile A2. lda2 >= max(1,m2).
+ *
+ * @param[in] V
+ *         The i-th row must contain the vector which defines the
+ *         elementary reflector H(i), for i = 1,2,...,k, as returned by
+ *         CORE_ZTSLQT in the first k rows of its array argument V.
+ *!
+ * @param[in] ldv
+ *         The leading dimension of the array V. ldv >= max(1,K).
+ *
+ * @param[in] T
+ *         The IB-by-n1 triangular factor T of the block reflector.
+ *         T is upper triangular by block (economic storage);
+ *         The rest of the array is not referenced.
+ *
+ * @param[in] ldt
+ *         The leading dimension of the array T. ldt >= IB.
+ *
+ * @param[out] WORK
+ *         Workspace array of size
+ *             LDWORK-by-m1 if side == MorseLeft
+ *             LDWORK-by-IB if side == MorseRight
+ *
+ * @param[in] ldwork
+ *         The leading dimension of the array WORK.
+ *             LDWORK >= max(1,IB) if side == MorseLeft
+ *             LDWORK >= max(1,n1) if side == MorseRight
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *          \retval <0 if -i, the i-th argument had an illegal value
+ *
+ ******************************************************************************/
+#if defined(MORSE_HAVE_WEAK)
+#pragma weak CORE_ztsmlq_hetra1 = PCORE_ztsmlq_hetra1
+#define CORE_ztsmlq_hetra1 PCORE_ztsmlq_hetra1
+#define CORE_ztsmlq PCORE_ztsmlq
+int  CORE_ztsmlq(MORSE_enum side, MORSE_enum trans,
+                 int m1, int n1, int m2, int n2, int K, int IB,
+                 MORSE_Complex64_t *A1, int lda1,
+                 MORSE_Complex64_t *A2, int lda2,
+                 const MORSE_Complex64_t *V, int ldv,
+                 const MORSE_Complex64_t *T, int ldt,
+                 MORSE_Complex64_t *WORK, int LDWORK);
+#endif
+int CORE_ztsmlq_hetra1( MORSE_enum side, MORSE_enum trans,
+                        int m1, int n1, int m2, int n2,
+                        int k, int ib,
+                        MORSE_Complex64_t *A1, int lda1,
+                        MORSE_Complex64_t *A2, int lda2,
+                        const MORSE_Complex64_t *V, int ldv,
+                        const MORSE_Complex64_t *T, int ldt,
+                        MORSE_Complex64_t *WORK, int ldwork)
+{
+    int i, j;
+
+    if ( (m1 != n1) ) {
+        coreblas_error(3, "Illegal value of M1, N1");
+        return -3;
+    }
+
+    /* in-place transposition of A1 */
+    for (j = 0; j < n1; j++){
+        A1[j + j*lda1] = conj(A1[j + j*lda1]);
+
+        for (i = j+1; i < m1; i++){
+            *WORK = *(A1 + i + j*lda1);
+            *(A1 + i + j*lda1) = conj(*(A1 + j + i*lda1));
+            *(A1 + j + i*lda1) = conj(*WORK);
+        }
+    }
+
+    CORE_ztsmlq(side, trans, m1, n1, m2, n2, k, ib,
+                A1, lda1, A2, lda2,
+                V,  ldv,  T,  ldt,
+                WORK, ldwork);
+
+    /* in-place transposition of A1 */
+    for (j = 0; j < n1; j++){
+        A1[j + j*lda1] = conj(A1[j + j*lda1]);
+
+        for (i = j+1; i < m1; i++){
+            *WORK = *(A1 + i + j*lda1);
+            *(A1 + i + j*lda1) = conj(*(A1 + j + i*lda1));
+            *(A1 + j + i*lda1) = conj(*WORK);
+        }
+    }
+
+    return MORSE_SUCCESS;
+}
diff --git a/coreblas/compute/core_ztsmqr_hetra1.c b/coreblas/compute/core_ztsmqr_hetra1.c
new file mode 100644
index 0000000000000000000000000000000000000000..76e7f65c08eedfa35057cb97c0350ba01b703bbc
--- /dev/null
+++ b/coreblas/compute/core_ztsmqr_hetra1.c
@@ -0,0 +1,176 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file core_ztsmqr_hetra1.c
+ *
+ *  PLASMA core_blas kernel
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.7.1
+ * @author Hatem Ltaief
+ * @author Mathieu Faverge
+ * @author Jakub Kurzak
+ * @author Azzam Haidar
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include <coreblas/include/lapacke.h>
+#include "coreblas/include/coreblas.h"
+
+/***************************************************************************//**
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ *  CORE_ztsmqr_hetra1: see CORE_ztsmqr
+ *
+ *  This kernel applies a left transformation on | A1'|
+ *                                               | A2 |
+ *
+ * Needs therefore to make the explicit transpose of A1 before
+ * and after the application of the block of reflectors
+ * Can be further optimized by changing accordingly the underneath
+ * kernel ztsrfb!
+ *
+ *******************************************************************************
+ *
+ * @param[in] side
+ *         @arg MorseLeft  : apply Q or Q**H from the Left;
+ *         @arg MorseRight : apply Q or Q**H from the Right.
+ *
+ * @param[in] trans
+ *         @arg MorseNoTrans   :  No transpose, apply Q;
+ *         @arg MorseConjTrans :  ConjTranspose, apply Q**H.
+ *
+ * @param[in] m1
+ *         The number of rows of the tile A1. M1 >= 0.
+ *
+ * @param[in] n1
+ *         The number of columns of the tile A1. N1 >= 0.
+ *
+ * @param[in] m2
+ *         The number of rows of the tile A2. M2 >= 0.
+ *         M2 = M1 if side == MorseRight.
+ *
+ * @param[in] n2
+ *         The number of columns of the tile A2. N2 >= 0.
+ *         N2 = N1 if side == MorseLeft.
+ *
+ * @param[in] k
+ *         The number of elementary reflectors whose product defines
+ *         the matrix Q.
+ *
+ * @param[in] ib
+ *         The inner-blocking size.  IB >= 0.
+ *
+ * @param[in,out] A1
+ *         On entry, the M1-by-N1 tile A1.
+ *         On exit, A1 is overwritten by the application of Q.
+ *
+ * @param[in] lda1
+ *         The leading dimension of the array A1. LDA1 >= max(1,M1).
+ *
+ * @param[in,out] A2
+ *         On entry, the M2-by-N2 tile A2.
+ *         On exit, A2 is overwritten by the application of Q.
+ *
+ * @param[in] lda2
+ *         The leading dimension of the tile A2. LDA2 >= max(1,M2).
+ *
+ * @param[in] V
+ *         The i-th row must contain the vector which defines the
+ *         elementary reflector H(i), for i = 1,2,...,k, as returned by
+ *         CORE_ZTSQRT in the first k columns of its array argument V.
+ *
+ * @param[in] ldv
+ *         The leading dimension of the array V. LDV >= max(1,K).
+ *
+ * @param[in] T
+ *         The IB-by-N1 triangular factor T of the block reflector.
+ *         T is upper triangular by block (economic storage);
+ *         The rest of the array is not referenced.
+ *
+ * @param[in] ldt
+ *         The leading dimension of the array T. LDT >= IB.
+ *
+ * @param[out] WORK
+ *         Workspace array of size
+ *             LDWORK-by-N1 if side == MorseLeft
+ *             LDWORK-by-IB if side == MorseRight
+ *
+ * @param[in] ldwork
+ *         The leading dimension of the array WORK.
+ *             LDWORK >= max(1,IB) if side == MorseLeft
+ *             LDWORK >= max(1,M1) if side == MorseRight
+ *
+ *******************************************************************************
+ *
+ * @return
+ *          \retval MORSE_SUCCESS successful exit
+ *          \retval <0 if -i, the i-th argument had an illegal value
+ *
+ ******************************************************************************/
+#if defined(MORSE_HAVE_WEAK)
+#pragma weak CORE_ztsmqr_hetra1 = PCORE_ztsmqr_hetra1
+#define CORE_ztsmqr_hetra1 PCORE_ztsmqr_hetra1
+#define CORE_ztsmqr PCORE_ztsmqr
+int  CORE_ztsmqr(MORSE_enum side, MORSE_enum trans,
+                 int M1, int N1, int M2, int N2, int K, int IB,
+                 MORSE_Complex64_t *A1, int LDA1,
+                 MORSE_Complex64_t *A2, int LDA2,
+                 const MORSE_Complex64_t *V, int LDV,
+                 const MORSE_Complex64_t *T, int LDT,
+                 MORSE_Complex64_t *WORK, int LDWORK);
+#endif
+int CORE_ztsmqr_hetra1( MORSE_enum side, MORSE_enum trans,
+                        int m1, int n1, int m2, int n2,
+                        int k, int ib,
+                        MORSE_Complex64_t *A1, int lda1,
+                        MORSE_Complex64_t *A2, int lda2,
+                        const MORSE_Complex64_t *V, int ldv,
+                        const MORSE_Complex64_t *T, int ldt,
+                        MORSE_Complex64_t *WORK, int ldwork)
+{
+    int i, j;
+
+    if ( (m1 != n1) ) {
+        coreblas_error(3, "Illegal value of M1, N1");
+        return -3;
+    }
+
+    /* in-place transposition of A1 */
+    for (j = 0; j < n1; j++){
+        A1[j + j*lda1] = conj(A1[j + j*lda1]);
+
+        for (i = j+1; i < m1; i++){
+            *WORK = *(A1 + i + j*lda1);
+            *(A1 + i + j*lda1) = conj(*(A1 + j + i*lda1));
+            *(A1 + j + i*lda1) = conj(*WORK);
+        }
+    }
+
+    CORE_ztsmqr(side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+
+    /* in-place transposition of A1 */
+    for (j = 0; j < n1; j++){
+        A1[j + j*lda1] = conj(A1[j + j*lda1]);
+
+        for (i = j+1; i < m1; i++){
+            *WORK = *(A1 + i + j*lda1);
+            *(A1 + i + j*lda1) = conj(*(A1 + j + i*lda1));
+            *(A1 + j + i*lda1) = conj(*WORK);
+        }
+    }
+
+    return MORSE_SUCCESS;
+}
diff --git a/coreblas/include/coreblas_z.h b/coreblas/include/coreblas_z.h
index 4c6ee39cae6b975da25162e7edb852db83cb484a..e944555626b2093e1f053ba0bd5a4b7c25fdebd7 100644
--- a/coreblas/include/coreblas_z.h
+++ b/coreblas/include/coreblas_z.h
@@ -107,6 +107,9 @@ int  CORE_zgetrf_reclap(int M, int N,
 int  CORE_zgetrf_rectil(const MORSE_desc_t A, int *IPIV, int *info);
 void CORE_zgetrip(int m, int n, MORSE_Complex64_t *A,
                   MORSE_Complex64_t *work);
+void CORE_zhe2ge(MORSE_enum uplo, int M, int N,
+                 const MORSE_Complex64_t *A, int LDA,
+                 MORSE_Complex64_t *B, int LDB);
 int CORE_zhbelr(MORSE_enum uplo, int N,
                 MORSE_desc_t *A, MORSE_Complex64_t *V, MORSE_Complex64_t *TAU,
                 int st, int ed, int eltsize);
@@ -315,14 +318,6 @@ int  CORE_ztsmlq(MORSE_enum side, MORSE_enum trans,
                  const MORSE_Complex64_t *V, int LDV,
                  const MORSE_Complex64_t *T, int LDT,
                  MORSE_Complex64_t *WORK, int LDWORK);
-int CORE_ztsmlq_corner( int m1, int n1, int m2, int n2, int m3, int n3,
-                        int k, int ib, int nb,
-                        MORSE_Complex64_t *A1, int lda1,
-                        MORSE_Complex64_t *A2, int lda2,
-                        MORSE_Complex64_t *A3, int lda3,
-                        const MORSE_Complex64_t *V, int ldv,
-                        const MORSE_Complex64_t *T, int ldt,
-                        MORSE_Complex64_t *WORK, int ldwork);
 int CORE_ztsmlq_hetra1( MORSE_enum side, MORSE_enum trans,
                         int m1, int n1, int m2, int n2,
                         int k, int ib,
@@ -338,14 +333,6 @@ int  CORE_ztsmqr(MORSE_enum side, MORSE_enum trans,
                  const MORSE_Complex64_t *V, int LDV,
                  const MORSE_Complex64_t *T, int LDT,
                  MORSE_Complex64_t *WORK, int LDWORK);
-int CORE_ztsmqr_corner( int m1, int n1, int m2, int n2, int m3, int n3,
-                        int k, int ib, int nb,
-                        MORSE_Complex64_t *A1, int lda1,
-                        MORSE_Complex64_t *A2, int lda2,
-                        MORSE_Complex64_t *A3, int lda3,
-                        const MORSE_Complex64_t *V, int ldv,
-                        const MORSE_Complex64_t *T, int ldt,
-                        MORSE_Complex64_t *WORK, int ldwork);
 int CORE_ztsmqr_hetra1( MORSE_enum side, MORSE_enum trans,
                         int m1, int n1, int m2, int n2,
                         int k, int ib,
diff --git a/cudablas/compute/CMakeLists.txt b/cudablas/compute/CMakeLists.txt
index d05705eb7fa4a50aeed8bb7e00b707e89e7bf5ae..3f97d72ddca9ce073357d099d0f308cd602386f8 100644
--- a/cudablas/compute/CMakeLists.txt
+++ b/cudablas/compute/CMakeLists.txt
@@ -31,6 +31,7 @@ set(ZSRC
     cuda_zgemm.c
     cuda_zhemm.c
     cuda_zher2k.c
+    cuda_zherfb.c
     cuda_zherk.c
     cuda_zlarfb.c
     cuda_zparfb.c
diff --git a/cudablas/include/cudablas_z.h b/cudablas/include/cudablas_z.h
index 6611bf98deea97b4a00e152bf543c13a5555f56f..0ee705938a2a6cd88e52e212275983d2b254ec09 100644
--- a/cudablas/include/cudablas_z.h
+++ b/cudablas/include/cudablas_z.h
@@ -38,6 +38,7 @@ int CUDA_zgemerge( MORSE_enum side, MORSE_enum diag, int M, int N, cuDoubleCompl
 int CUDA_zgemm(  MORSE_enum transa, MORSE_enum transb, int m, int n, int k, cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, cuDoubleComplex *beta, cuDoubleComplex *C, int ldc, CUBLAS_STREAM_PARAM);
 int CUDA_zhemm(  MORSE_enum side, MORSE_enum uplo, int m, int n, cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, cuDoubleComplex *beta, cuDoubleComplex *C, int ldc, CUBLAS_STREAM_PARAM);
 int CUDA_zher2k( MORSE_enum uplo, MORSE_enum trans, int n, int k, cuDoubleComplex *alpha, const cuDoubleComplex *A, int lda, const cuDoubleComplex *B, int ldb, double *beta, cuDoubleComplex *C, int ldc, CUBLAS_STREAM_PARAM);
+int CUDA_zherfb( MORSE_enum uplo, int n, int k, int ib, int nb, const cuDoubleComplex *A, int lda, const cuDoubleComplex *T, int ldt, cuDoubleComplex *C, int ldc, cuDoubleComplex *WORK, int ldwork, CUBLAS_STREAM_PARAM );
 int CUDA_zherk(  MORSE_enum uplo, MORSE_enum trans, int n, int k, double *alpha, const cuDoubleComplex *A, int lda, double *beta, cuDoubleComplex *B, int ldb, CUBLAS_STREAM_PARAM);
 int CUDA_zlarfb(MORSE_enum side, MORSE_enum trans, MORSE_enum direct, MORSE_enum storev, int M, int N, int K, const cuDoubleComplex *V, int LDV, const cuDoubleComplex *T, int LDT, cuDoubleComplex *C, int LDC, cuDoubleComplex *WORK, int LDWORK, CUBLAS_STREAM_PARAM );
 int CUDA_zparfb(MORSE_enum side, MORSE_enum trans, MORSE_enum direct, MORSE_enum storev, int M1, int N1, int M2, int N2, int K, int L, cuDoubleComplex *A1, int LDA1, cuDoubleComplex *A2, int LDA2, const cuDoubleComplex *V, int LDV, const cuDoubleComplex *T, int LDT, cuDoubleComplex *WORK, int LDWORK, cuDoubleComplex *WORKC, int LDWORKC, CUBLAS_STREAM_PARAM );
diff --git a/include/morse_z.h b/include/morse_z.h
index 3d3a2dbf578468d5086cd0895ecd4f91d621c82b..f01b0c778aa1a487999f528e71573589af2c9a57 100644
--- a/include/morse_z.h
+++ b/include/morse_z.h
@@ -55,7 +55,7 @@ int MORSE_zgeqrs(int M, int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_de
 //int MORSE_zgesv(int N, int NRHS, MORSE_Complex64_t *A, int LDA, int *IPIV, MORSE_Complex64_t *B, int LDB);
 int MORSE_zgesv_incpiv(int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descL, int *IPIV, MORSE_Complex64_t *B, int LDB);
 int MORSE_zgesv_nopiv(int N, int NRHS, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB);
-//int MORSE_zgesvd(MORSE_enum jobu, MORSE_enum jobvt, int M, int N, MORSE_Complex64_t *A, int LDA, double *S, MORSE_Complex64_t *U, int LDU, MORSE_Complex64_t *VT, int LDVT, MORSE_desc_t *descT);
+int MORSE_zgesvd(MORSE_enum jobu, MORSE_enum jobvt, int M, int N, MORSE_Complex64_t *A, int LDA, double *S, MORSE_desc_t *descT, MORSE_Complex64_t *U, int LDU, MORSE_Complex64_t *VT, int LDVT);
 //int MORSE_zgetrf(int M, int N, MORSE_Complex64_t *A, int LDA, int *IPIV);
 int MORSE_zgetrf_incpiv(int M, int N, MORSE_Complex64_t *A, int LDA, MORSE_desc_t *descL, int *IPIV);
 int MORSE_zgetrf_nopiv(int M, int N, MORSE_Complex64_t *A, int LDA);
@@ -69,11 +69,11 @@ int MORSE_zherk(MORSE_enum uplo, MORSE_enum trans, int N, int K, double alpha, M
 int MORSE_zher2k(MORSE_enum uplo, MORSE_enum trans, int N, int K, MORSE_Complex64_t alpha, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, double beta, MORSE_Complex64_t *C, int LDC);
 #endif
 //int MORSE_zheev(MORSE_enum jobz, MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA, double *W, MORSE_desc_t *descT, MORSE_Complex64_t *Q, int LDQ);
-//int MORSE_zheevd(MORSE_enum jobz, MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA, double *W, MORSE_desc_t *descT, MORSE_Complex64_t *Q, int LDQ);
+int MORSE_zheevd(MORSE_enum jobz, MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA, double *W, MORSE_desc_t *descT);
 //int MORSE_zhegv(MORSE_enum itype, MORSE_enum jobz, MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, double *W, MORSE_desc_t *descT, MORSE_Complex64_t *Q, int LDQ);
 //int MORSE_zhegvd(MORSE_enum itype, MORSE_enum jobz, MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB, double *W, MORSE_desc_t *descT, MORSE_Complex64_t *Q, int LDQ);
 //int MORSE_zhegst(MORSE_enum itype, MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB);
-//int MORSE_zhetrd(MORSE_enum jobz, MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA, double *D, double *E, MORSE_desc_t *descT, MORSE_Complex64_t *Q, int LDQ);
+int MORSE_zhetrd(MORSE_enum jobz, MORSE_enum uplo, int N, MORSE_Complex64_t *A, int LDA, double *D, double *E, MORSE_desc_t *descT, MORSE_Complex64_t *Q, int LDQ);
 int MORSE_zlacpy(MORSE_enum uplo, int M, int N, MORSE_Complex64_t *A, int LDA, MORSE_Complex64_t *B, int LDB);
 double MORSE_zlange(MORSE_enum norm, int M, int N, MORSE_Complex64_t *A, int LDA);
 #ifdef COMPLEX
@@ -132,7 +132,7 @@ int MORSE_zgeqrs_Tile(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B);
 //int MORSE_zgesv_Tile(MORSE_desc_t *A, int *IPIV, MORSE_desc_t *B);
 int MORSE_zgesv_incpiv_Tile(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, MORSE_desc_t *B);
 int MORSE_zgesv_nopiv_Tile(MORSE_desc_t *A, MORSE_desc_t *B);
-//int MORSE_zgesvd_Tile(MORSE_enum jobu, MORSE_enum jobvt, MORSE_desc_t *A, double *S, MORSE_desc_t *U, MORSE_desc_t *VT, MORSE_desc_t *T);
+int MORSE_zgesvd_Tile(MORSE_enum jobu, MORSE_enum jobvt, MORSE_desc_t *A, double *S, MORSE_desc_t *T, MORSE_Complex64_t *U, int LDU, MORSE_Complex64_t *VT, int LDVT);
 //int MORSE_zgetrf_Tile(MORSE_desc_t *A, int *IPIV);
 int MORSE_zgetrf_incpiv_Tile(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV);
 int MORSE_zgetrf_nopiv_Tile(MORSE_desc_t *A);
@@ -146,11 +146,11 @@ int MORSE_zherk_Tile(MORSE_enum uplo, MORSE_enum trans, double alpha, MORSE_desc
 int MORSE_zher2k_Tile(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, double beta, MORSE_desc_t *C);
 #endif
 //int MORSE_zheev_Tile(MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, double *W, MORSE_desc_t *T, MORSE_Complex64_t *Q, int LDQ);
-//int MORSE_zheevd_Tile(MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, double *W, MORSE_desc_t *T, MORSE_Complex64_t *Q, int LDQ);
+int MORSE_zheevd_Tile(MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, double *W, MORSE_desc_t *T);
 //int MORSE_zhegv_Tile( MORSE_enum itype, MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, double *W, MORSE_desc_t *T, MORSE_desc_t *Q);
 //int MORSE_zhegvd_Tile(MORSE_enum itype, MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, double *W, MORSE_desc_t *T, MORSE_desc_t *Q);
 //int MORSE_zhegst_Tile(MORSE_enum itype, MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B);
-//int MORSE_zhetrd_Tile(MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, double *D, double *E, MORSE_desc_t *T, MORSE_Complex64_t *Q, int LDQ);
+int MORSE_zhetrd_Tile(MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, double *D, double *E, MORSE_desc_t *T, MORSE_Complex64_t *Q, int LDQ);
 int MORSE_zlacpy_Tile(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B);
 double MORSE_zlange_Tile(MORSE_enum norm, MORSE_desc_t *A);
 #ifdef COMPLEX
@@ -206,7 +206,7 @@ int MORSE_zgeqrs_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *B, M
 //int MORSE_zgesv_Tile_Async(MORSE_desc_t *A, int *IPIV, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
 int MORSE_zgesv_incpiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
 int MORSE_zgesv_nopiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
-//int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt, MORSE_desc_t *A, double *S, MORSE_desc_t *U, MORSE_desc_t *VT, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
+int MORSE_zgesvd_Tile_Async(MORSE_enum jobu, MORSE_enum jobvt, MORSE_desc_t *A, double *S, MORSE_desc_t *T, MORSE_Complex64_t *U, int LDU, MORSE_Complex64_t *VT, int LDVT, MORSE_sequence_t *sequence, MORSE_request_t *request);
 //int MORSE_zgetrf_Tile_Async(MORSE_desc_t *A, int *IPIV, MORSE_sequence_t *sequence, MORSE_request_t *request);
 int MORSE_zgetrf_incpiv_Tile_Async(MORSE_desc_t *A, MORSE_desc_t *L, int *IPIV, MORSE_sequence_t *sequence, MORSE_request_t *request);
 int MORSE_zgetrf_nopiv_Tile_Async(MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
@@ -220,11 +220,11 @@ int MORSE_zherk_Tile_Async(MORSE_enum uplo, MORSE_enum trans, double alpha, MORS
 int MORSE_zher2k_Tile_Async(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
 #endif
 //int MORSE_zheev_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, double *W, MORSE_desc_t *T, MORSE_Complex64_t *Q, int LDQ, MORSE_sequence_t *sequence, MORSE_request_t *request);
-//int MORSE_zheevd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, double *W, MORSE_desc_t *T, MORSE_Complex64_t *Q, int LDQ, MORSE_sequence_t *sequence, MORSE_request_t *request);
+int MORSE_zheevd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, double *W, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
 //int MORSE_zhegv_Tile_Async( MORSE_enum itype, MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, double *W, MORSE_desc_t *T, MORSE_desc_t *Q, MORSE_sequence_t *sequence, MORSE_request_t *request);
 //int MORSE_zhegvd_Tile_Async(MORSE_enum itype, MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, double *W, MORSE_desc_t *T, MORSE_desc_t *Q, MORSE_sequence_t *sequence, MORSE_request_t *request);
 //int MORSE_zhegst_Tile_Async(MORSE_enum itype, MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
-//int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, double *D, double *E, MORSE_desc_t *T, MORSE_Complex64_t *Q, int LDQ, MORSE_sequence_t *sequence, MORSE_request_t *request);
+int MORSE_zhetrd_Tile_Async(MORSE_enum jobz, MORSE_enum uplo, MORSE_desc_t *A, double *D, double *E, MORSE_desc_t *T, MORSE_Complex64_t *Q, int LDQ, MORSE_sequence_t *sequence, MORSE_request_t *request);
 int MORSE_zlacpy_Tile_Async(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
 int MORSE_zlange_Tile_Async(MORSE_enum norm, MORSE_desc_t *A, double *value, MORSE_sequence_t *sequence, MORSE_request_t *request);
 #ifdef COMPLEX
diff --git a/include/runtime.h b/include/runtime.h
index 1bc4b9b13b30b2d365867d98761a10013e027a37..95cf19b75674623160da018bda60c1cbd65ef413 100644
--- a/include/runtime.h
+++ b/include/runtime.h
@@ -60,19 +60,19 @@ void  RUNTIME_comm_size          (int*);
 /*******************************************************************************
  * RUNTIME Descriptor
  **/
-void* RUNTIME_mat_alloc        (size_t);
-void  RUNTIME_mat_free         (void*, size_t);
-void  RUNTIME_desc_init        (MORSE_desc_t*);
-void  RUNTIME_desc_create      (MORSE_desc_t*);
-void  RUNTIME_desc_destroy     (MORSE_desc_t*);
-void  RUNTIME_desc_submatrix   (MORSE_desc_t*);
-void* RUNTIME_desc_getaddr     (const MORSE_desc_t*, int, int);
+void* RUNTIME_mat_alloc      (size_t);
+void  RUNTIME_mat_free       (void*, size_t);
+void  RUNTIME_desc_init      (MORSE_desc_t*);
+void  RUNTIME_desc_create    (MORSE_desc_t*);
+void  RUNTIME_desc_destroy   (MORSE_desc_t*);
+void  RUNTIME_desc_submatrix (MORSE_desc_t*);
+void* RUNTIME_desc_getaddr   (const MORSE_desc_t*, int, int);
 /* Acquire in main memory an up-to-date copy of the data described by the descriptor for read-write access. */
-int   RUNTIME_desc_acquire     (MORSE_desc_t*);
+int   RUNTIME_desc_acquire   (MORSE_desc_t*);
 /* Release the data described by the descriptor to be used by the StarPU tasks again. */
-int   RUNTIME_desc_release     (MORSE_desc_t*);
-int   RUNTIME_desc_getoncpu    (MORSE_desc_t*);
-void  RUNTIME_user_tag_size    (int, int) ;
+int   RUNTIME_desc_release   (MORSE_desc_t*);
+int   RUNTIME_desc_getoncpu  (MORSE_desc_t*);
+void  RUNTIME_user_tag_size  (int, int) ;
 
 /*******************************************************************************
  * RUNTIME Options
diff --git a/include/runtime_z.h b/include/runtime_z.h
index 3ed09528ba9d80d1a42c2b5bc4248da91c7991e7..df6b2f553e17bcd16c872e24b360cafcb0b4f74c 100644
--- a/include/runtime_z.h
+++ b/include/runtime_z.h
@@ -157,6 +157,11 @@ void MORSE_TASK_zgetrip_f2(const MORSE_option_t *options,
                            int m, int n, const MORSE_desc_t *A, int Am, int An, int szeA,
                            const MORSE_desc_t *fake1, int fake1m, int fake1n, int szeF1, int paramF1,
                            const MORSE_desc_t *fake2, int fake2m, int fake2n, int szeF2, int paramF2);
+void MORSE_TASK_zhe2ge(const MORSE_option_t *options,
+                       MORSE_enum uplo,
+                       int m, int n, int mb,
+                       const MORSE_desc_t *A, int Am, int An, int lda,
+                       const MORSE_desc_t *B, int Bm, int Bn, int ldb);
 void MORSE_TASK_zhemm(const MORSE_option_t *options,
                       MORSE_enum side, MORSE_enum uplo,
                       int m, int n, int nb,
@@ -167,7 +172,6 @@ void MORSE_TASK_zhegst(const MORSE_option_t *options,
                        int itype, MORSE_enum uplo, int N,
                        const MORSE_desc_t *A, int Am, int An, int LDA,
                        const MORSE_desc_t *B, int Bm, int Bn, int LDB,
-
                        int iinfo);
 void MORSE_TASK_zherk(const MORSE_option_t *options,
                       MORSE_enum uplo, MORSE_enum trans,
@@ -190,6 +194,10 @@ void MORSE_TASK_zlacpy(const MORSE_option_t *options,
                        MORSE_enum uplo, int m, int n, int mb,
                        const MORSE_desc_t *A, int Am, int An, int lda,
                        const MORSE_desc_t *B, int Bm, int Bn, int ldb);
+void MORSE_TASK_zlacpyx(const MORSE_option_t *options,
+                        MORSE_enum uplo, int m, int n, int mb,
+                        int displA, const MORSE_desc_t *A, int Am, int An, int lda,
+                        int displB, const MORSE_desc_t *B, int Bm, int Bn, int ldb);
 void MORSE_TASK_zlange(const MORSE_option_t *options,
                        MORSE_enum norm, int M, int N, int NB,
                        const MORSE_desc_t *A, int Am, int An, int LDA,
@@ -367,13 +375,6 @@ void MORSE_TASK_ztsmlq_hetra1(const MORSE_option_t *options,
                               const MORSE_desc_t *A2, int A2m, int A2n, int lda2,
                               const MORSE_desc_t *V, int Vm, int Vn, int ldv,
                               const MORSE_desc_t *T, int Tm, int Tn, int ldt);
-void MORSE_TASK_ztsmlq_corner(const MORSE_option_t *options,
-                              int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb,
-                              const MORSE_desc_t *A1, int A1m, int A1n, int lda1,
-                              const MORSE_desc_t *A2, int A2m, int A2n, int lda2,
-                              const MORSE_desc_t *A3, int A3m, int A3n, int lda3,
-                              const MORSE_desc_t *V, int Vm, int Vn, int ldv,
-                              const MORSE_desc_t *T, int Tm, int Tn, int ldt);
 void MORSE_TASK_ztsmqr(const MORSE_option_t *options,
                        MORSE_enum side, MORSE_enum trans,
                        int m1, int n1, int m2, int n2, int k, int ib, int nb,
@@ -388,13 +389,6 @@ void MORSE_TASK_ztsmqr_hetra1(const MORSE_option_t *options,
                               const MORSE_desc_t *A2, int A2m, int A2n, int lda2,
                               const MORSE_desc_t *V, int Vm, int Vn, int ldv,
                               const MORSE_desc_t *T, int Tm, int Tn, int ldt);
-void MORSE_TASK_ztsmqr_corner(const MORSE_option_t *options,
-                              int m1, int n1, int m2, int n2, int m3, int n3, int k, int ib, int nb,
-                              const MORSE_desc_t *A1, int A1m, int A1n, int lda1,
-                              const MORSE_desc_t *A2, int A2m, int A2n, int lda2,
-                              const MORSE_desc_t *A3, int A3m, int A3n, int lda3,
-                              const MORSE_desc_t *V, int Vm, int Vn, int ldv,
-                              const MORSE_desc_t *T, int Tm, int Tn, int ldt);
 void MORSE_TASK_ztsqrt(const MORSE_option_t *options,
                        int m, int n, int ib, int nb,
                        const MORSE_desc_t *A1, int A1m, int A1n, int lda1,
diff --git a/runtime/quark/CMakeLists.txt b/runtime/quark/CMakeLists.txt
index e67975d6b8b507587fe9978088d4c0d9244ddc05..9366a00d7ea931610daa6792d760bcea5e266c98 100644
--- a/runtime/quark/CMakeLists.txt
+++ b/runtime/quark/CMakeLists.txt
@@ -3,7 +3,7 @@
 # @copyright (c) 2009-2014 The University of Tennessee and The University
 #                          of Tennessee Research Foundation.
 #                          All rights reserved.
-# @copyright (c) 2012-2014 Inria. All rights reserved.
+# @copyright (c) 2012-2016 Inria. All rights reserved.
 # @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
 #
 ###
@@ -116,6 +116,8 @@ set(ZSRC
     codelets/codelet_zgetrf.c
     codelets/codelet_zgetrf_incpiv.c
     codelets/codelet_zgetrf_nopiv.c
+    codelets/codelet_zhe2ge.c
+    codelets/codelet_zherfb.c
     codelets/codelet_zhessq.c
     codelets/codelet_zlacpy.c
     codelets/codelet_zlange.c
@@ -124,6 +126,7 @@ set(ZSRC
     codelets/codelet_zlantr.c
     codelets/codelet_zlaset2.c
     codelets/codelet_zlaset.c
+    codelets/codelet_zlatro.c
     codelets/codelet_zlauum.c
     codelets/codelet_zplghe.c
     codelets/codelet_zplgsy.c
@@ -140,6 +143,8 @@ set(ZSRC
     codelets/codelet_ztslqt.c
     codelets/codelet_ztsmlq.c
     codelets/codelet_ztsmqr.c
+    codelets/codelet_ztsmlq_hetra1.c
+    codelets/codelet_ztsmqr_hetra1.c
     codelets/codelet_ztsqrt.c
     codelets/codelet_ztstrf.c
     codelets/codelet_zttlqt.c
@@ -180,6 +185,9 @@ add_dependencies(chameleon_quark
   control_include
   runtime_quark_include
 )
+if (NOT CHAMELEON_SIMULATION)
+    add_dependencies(chameleon_quark coreblas_include)
+endif()
 
 # installation
 # ------------
diff --git a/runtime/quark/codelets/codelet_zhe2ge.c b/runtime/quark/codelets/codelet_zhe2ge.c
new file mode 100644
index 0000000000000000000000000000000000000000..474ea51dc999960f1652babf576802616000efa8
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zhe2ge.c
@@ -0,0 +1,62 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_zhe2ge.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @precisions normal z -> c d s
+ *
+ **/
+
+#include "runtime/quark/include/morse_quark.h"
+
+/**
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ **/
+static inline void CORE_zhe2ge_quark(Quark *quark)
+{
+    MORSE_enum uplo;
+    int M;
+    int N;
+    MORSE_Complex64_t *A;
+    int LDA;
+    MORSE_Complex64_t *B;
+    int LDB;
+
+    quark_unpack_args_7(quark, uplo, M, N, A, LDA, B, LDB);
+    CORE_zhe2ge(uplo, M, N, A, LDA, B, LDB);
+}
+
+
+void MORSE_TASK_zhe2ge(const MORSE_option_t *options,
+                       MORSE_enum uplo,
+                       int m, int n, int mb,
+                       const MORSE_desc_t *A, int Am, int An, int lda,
+                       const MORSE_desc_t *B, int Bm, int Bn, int ldb)
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    DAG_CORE_LACPY;
+    QUARK_Insert_Task(opt->quark, CORE_zhe2ge_quark, (Quark_Task_Flags*)opt,
+        sizeof(MORSE_enum),              &uplo,   VALUE,
+        sizeof(int),                     &m,      VALUE,
+        sizeof(int),                     &n,      VALUE,
+        sizeof(MORSE_Complex64_t)*mb*mb,  RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT,
+        sizeof(int),                     &lda,    VALUE,
+        sizeof(MORSE_Complex64_t)*mb*mb,  RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), OUTPUT,
+        sizeof(int),                     &ldb,    VALUE,
+        0);
+}
diff --git a/runtime/quark/codelets/codelet_zherfb.c b/runtime/quark/codelets/codelet_zherfb.c
new file mode 100644
index 0000000000000000000000000000000000000000..b254c1ed1439c342fce00883fac2b9d384e92cb1
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zherfb.c
@@ -0,0 +1,71 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file qwrapper_zherfb.c
+ *
+ *  PLASMA core_blas quark wrapper
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Hatem Ltaief
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "runtime/quark/include/morse_quark.h"
+
+void CORE_zherfb_quark(Quark *quark)
+{
+    MORSE_enum uplo;
+    int n;
+    int k;
+    int ib;
+    int nb;
+    MORSE_Complex64_t *A;
+    int lda;
+    MORSE_Complex64_t *T;
+    int ldt;
+    MORSE_Complex64_t *C;
+    int ldc;
+    MORSE_Complex64_t *WORK;
+    int ldwork;
+
+    quark_unpack_args_13(quark, uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork);
+    CORE_zherfb(uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork);
+}
+
+void MORSE_TASK_zherfb(const MORSE_option_t *options,
+                       MORSE_enum uplo,
+                       int n, int k, int ib, int nb,
+                       const MORSE_desc_t *A, int Am, int An, int lda,
+                       const MORSE_desc_t *T, int Tm, int Tn, int ldt,
+                       const MORSE_desc_t *C, int Cm, int Cn, int ldc)
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+
+    QUARK_Insert_Task(opt->quark, CORE_zherfb_quark, (Quark_Task_Flags*)opt,
+        sizeof(MORSE_enum),                &uplo, VALUE,
+        sizeof(int),                       &n,    VALUE,
+        sizeof(int),                       &k,    VALUE,
+        sizeof(int),                       &ib,   VALUE,
+        sizeof(int),                       &nb,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A, MORSE_Complex64_t, Am, An), (uplo == MorseUpper) ? INOUT|QUARK_REGION_U : INOUT|QUARK_REGION_L,
+        sizeof(int),                       &lda,  VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb,    RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn), INPUT,
+        sizeof(int),                       &ldt,  VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(C, MORSE_Complex64_t, Cm, Cn), (uplo == MorseUpper) ? INOUT|QUARK_REGION_D|QUARK_REGION_U : INOUT|QUARK_REGION_D|QUARK_REGION_L,
+        sizeof(int),                       &ldc,  VALUE,
+        sizeof(MORSE_Complex64_t)*2*nb*nb,  NULL, SCRATCH,
+        sizeof(int),                       &nb,   VALUE,
+        0);
+}
diff --git a/runtime/quark/codelets/codelet_zlacpy.c b/runtime/quark/codelets/codelet_zlacpy.c
index 2b2fd9da49a5ace36c50cb0c38f6d8b5d7cbefb9..ee78ca67bd681bda611b3543afef8ec423573cd9 100644
--- a/runtime/quark/codelets/codelet_zlacpy.c
+++ b/runtime/quark/codelets/codelet_zlacpy.c
@@ -41,30 +41,43 @@ static inline void CORE_zlacpy_quark(Quark *quark)
     MORSE_enum uplo;
     int M;
     int N;
+    int displA;
     MORSE_Complex64_t *A;
     int LDA;
+    int displB;
     MORSE_Complex64_t *B;
     int LDB;
 
-    quark_unpack_args_7(quark, uplo, M, N, A, LDA, B, LDB);
-    CORE_zlacpy(uplo, M, N, A, LDA, B, LDB);
+    quark_unpack_args_9(quark, uplo, M, N, displA, A, LDA, displB, B, LDB);
+    CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB);
 }
 
-void MORSE_TASK_zlacpy(const MORSE_option_t *options,
-                       MORSE_enum uplo, int m, int n, int nb,
-                       const MORSE_desc_t *A, int Am, int An, int lda,
-                       const MORSE_desc_t *B, int Bm, int Bn, int ldb)
+void MORSE_TASK_zlacpyx(const MORSE_option_t *options,
+                        MORSE_enum uplo, int m, int n, int nb,
+                        int displA, const MORSE_desc_t *A, int Am, int An, int lda,
+                        int displB, const MORSE_desc_t *B, int Bm, int Bn, int ldb)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LACPY;
     QUARK_Insert_Task(opt->quark, CORE_zlacpy_quark, (Quark_Task_Flags*)opt,
-        sizeof(MORSE_enum),                &uplo,  VALUE,
-        sizeof(int),                        &m,     VALUE,
-        sizeof(int),                        &n,     VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(A, MORSE_Complex64_t, Am, An),             INPUT,
-        sizeof(int),                        &lda,   VALUE,
-        sizeof(MORSE_Complex64_t)*nb*nb,    RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),             OUTPUT,
-        sizeof(int),                        &ldb,   VALUE,
+        sizeof(MORSE_enum),              &uplo,   VALUE,
+        sizeof(int),                     &m,      VALUE,
+        sizeof(int),                     &n,      VALUE,
+        sizeof(int),                     &displA, VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR(A, MORSE_Complex64_t, Am, An),             INPUT,
+        sizeof(int),                     &lda,    VALUE,
+        sizeof(int),                     &displB, VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),             OUTPUT,
+        sizeof(int),                     &ldb,    VALUE,
         0);
 }
 
+void MORSE_TASK_zlacpy(const MORSE_option_t *options,
+                       MORSE_enum uplo, int m, int n, int nb,
+                       const MORSE_desc_t *A, int Am, int An, int lda,
+                       const MORSE_desc_t *B, int Bm, int Bn, int ldb)
+{
+    MORSE_TASK_zlacpyx( options, uplo, m, n, nb,
+                        0, A, Am, An, lda,
+                        0, B, Bm, Bn, ldb );
+}
diff --git a/runtime/quark/codelets/codelet_zlatro.c b/runtime/quark/codelets/codelet_zlatro.c
new file mode 100644
index 0000000000000000000000000000000000000000..342f1b430b1d4c3e60eae7a363e8144b508f4044
--- /dev/null
+++ b/runtime/quark/codelets/codelet_zlatro.c
@@ -0,0 +1,63 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ * @file qwrapper_zlatro.c
+ *
+ *  PLASMA core_blas quark wrapper
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Azzam Haidar
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "runtime/quark/include/morse_quark.h"
+
+
+void CORE_zlatro_quark(Quark *quark)
+{
+    MORSE_enum uplo;
+    MORSE_enum trans;
+    int M;
+    int N;
+    const MORSE_Complex64_t *A;
+    int LDA;
+    MORSE_Complex64_t *B;
+    int LDB;
+
+    quark_unpack_args_8(quark, uplo, trans, M, N, A, LDA, B, LDB);
+    CORE_zlatro(uplo, trans, M, N, A, LDA, B, LDB);
+}
+
+/***************************************************************************//**
+ *
+ **/
+void MORSE_TASK_zlatro(const MORSE_option_t *options,
+                       MORSE_enum uplo, MORSE_enum trans,
+                       int m, int n, int mb,
+                       const MORSE_desc_t *A, int Am, int An, int lda,
+                       const MORSE_desc_t *B, int Bm, int Bn, int ldb)
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+
+    QUARK_Insert_Task(opt->quark, CORE_zlatro_quark, (Quark_Task_Flags*)opt,
+        sizeof(MORSE_enum),              &uplo,  VALUE,
+        sizeof(MORSE_enum),              &trans, VALUE,
+        sizeof(int),                     &m,     VALUE,
+        sizeof(int),                     &n,     VALUE,
+        sizeof(MORSE_Complex64_t)*mb*mb,  RTBLKADDR(A, MORSE_Complex64_t, Am, An), INPUT,
+        sizeof(int),                     &lda,   VALUE,
+        sizeof(MORSE_Complex64_t)*mb*mb,  RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn), OUTPUT,
+        sizeof(int),                     &ldb,   VALUE,
+        0);
+}
diff --git a/runtime/quark/codelets/codelet_ztsmlq_hetra1.c b/runtime/quark/codelets/codelet_ztsmlq_hetra1.c
new file mode 100644
index 0000000000000000000000000000000000000000..b381e12e2ae9762c283be3d6a081c29affa709f9
--- /dev/null
+++ b/runtime/quark/codelets/codelet_ztsmlq_hetra1.c
@@ -0,0 +1,86 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_ztsmlq_hetra1.c
+ *
+ *  PLASMA core_blas quark wrapper
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Hatem Ltaief
+ * @author Mathieu Faverge
+ * @author Jakub Kurzak
+ * @author Azzam Haidar
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "runtime/quark/include/morse_quark.h"
+
+void CORE_ztsmlq_hetra1_quark(Quark *quark)
+{
+    MORSE_enum side;
+    MORSE_enum trans;
+    int m1;
+    int n1;
+    int m2;
+    int n2;
+    int k;
+    int ib;
+    MORSE_Complex64_t *A1;
+    int lda1;
+    MORSE_Complex64_t *A2;
+    int lda2;
+    MORSE_Complex64_t *V;
+    int ldv;
+    MORSE_Complex64_t *T;
+    int ldt;
+    MORSE_Complex64_t *WORK;
+    int ldwork;
+
+    quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+    CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+}
+
+void MORSE_TASK_ztsmlq_hetra1(const MORSE_option_t *options,
+                              MORSE_enum side, MORSE_enum trans,
+                              int m1, int n1, int m2, int n2, int k, int ib, int nb,
+                              const MORSE_desc_t *A1, int A1m, int A1n, int lda1,
+                              const MORSE_desc_t *A2, int A2m, int A2n, int lda2,
+                              const MORSE_desc_t *V, int Vm, int Vn, int ldv,
+                              const MORSE_desc_t *T, int Tm, int Tn, int ldt)
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    int ldwork = side == MorseLeft ? ib : nb;
+
+    QUARK_Insert_Task(opt->quark, CORE_ztsmlq_hetra1_quark, (Quark_Task_Flags*)opt,
+        sizeof(MORSE_enum),              &side,   VALUE,
+        sizeof(MORSE_enum),              &trans,  VALUE,
+        sizeof(int),                     &m1,     VALUE,
+        sizeof(int),                     &n1,     VALUE,
+        sizeof(int),                     &m2,     VALUE,
+        sizeof(int),                     &n2,     VALUE,
+        sizeof(int),                     &k,      VALUE,
+        sizeof(int),                     &ib,     VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT|QUARK_REGION_U|QUARK_REGION_D,
+        sizeof(int),                     &lda1,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT,
+        sizeof(int),                     &lda2,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),    INPUT,
+        sizeof(int),                     &ldv,    VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb,  RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),    INPUT,
+        sizeof(int),                     &ldt,    VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb,  NULL,   SCRATCH,
+        sizeof(int),                     &ldwork, VALUE,
+        0);
+}
diff --git a/runtime/quark/codelets/codelet_ztsmqr_hetra1.c b/runtime/quark/codelets/codelet_ztsmqr_hetra1.c
new file mode 100644
index 0000000000000000000000000000000000000000..5e7277eef07f5a26ff0d7608c59cd71da4c6425e
--- /dev/null
+++ b/runtime/quark/codelets/codelet_ztsmqr_hetra1.c
@@ -0,0 +1,86 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelets_ztsmqr_hetra1.c
+ *
+ *  PLASMA core_blas quark wrapper
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Hatem Ltaief
+ * @author Mathieu Faverge
+ * @author Jakub Kurzak
+ * @author Azzam Haidar
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "runtime/quark/include/morse_quark.h"
+
+void CORE_ztsmqr_hetra1_quark(Quark *quark)
+{
+    MORSE_enum side;
+    MORSE_enum trans;
+    int m1;
+    int n1;
+    int m2;
+    int n2;
+    int k;
+    int ib;
+    MORSE_Complex64_t *A1;
+    int lda1;
+    MORSE_Complex64_t *A2;
+    int lda2;
+    MORSE_Complex64_t *V;
+    int ldv;
+    MORSE_Complex64_t *T;
+    int ldt;
+    MORSE_Complex64_t *WORK;
+    int ldwork;
+
+    quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+    CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+}
+
+void MORSE_TASK_ztsmqr_hetra1(const MORSE_option_t *options,
+                              MORSE_enum side, MORSE_enum trans,
+                              int m1, int n1, int m2, int n2, int k, int ib, int nb,
+                              const MORSE_desc_t *A1, int A1m, int A1n, int lda1,
+                              const MORSE_desc_t *A2, int A2m, int A2n, int lda2,
+                              const MORSE_desc_t *V, int Vm, int Vn, int ldv,
+                              const MORSE_desc_t *T, int Tm, int Tn, int ldt)
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    int ldwork = side == MorseLeft ? ib : nb;
+
+    QUARK_Insert_Task(opt->quark, CORE_ztsmqr_hetra1_quark, (Quark_Task_Flags*)opt,
+        sizeof(MORSE_enum),              &side,   VALUE,
+        sizeof(MORSE_enum),              &trans,  VALUE,
+        sizeof(int),                     &m1,     VALUE,
+        sizeof(int),                     &n1,     VALUE,
+        sizeof(int),                     &m2,     VALUE,
+        sizeof(int),                     &n2,     VALUE,
+        sizeof(int),                     &k,      VALUE,
+        sizeof(int),                     &ib,     VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n), INOUT|QUARK_REGION_L|QUARK_REGION_D,
+        sizeof(int),                     &lda1,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n), INOUT,
+        sizeof(int),                     &lda2,   VALUE,
+        sizeof(MORSE_Complex64_t)*nb*nb,  RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),    INPUT,
+        sizeof(int),                     &ldv,    VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb,  RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),    INPUT,
+        sizeof(int),                     &ldt,    VALUE,
+        sizeof(MORSE_Complex64_t)*ib*nb,  NULL,   SCRATCH,
+        sizeof(int),                     &ldwork, VALUE,
+        0);
+}
diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt
index 5dc0bd19b7e01db14e5e05f8f2f78330eb609b5c..08956acafbf1742798bb235a8226b17bf1d73d05 100644
--- a/runtime/starpu/CMakeLists.txt
+++ b/runtime/starpu/CMakeLists.txt
@@ -136,6 +136,8 @@ set(ZSRC
     codelets/codelet_zgetrf.c
     codelets/codelet_zgetrf_incpiv.c
     codelets/codelet_zgetrf_nopiv.c
+    codelets/codelet_zhe2ge.c
+    codelets/codelet_zherfb.c
     codelets/codelet_zhessq.c
     codelets/codelet_zlacpy.c
     codelets/codelet_zlange.c
@@ -144,6 +146,7 @@ set(ZSRC
     codelets/codelet_zlantr.c
     codelets/codelet_zlaset2.c
     codelets/codelet_zlaset.c
+    codelets/codelet_zlatro.c
     codelets/codelet_zlauum.c
     codelets/codelet_zplghe.c
     codelets/codelet_zplgsy.c
@@ -160,6 +163,8 @@ set(ZSRC
     codelets/codelet_ztslqt.c
     codelets/codelet_ztsmlq.c
     codelets/codelet_ztsmqr.c
+    codelets/codelet_ztsmlq_hetra1.c
+    codelets/codelet_ztsmqr_hetra1.c
     codelets/codelet_ztsqrt.c
     codelets/codelet_ztstrf.c
     codelets/codelet_zttlqt.c
@@ -206,7 +211,7 @@ add_dependencies(chameleon_starpu
 if (NOT CHAMELEON_SIMULATION)
     add_dependencies(chameleon_starpu coreblas_include)
 endif()
- 
+
 if (CHAMELEON_USE_CUDA AND NOT CHAMELEON_SIMULATION)
     add_dependencies(chameleon_starpu cudablas_include)
 endif()
diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index fa66cff4d9f3be011bfbe7e7a51f6c29ebdde7e4..8af4ec546d5e5c59f4ad01a06dd211258d3e99fd 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -40,6 +40,8 @@ CHAMELEON_CL_CB(zgessq,        starpu_matrix_get_nx(task->handles[0]), starpu_ma
 CHAMELEON_CL_CB(zgetrf,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2./3.)*M*N*K);
 CHAMELEON_CL_CB(zgetrf_incpiv, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2./3.)*M*N*K);
 CHAMELEON_CL_CB(zgetrf_nopiv,  starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2./3.)*M*N*K);
+CHAMELEON_CL_CB(zhe2ge,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                       (1./2.0)*M*N);
+CHAMELEON_CL_CB(zherfb,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                         2. *M* M*M);
 #if defined(PRECISION_z) || defined(PRECISION_c)
 CHAMELEON_CL_CB(zhemm,         starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0,                                          2.*M*M *N);
 CHAMELEON_CL_CB(zher2k,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                     ( 1.+2.*M*N)*M);
@@ -49,6 +51,7 @@ CHAMELEON_CL_CB(zlacpy,        starpu_matrix_get_nx(task->handles[0]), starpu_ma
 CHAMELEON_CL_CB(zlange,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N);
 CHAMELEON_CL_CB(zlaset,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N);
 CHAMELEON_CL_CB(zlaset2,       starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N);
+CHAMELEON_CL_CB(zlatro,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N);
 CHAMELEON_CL_CB(zlauum,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (1./3.)*M* M*M);
 #if defined(PRECISION_z) || defined(PRECISION_c)
 CHAMELEON_CL_CB(zplghe,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N);
@@ -71,6 +74,8 @@ CHAMELEON_CL_CB(ztrtri,        starpu_matrix_get_nx(task->handles[0]), starpu_ma
 CHAMELEON_CL_CB(ztslqt,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]),     2. *M* M*M);
 CHAMELEON_CL_CB(ztsmlq,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M);
 CHAMELEON_CL_CB(ztsmqr,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M);
+CHAMELEON_CL_CB(ztsmlq_hetra1, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M);
+CHAMELEON_CL_CB(ztsmqr_hetra1, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M);
 CHAMELEON_CL_CB(ztsqrt,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]),     2. *M* M*M);
 CHAMELEON_CL_CB(ztstrf,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]),         M* M*M);
 CHAMELEON_CL_CB(zttlqt,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]),     1. *M* M*M);
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 91b9ccdf4d89be209b3e183b2bc0f6aa570a10a9..581d46a1153631294ee2e0239e61de1c510625a0 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -100,8 +100,6 @@ void MORSE_TASK_zgemm(const MORSE_option_t *options,
     }
 }
 
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
 {
@@ -169,7 +167,7 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUDA */
+#endif /* defined(CHAMELEON_USE_CUDA) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c
new file mode 100644
index 0000000000000000000000000000000000000000..9bb03ebaf9247db83de4e9f346ff3e4edb2a1885
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zhe2ge.c
@@ -0,0 +1,82 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_zhe2ge.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @precisions normal z -> c d s
+ *
+ **/
+
+#include "runtime/starpu/include/morse_starpu.h"
+#include "runtime/starpu/include/runtime_codelet_z.h"
+
+/**
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ **/
+
+void MORSE_TASK_zhe2ge(const MORSE_option_t *options,
+                       MORSE_enum uplo,
+                       int m, int n, int mb,
+                       const MORSE_desc_t *A, int Am, int An, int lda,
+                       const MORSE_desc_t *B, int Bm, int Bn, int ldb)
+{
+    (void)mb;
+    struct starpu_codelet *codelet = &cl_zhe2ge;
+    void (*callback)(void*) = options->profiling ? cl_zhe2ge_callback : NULL;
+
+    if ( morse_desc_islocal( A, Am, An ) ||
+         morse_desc_islocal( B, Bm, Bn ) )
+    {
+        starpu_insert_task(
+            codelet,
+            STARPU_VALUE,  &uplo,                sizeof(MORSE_enum),
+            STARPU_VALUE,     &m,                        sizeof(int),
+            STARPU_VALUE,     &n,                        sizeof(int),
+            STARPU_R,             RTBLKADDR(A, MORSE_Complex64_t, Am, An),
+            STARPU_VALUE,   &lda,                        sizeof(int),
+            STARPU_W,             RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
+            STARPU_VALUE,   &ldb,                        sizeof(int),
+            STARPU_PRIORITY,    options->priority,
+            STARPU_CALLBACK,    callback,
+            STARPU_NAME,        "zhe2ge",
+            0);
+    }
+}
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg)
+{
+    MORSE_enum uplo;
+    int M;
+    int N;
+    const MORSE_Complex64_t *A;
+    int LDA;
+    MORSE_Complex64_t *B;
+    int LDB;
+
+    A = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &LDA, &LDB);
+    CORE_zhe2ge(uplo, M, N, A, LDA, B, LDB);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c
new file mode 100644
index 0000000000000000000000000000000000000000..9f693a90173728c2e6abb26ef09c509ab231c864
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zherfb.c
@@ -0,0 +1,135 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_zherfb.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.7.1
+ * @author Hatem Ltaief
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "runtime/starpu/include/morse_starpu.h"
+#include "runtime/starpu/include/runtime_codelet_z.h"
+
+/**
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ **/
+void MORSE_TASK_zherfb(const MORSE_option_t *options,
+                       MORSE_enum uplo,
+                       int n, int k, int ib, int nb,
+                       const MORSE_desc_t *A, int Am, int An, int lda,
+                       const MORSE_desc_t *T, int Tm, int Tn, int ldt,
+                       const MORSE_desc_t *C, int Cm, int Cn, int ldc)
+{
+    struct starpu_codelet *codelet = &cl_zherfb;
+    void (*callback)(void*) = options->profiling ? cl_zherfb_callback : NULL;
+
+    if ( morse_desc_islocal( A, Am, An ) ||
+         morse_desc_islocal( T, Tm, Tn ) ||
+         morse_desc_islocal( C, Cm, Cn ) )
+    {
+        starpu_insert_task(
+            codelet,
+            STARPU_VALUE,    &uplo,              sizeof(MORSE_enum),
+            STARPU_VALUE,    &n,                 sizeof(int),
+            STARPU_VALUE,    &k,                 sizeof(int),
+            STARPU_VALUE,    &ib,                sizeof(int),
+            STARPU_VALUE,    &nb,                sizeof(int),
+            STARPU_R,         RTBLKADDR(A, MORSE_Complex64_t, Am, An),
+            STARPU_VALUE,    &lda,               sizeof(int),
+            STARPU_R,         RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
+            STARPU_VALUE,    &ldt,               sizeof(int),
+            STARPU_RW,        RTBLKADDR(C, MORSE_Complex64_t, Cm, Cn),
+            STARPU_VALUE,    &ldc,               sizeof(int),
+            STARPU_SCRATCH,   options->ws_worker,
+            STARPU_VALUE,    &nb,                sizeof(int),
+            STARPU_PRIORITY,  options->priority,
+            STARPU_CALLBACK,  callback,
+            STARPU_NAME,      "zherfb",
+            0);
+    }
+}
+
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zherfb_cpu_func(void *descr[], void *cl_arg)
+{
+    MORSE_enum uplo;
+    int n;
+    int k;
+    int ib;
+    int nb;
+    const MORSE_Complex64_t *A;
+    int lda;
+    const MORSE_Complex64_t *T;
+    int ldt;
+    MORSE_Complex64_t *C;
+    int ldc;
+    MORSE_Complex64_t *WORK;
+    int ldwork;
+
+    A    = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    T    = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    C    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
+
+    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &k, &ib, &nb, &lda, &ldt, &ldc, &ldwork);
+
+    CORE_zherfb(uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork);
+}
+
+#if defined(CHAMELEON_USE_CUDA)
+static void cl_zherfb_cuda_func(void *descr[], void *cl_arg)
+{
+    MORSE_enum uplo;
+    int n;
+    int k;
+    int ib;
+    int nb;
+    const cuDoubleComplex *A;
+    int lda;
+    const cuDoubleComplex *T;
+    int ldt;
+    cuDoubleComplex *C;
+    int ldc;
+    cuDoubleComplex *WORK;
+    int ldwork;
+
+    stream = starpu_cuda_get_local_stream();
+
+    A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
+    T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
+    C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
+    WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
+
+    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &k, &ib, &nb, &lda, &ldt, &ldc, &ldwork);
+
+    CUDA_zherfb(uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork);
+
+#ifndef STARPU_CUDA_ASYNC
+    cudaStreamSynchronize( stream );
+#endif
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS(zherfb, 4, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC);
diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c
index b62d2f06e2cd2f2154ebf9e4cd9bbc08e33e952e..c70e6b6805ef7a377f52e6f25acb80b3ab93c8d0 100644
--- a/runtime/starpu/codelets/codelet_zlacpy.c
+++ b/runtime/starpu/codelets/codelet_zlacpy.c
@@ -38,10 +38,10 @@
  *
  **/
 
-void MORSE_TASK_zlacpy(const MORSE_option_t *options,
-                       MORSE_enum uplo, int m, int n, int nb,
-                       const MORSE_desc_t *A, int Am, int An, int lda,
-                       const MORSE_desc_t *B, int Bm, int Bn, int ldb)
+void MORSE_TASK_zlacpyx(const MORSE_option_t *options,
+                        MORSE_enum uplo, int m, int n, int nb,
+                        int displA, const MORSE_desc_t *A, int Am, int An, int lda,
+                        int displB, const MORSE_desc_t *B, int Bm, int Bn, int ldb)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zlacpy;
@@ -52,19 +52,30 @@ void MORSE_TASK_zlacpy(const MORSE_option_t *options,
     {
         starpu_insert_task(
             codelet,
-            STARPU_VALUE,  &uplo,                sizeof(MORSE_enum),
-            STARPU_VALUE,     &m,                        sizeof(int),
-            STARPU_VALUE,     &n,                        sizeof(int),
-            STARPU_R,             RTBLKADDR(A, MORSE_Complex64_t, Am, An),
-            STARPU_VALUE,   &lda,                        sizeof(int),
-            STARPU_W,             RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
-            STARPU_VALUE,   &ldb,                        sizeof(int),
-            STARPU_PRIORITY,    options->priority,
-            STARPU_CALLBACK,    callback,
+            STARPU_VALUE,   &uplo,                sizeof(MORSE_enum),
+            STARPU_VALUE,   &m,                   sizeof(int),
+            STARPU_VALUE,   &n,                   sizeof(int),
+            STARPU_VALUE,   &displA,              sizeof(int),
+            STARPU_R,        RTBLKADDR(A, MORSE_Complex64_t, Am, An),
+            STARPU_VALUE,   &lda,                 sizeof(int),
+            STARPU_VALUE,   &displB,              sizeof(int),
+            STARPU_W,        RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
+            STARPU_VALUE,   &ldb,                 sizeof(int),
+            STARPU_PRIORITY, options->priority,
+            STARPU_CALLBACK, callback,
             0);
     }
 }
 
+void MORSE_TASK_zlacpy(const MORSE_option_t *options,
+                       MORSE_enum uplo, int m, int n, int nb,
+                       const MORSE_desc_t *A, int Am, int An, int lda,
+                       const MORSE_desc_t *B, int Bm, int Bn, int ldb)
+{
+    MORSE_TASK_zlacpyx( options, uplo, m, n, nb,
+                        0, A, Am, An, lda,
+                        0, B, Bm, Bn, ldb );
+}
 
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
@@ -72,15 +83,17 @@ static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
     MORSE_enum uplo;
     int M;
     int N;
-    MORSE_Complex64_t *A;
+    int displA;
+    int displB;
+    const MORSE_Complex64_t *A;
     int LDA;
     MORSE_Complex64_t *B;
     int LDB;
 
-    A = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    A = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
     B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &LDA, &LDB);
-    CORE_zlacpy(uplo, M, N, A, LDA, B, LDB);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &LDA, &displB, &LDB);
+    CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c
new file mode 100644
index 0000000000000000000000000000000000000000..6914020301ecf892ff9ce8bc882eca72393b77cc
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_zlatro.c
@@ -0,0 +1,93 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_zlatro.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.5.0
+ * @comment This file has been automatically generated
+ *          from Plasma 2.5.0 for MORSE 1.0.0
+ * @author Julien Langou
+ * @author Henricus Bouwmeester
+ * @author Mathieu Faverge
+ * @author Emmanuel Agullo
+ * @author Cedric Castagnede
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+
+#include "runtime/starpu/include/morse_starpu.h"
+#include "runtime/starpu/include/runtime_codelet_z.h"
+
+/**
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ **/
+
+void MORSE_TASK_zlatro(const MORSE_option_t *options,
+                       MORSE_enum uplo, MORSE_enum trans,
+                       int m, int n, int mb,
+                       const MORSE_desc_t *A, int Am, int An, int lda,
+                       const MORSE_desc_t *B, int Bm, int Bn, int ldb)
+{
+    struct starpu_codelet *codelet = &cl_zlatro;
+    void (*callback)(void*) = NULL;
+
+    if ( morse_desc_islocal( A, Am, An ) ||
+         morse_desc_islocal( B, Bm, Bn ) )
+    {
+        starpu_insert_task(
+            codelet,
+            STARPU_VALUE,   &uplo,    sizeof(MORSE_enum),
+            STARPU_VALUE,   &trans,   sizeof(MORSE_enum),
+            STARPU_VALUE,   &m,       sizeof(int),
+            STARPU_VALUE,   &n,       sizeof(int),
+            STARPU_R,        RTBLKADDR(A, MORSE_Complex64_t, Am, An),
+            STARPU_VALUE,   &lda,     sizeof(int),
+            STARPU_W,        RTBLKADDR(B, MORSE_Complex64_t, Bm, Bn),
+            STARPU_VALUE,   &ldb,     sizeof(int),
+            STARPU_PRIORITY, options->priority,
+            STARPU_CALLBACK, callback,
+            STARPU_NAME,     "zlatro",
+            0);
+    }
+    (void)mb;
+}
+
+#if defined(CHAMELEON_SIMULATION)
+static void cl_zlatro_cpu_func(void *descr[], void *cl_arg)
+{
+    MORSE_enum uplo;
+    MORSE_enum trans;
+    int M;
+    int N;
+    const MORSE_Complex64_t *A;
+    int LDA;
+    MORSE_Complex64_t *B;
+    int LDB;
+
+    A = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &LDA, &LDB);
+    CORE_zlatro(uplo, trans, M, N, A, LDA, B, LDB);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
new file mode 100644
index 0000000000000000000000000000000000000000..b61a8fc3a5493276a26f866ba5902e4e2d6d3b29
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
@@ -0,0 +1,121 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_ztsmlq_hetra1.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.7.1
+ * @author Hatem Ltaief
+ * @author Mathieu Faverge
+ * @author Azzam Haidar
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "runtime/starpu/include/morse_starpu.h"
+#include "runtime/starpu/include/runtime_codelet_z.h"
+
+/**
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ **/
+
+void MORSE_TASK_ztsmlq_hetra1(const MORSE_option_t *options,
+                              MORSE_enum side, MORSE_enum trans,
+                              int m1, int n1, int m2, int n2, int k, int ib, int nb,
+                              const MORSE_desc_t *A1, int A1m, int A1n, int lda1,
+                              const MORSE_desc_t *A2, int A2m, int A2n, int lda2,
+                              const MORSE_desc_t *V,  int Vm,  int Vn,  int ldv,
+                              const MORSE_desc_t *T,  int Tm,  int Tn,  int ldt)
+{
+    struct starpu_codelet *codelet = &cl_ztsmlq_hetra1;
+    void (*callback)(void*) = options->profiling ? cl_ztsmlq_hetra1_callback : NULL;
+
+    int ldwork = side == MorseLeft ? ib : nb;
+
+    /* T follows distribution of V */
+    if ( morse_desc_islocal( A1, A1m, A1n ) ||
+         morse_desc_islocal( A2, A2m, A2n ) ||
+         morse_desc_islocal( V,  Vm,  Vn  ) )
+    {
+        starpu_insert_task(
+            codelet,
+            STARPU_VALUE,    &side,              sizeof(MORSE_enum),
+            STARPU_VALUE,    &trans,             sizeof(MORSE_enum),
+            STARPU_VALUE,    &m1,                sizeof(int),
+            STARPU_VALUE,    &n1,                sizeof(int),
+            STARPU_VALUE,    &m2,                sizeof(int),
+            STARPU_VALUE,    &n2,                sizeof(int),
+            STARPU_VALUE,    &k,                 sizeof(int),
+            STARPU_VALUE,    &ib,                sizeof(int),
+            STARPU_VALUE,    &nb,                sizeof(int),
+            STARPU_RW,        RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),
+            STARPU_VALUE,    &lda1,              sizeof(int),
+            STARPU_RW,        RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),
+            STARPU_VALUE,    &lda2,              sizeof(int),
+            STARPU_R,         RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
+            STARPU_VALUE,    &ldv,               sizeof(int),
+            STARPU_R,         RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
+            STARPU_VALUE,    &ldt,               sizeof(int),
+            STARPU_SCRATCH,   options->ws_worker,
+            STARPU_VALUE,    &ldwork,            sizeof(int),
+            STARPU_PRIORITY,  options->priority,
+            STARPU_CALLBACK,  callback,
+            0);
+    }
+}
+
+#if defined(CHAMELEON_SIMULATION)
+static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg)
+{
+    MORSE_enum side;
+    MORSE_enum trans;
+    int m1;
+    int n1;
+    int m2;
+    int n2;
+    int k;
+    int ib;
+    int nb;
+    MORSE_Complex64_t *A1;
+    int lda1;
+    MORSE_Complex64_t *A2;
+    int lda2;
+    MORSE_Complex64_t *V;
+    int ldv;
+    MORSE_Complex64_t *T;
+    int ldt;
+
+    MORSE_Complex64_t *WORK;
+    int ldwork;
+
+    A1    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    A2    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    V     = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    T     = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
+    WORK  = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
+
+    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k,
+                               &ib, &nb, &lda1, &lda2, &ldv, &ldt, &ldwork);
+    CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k,
+                       ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
new file mode 100644
index 0000000000000000000000000000000000000000..5e305f000b90039c95b78519a77fb12ad35f20d9
--- /dev/null
+++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
@@ -0,0 +1,121 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file codelet_ztsmqr_hetra1.c
+ *
+ *  MORSE codelets kernel
+ *  MORSE is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.7.1
+ * @author Hatem Ltaief
+ * @author Mathieu Faverge
+ * @author Azzam Haidar
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include "runtime/starpu/include/morse_starpu.h"
+#include "runtime/starpu/include/runtime_codelet_z.h"
+
+/**
+ *
+ * @ingroup CORE_MORSE_Complex64_t
+ *
+ **/
+
+void MORSE_TASK_ztsmqr_hetra1(const MORSE_option_t *options,
+                              MORSE_enum side, MORSE_enum trans,
+                              int m1, int n1, int m2, int n2, int k, int ib, int nb,
+                              const MORSE_desc_t *A1, int A1m, int A1n, int lda1,
+                              const MORSE_desc_t *A2, int A2m, int A2n, int lda2,
+                              const MORSE_desc_t *V,  int Vm,  int Vn,  int ldv,
+                              const MORSE_desc_t *T,  int Tm,  int Tn,  int ldt)
+{
+    struct starpu_codelet *codelet = &cl_ztsmqr_hetra1;
+    void (*callback)(void*) = options->profiling ? cl_ztsmqr_hetra1_callback : NULL;
+
+    int ldwork = side == MorseLeft ? ib : nb;
+
+    /* T follows distribution of V */
+    if ( morse_desc_islocal( A1, A1m, A1n ) ||
+         morse_desc_islocal( A2, A2m, A2n ) ||
+         morse_desc_islocal( V,  Vm,  Vn  ) )
+    {
+        starpu_insert_task(
+            codelet,
+            STARPU_VALUE,    &side,              sizeof(MORSE_enum),
+            STARPU_VALUE,    &trans,             sizeof(MORSE_enum),
+            STARPU_VALUE,    &m1,                sizeof(int),
+            STARPU_VALUE,    &n1,                sizeof(int),
+            STARPU_VALUE,    &m2,                sizeof(int),
+            STARPU_VALUE,    &n2,                sizeof(int),
+            STARPU_VALUE,    &k,                 sizeof(int),
+            STARPU_VALUE,    &ib,                sizeof(int),
+            STARPU_RW,        RTBLKADDR(A1, MORSE_Complex64_t, A1m, A1n),
+            STARPU_VALUE,    &lda1,              sizeof(int),
+            STARPU_RW,        RTBLKADDR(A2, MORSE_Complex64_t, A2m, A2n),
+            STARPU_VALUE,    &lda2,              sizeof(int),
+            STARPU_R,         RTBLKADDR(V, MORSE_Complex64_t, Vm, Vn),
+            STARPU_VALUE,    &ldv,               sizeof(int),
+            STARPU_R,         RTBLKADDR(T, MORSE_Complex64_t, Tm, Tn),
+            STARPU_VALUE,    &ldt,               sizeof(int),
+            STARPU_SCRATCH,   options->ws_worker,
+            STARPU_VALUE,    &ldwork,            sizeof(int),
+            STARPU_PRIORITY,  options->priority,
+            STARPU_CALLBACK,  callback,
+            STARPU_NAME,      "ztsmqr_hetra1",
+            0);
+    }
+}
+
+#if defined(CHAMELEON_SIMULATION)
+static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg)
+{
+    MORSE_enum side;
+    MORSE_enum trans;
+    int m1;
+    int n1;
+    int m2;
+    int n2;
+    int k;
+    int ib;
+    MORSE_Complex64_t *A1;
+    int lda1;
+    MORSE_Complex64_t *A2;
+    int lda2;
+    MORSE_Complex64_t *V;
+    int ldv;
+    MORSE_Complex64_t *T;
+    int ldt;
+
+    /* TODO: manage workspace */
+    MORSE_Complex64_t *WORK;
+    int ldwork;
+
+    A1    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    A2    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    V     = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    T     = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
+    WORK  = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]);
+
+    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k,
+                               &ib, &lda1, &lda2, &ldv, &ldt, &ldwork);
+    CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k,
+                       ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c
index 4c913d17873ab5f397f040cf035e014ca5be1e04..38678c0fc43b89664626932e8cbf956c2e899a74 100644
--- a/runtime/starpu/codelets/codelet_zunmlq.c
+++ b/runtime/starpu/codelets/codelet_zunmlq.c
@@ -150,6 +150,7 @@ void MORSE_TASK_zunmlq(const MORSE_option_t *options,
             STARPU_VALUE,    &nb,                sizeof(int),
             STARPU_PRIORITY,  options->priority,
             STARPU_CALLBACK,  callback,
+            STARPU_NAME,      "zunmlq",
             0);
     }
 }
@@ -164,17 +165,17 @@ static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg)
     int n;
     int k;
     int ib;
-    MORSE_Complex64_t *A;
+    const MORSE_Complex64_t *A;
     int lda;
-    MORSE_Complex64_t *T;
+    const MORSE_Complex64_t *T;
     int ldt;
     MORSE_Complex64_t *C;
     int ldc;
     MORSE_Complex64_t *WORK;
     int ldwork;
 
-    A    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    A    = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    T    = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
     C    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
     WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
 
@@ -195,7 +196,8 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg)
     int n;
     int k;
     int ib;
-    cuDoubleComplex *A, *T, *C, *WORK;
+    const cuDoubleComplex *A, *T;
+    cuDoubleComplex *C, *WORK;
     int lda, ldt, ldc, ldwork;
     int info = 0;
     CUstream stream;
@@ -203,8 +205,8 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
                                &lda, &ldt, &ldc, &ldwork);
 
-    A    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
+    A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
+    T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
 
diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c
index 61bec4f56863a7160553df46f1025ff3f2461af0..e8b47fbb38d5ea10ab7ccfbe9a427b2b39f9a953 100644
--- a/runtime/starpu/codelets/codelet_zunmqr.c
+++ b/runtime/starpu/codelets/codelet_zunmqr.c
@@ -190,17 +190,17 @@ static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg)
     int n;
     int k;
     int ib;
-    MORSE_Complex64_t *A;
+    const MORSE_Complex64_t *A;
     int lda;
-    MORSE_Complex64_t *T;
+    const MORSE_Complex64_t *T;
     int ldt;
     MORSE_Complex64_t *C;
     int ldc;
     MORSE_Complex64_t *WORK;
     int ldwork;
 
-    A    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    A    = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    T    = (const MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
     C    = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
     WORK = (MORSE_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
 
@@ -221,7 +221,8 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg)
     int n;
     int k;
     int ib;
-    cuDoubleComplex *A, *T, *C, *WORK;
+    const cuDoubleComplex *A, *T;
+    cuDoubleComplex *C, *WORK;
     int lda, ldt, ldc, ldwork;
     int info = 0;
     CUstream stream;
@@ -229,8 +230,8 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg)
     starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
                                &lda, &ldt, &ldc, &ldwork);
 
-    A    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
+    A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
+    T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
     C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
     WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
 
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index 7af0ac2e49860997f9a7c1d190b4c2765ace7bd0..0da29addb22d4383b6266e757df2a3fb0d751189 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -65,6 +65,7 @@ ZCODELETS_HEADER(gessq)
 ZCODELETS_HEADER(getrf)
 ZCODELETS_HEADER(getrf_incpiv)
 ZCODELETS_HEADER(getrf_nopiv)
+ZCODELETS_HEADER(herfb)
 ZCODELETS_HEADER(lauum)
 ZCODELETS_HEADER(potrf)
 ZCODELETS_HEADER(ssssm)
@@ -75,6 +76,8 @@ ZCODELETS_HEADER(trtri)
 ZCODELETS_HEADER(tslqt)
 ZCODELETS_HEADER(tsmlq)
 ZCODELETS_HEADER(tsmqr)
+ZCODELETS_HEADER(tsmlq_hetra1)
+ZCODELETS_HEADER(tsmqr_hetra1)
 ZCODELETS_HEADER(tsqrt)
 ZCODELETS_HEADER(tstrf)
 ZCODELETS_HEADER(ttlqt)
@@ -88,6 +91,7 @@ ZCODELETS_HEADER(unmqr)
  * Auxiliary functions
  */
 ZCODELETS_HEADER(geadd)
+ZCODELETS_HEADER(he2ge)
 ZCODELETS_HEADER(lascal)
 ZCODELETS_HEADER(tradd)
 ZCODELETS_HEADER(lacpy)
@@ -97,6 +101,7 @@ ZCODELETS_HEADER(lansy)
 ZCODELETS_HEADER(lantr)
 ZCODELETS_HEADER(laset)
 ZCODELETS_HEADER(laset2)
+ZCODELETS_HEADER(latro)
 ZCODELETS_HEADER(plssq)
 ZCODELETS_HEADER(plssq2)
 
diff --git a/testing/CMakeLists.txt b/testing/CMakeLists.txt
index e4de172912b9ab9b42449bac31934c7ad8246d3d..fe9c243f374a1210b8394491c77bd2e6465e3756 100644
--- a/testing/CMakeLists.txt
+++ b/testing/CMakeLists.txt
@@ -82,10 +82,10 @@ set(ZSRC
     ##################
     testing_zgeadd.c
     #testing_zgecfi.c
-    #testing_zgesvd.c
+    testing_zgesvd.c
     #testing_zgetmi.c
     #testing_zheev.c
-    #testing_zheevd.c
+    testing_zheevd.c
     #testing_zhegst.c
     #testing_zhegv.c
     #testing_zhegvd.c
@@ -138,6 +138,7 @@ if(NOT CHAMELEON_SIMULATION)
     list(APPEND libs_for_tests
     coreblas
     ${LAPACKE_LIBRARIES}
+    ${TMG_LIBRARIES}
     ${LAPACK_LIBRARIES}
     ${CBLAS_LIBRARIES}
     ${BLAS_LIBRARIES}
@@ -146,6 +147,7 @@ if(NOT CHAMELEON_SIMULATION)
     )
 
     link_directories(${LAPACKE_LIBRARY_DIRS})
+    link_directories(${TMG_LIBRARY_DIRS})
     link_directories(${LAPACK_LIBRARY_DIRS})
     link_directories(${CBLAS_LIBRARY_DIRS})
     link_directories(${BLAS_LIBRARY_DIRS})
diff --git a/testing/testing_zauxiliary.c b/testing/testing_zauxiliary.c
index 503fa34b2540cec94e6aa92674cedcb97df22589..aa73b238279ecac1614e926aa1ab4c4f5f3781d0 100644
--- a/testing/testing_zauxiliary.c
+++ b/testing/testing_zauxiliary.c
@@ -194,99 +194,117 @@ int main (int argc, char **argv)
      */
     if ( strcmp(func, "LANGE") == 0 ) {
         info += testing_zlange( argc, argv );
-        /*
-         * Blas Level 3
-         */
-    } else if ( strcmp(func, "GEMM") == 0 ) {
+    }
+    /*
+     * Blas Level 3
+     */
+    else if ( strcmp(func, "GEMM") == 0 ) {
         info += testing_zgemm( argc, argv );
+    }
 #ifdef COMPLEX
-    } else if ( strcmp(func, "HEMM") == 0 ) {
+    else if ( strcmp(func, "HEMM") == 0 ) {
         info += testing_zhemm( argc, argv );
-    } else if ( strcmp(func, "HERK") == 0 ) {
+    }
+    else if ( strcmp(func, "HERK") == 0 ) {
         info += testing_zherk( argc, argv );
-    } else if ( strcmp(func, "HER2K") == 0 ) {
+    }
+    else if ( strcmp(func, "HER2K") == 0 ) {
         info += testing_zher2k( argc, argv );
+    }
 #endif
-    } else if ( strcmp(func, "SYMM") == 0 ) {
+    else if ( strcmp(func, "SYMM") == 0 ) {
         info += testing_zsymm( argc, argv );
-    } else if ( strcmp(func, "SYRK") == 0 ) {
+    }
+    else if ( strcmp(func, "SYRK") == 0 ) {
         info += testing_zsyrk( argc, argv );
-    } else if ( strcmp(func, "SYR2K") == 0 ) {
+    }
+    else if ( strcmp(func, "SYR2K") == 0 ) {
         info += testing_zsyr2k( argc, argv );
-    } else if ( strcmp(func, "TRMM") == 0 ) {
+    }
+    else if ( strcmp(func, "TRMM") == 0 ) {
         info += testing_ztrmm( argc, argv );
-    } else if ( strcmp(func, "TRSM") == 0 ) {
+    }
+    else if ( strcmp(func, "TRSM") == 0 ) {
         info += testing_ztrsm( argc, argv );
-    } else if ( strcmp(func, "PEMV") == 0 ) {
+    }
+    else if ( strcmp(func, "PEMV") == 0 ) {
         info += testing_zpemv( argc, argv );
-    } else if ( strcmp(func, "GEADD") == 0 ) {
+    }
+    else if ( strcmp(func, "GEADD") == 0 ) {
         info = testing_zgeadd( argc, argv );
-        /*
-         * Linear system
-         */
-    } else if ( strcmp(func, "POSV") == 0 ) {
+    }
+    /*
+     * Linear system
+     */
+    else if ( strcmp(func, "POSV") == 0 ) {
         info += testing_zposv( argc, argv );
-    } else if ( strcmp(func, "GELS") == 0 ) {
+    }
+    else if ( strcmp(func, "GELS") == 0 ) {
         info += testing_zgels( argc, argv );
-    } else if ( strcmp(func, "GESV_INCPIV") == 0 ) {
+    }
+    else if ( strcmp(func, "GESV_INCPIV") == 0 ) {
         info += testing_zgesv_incpiv( argc, argv );
-        /*
-         } else if ( strcmp(func, "GESV") == 0 ) {
-         info += testing_zgesv( argc, argv );
-         */
-        /*
-         * Matrix inversion
-         */
-
-    } else if ( strcmp(func, "POTRI") == 0 ) {
+    }
+    /* else if ( strcmp(func, "GESV") == 0 ) { */
+    /*     info += testing_zgesv( argc, argv ); */
+    /* } */
+    /*
+     * Matrix inversion
+     */
+    else if ( strcmp(func, "POTRI") == 0 ) {
         info += testing_zpotri( argc, argv );
-        /*
-         } else if ( strcmp(func, "GETRI") == 0 ) {
-         info += testing_zgetri( argc, argv );
-         */
-        /*
-         * Eigenvalue Problems
-         */
-        /*
-         } else if ( strcmp(func, "HEEV") == 0 ) {
-         info += testing_zheev( argc, argv );
-         } else if ( strcmp(func, "HEEVD") == 0 ) {
-         info += testing_zheevd( argc, argv );
-         } else if ( strcmp(func, "HEGV") == 0 ) {
-         info += testing_zhegv( argc, argv );
-         } else if ( strcmp(func, "HEGVD") == 0 ) {
-         info += testing_zhegv( argc, argv );
-         } else if ( strcmp(func, "HEGST") == 0 ) {
-         info += testing_zhegst( argc, argv );
-         */
-        /*
-         * Singular Value Decomposition
-         */
-        /*
-         } else if ( strcmp(func, "GESVD") == 0 ) {
-         info += testing_zgesvd( argc, argv );
-         */
+    }
+    /* else if ( strcmp(func, "GETRI") == 0 ) { */
+    /*      info += testing_zgetri( argc, argv ); */
+    /* } */
+    /*
+     * Eigenvalue Problems
+     */
+    /* else if ( strcmp(func, "HEEV") == 0 ) { */
+    /*     info += testing_zheev( argc, argv ); */
+    /* } */
+    else if ( strcmp(func, "HEEVD") == 0 ) {
+        info += testing_zheevd( argc, argv );
+    }
+    /* else if ( strcmp(func, "HEGV") == 0 ) { */
+    /*     info += testing_zhegv( argc, argv ); */
+    /* } */
+    /* else if ( strcmp(func, "HEGVD") == 0 ) { */
+    /*     info += testing_zhegv( argc, argv ); */
+    /* } */
+    /* else if ( strcmp(func, "HEGST") == 0 ) { */
+    /*     info += testing_zhegst( argc, argv ); */
+    /* } */
+    /*
+     * Singular Value Decomposition
+     */
+    else if ( strcmp(func, "GESVD") == 0 ) {
+        info += testing_zgesvd( argc, argv );
+    }
 #ifdef DOUBLE
-        /*
-         * Mixed precision
-         */
-        /*
-         } else if ( strcmp(func, "CPOSV") == 0 ) {
-         info += testing_zcposv( argc, argv );
-         } else if ( strcmp(func, "CGESV") == 0 ) {
-         info += testing_zcgesv( argc, argv );
-         } else if ( strcmp(func, "CUNGESV") == 0 ) {
-         info += testing_zcungesv( argc, argv );
-         */
+    /*
+     * Mixed precision
+     */
+    /* else if ( strcmp(func, "CPOSV") == 0 ) { */
+    /*     info += testing_zcposv( argc, argv ); */
+    /* } */
+    /* else if ( strcmp(func, "CGESV") == 0 ) { */
+    /*     info += testing_zcgesv( argc, argv ); */
+    /* } */
+    /* else if ( strcmp(func, "CUNGESV") == 0 ) { */
+    /*     info += testing_zcungesv( argc, argv ); */
+    /* } */
 #endif
-        /* Layout Transformation */
-        /*
-         } else if ( strcmp(func, "GECFI") == 0 ) {
-         info += testing_zgecfi( argc, argv );
-         } else if ( strcmp(func, "GETMI") == 0 ) {
-         info += testing_zgetmi( argc, argv );
-         */
-    } else {
+    /*
+     * Layout Transformation
+     */
+    /* else if ( strcmp(func, "GECFI") == 0 ) { */
+    /*     info += testing_zgecfi( argc, argv ); */
+    /* } */
+    /* else if ( strcmp(func, "GETMI") == 0 ) { */
+    /*     info += testing_zgetmi( argc, argv ); */
+    /* } */
+    else {
         fprintf(stderr, "Function unknown\n");
     }
 
diff --git a/testing/testing_zgesvd.c b/testing/testing_zgesvd.c
new file mode 100644
index 0000000000000000000000000000000000000000..83f784e2df5dc45508a4c2f3a2ed2e3a89a735bd
--- /dev/null
+++ b/testing/testing_zgesvd.c
@@ -0,0 +1,364 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file testing_zgesvd.c
+ *
+ *  PLASMA testing routines
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Azzam Haidar
+ * @author Hatem Ltaief
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include <morse.h>
+#include <coreblas/include/cblas.h>
+#include <coreblas/include/lapacke.h>
+#include <coreblas/include/coreblas.h>
+#include "testing_zauxiliary.h"
+
+static int check_orthogonality(int, int, int, MORSE_Complex64_t*, int, double);
+static int check_reduction(int, int, double*, MORSE_Complex64_t*, int, MORSE_Complex64_t*, int, MORSE_Complex64_t*, int, double);
+static int check_solution(int, double*, double*, double);
+
+int testing_zgesvd(int argc, char **argv)
+{
+    int tree = 0;
+
+    if ( argc < 1 ){
+        goto usage;
+    } else {
+        tree = atoi(argv[0]);
+    }
+
+    /* Check for number of arguments*/
+    if ( ((tree == 0) && (argc != 4)) ||
+         ((tree != 0) && (argc != 5)) ){
+      usage:
+        USAGE("GESVD", "MODE M N LDA [RH]",
+              "   - MODE : 0: flat, 1: tree (RH needed)\n"
+              "   - M    : number of rows of the matrix A\n"
+              "   - N    : number of columns of the matrix A\n"
+              "   - LDA  : leading dimension of the matrix A\n"
+              "   - RH   : Size of each subdomains\n");
+        return -1;
+    }
+
+    int M   = atoi(argv[1]);
+    int N   = atoi(argv[2]);
+    int LDA = atoi(argv[3]);
+    int rh;
+    if ( tree ) {
+        rh = atoi(argv[4]);
+        MORSE_Set(MORSE_HOUSEHOLDER_MODE, MORSE_TREE_HOUSEHOLDER);
+        MORSE_Set(MORSE_HOUSEHOLDER_SIZE, rh);
+    }
+
+    if (LDA < M){
+        printf("LDA should be >= M !\n");
+        return -1;
+    }
+
+    double eps  = LAPACKE_dlamch_work('e');
+    double dmax = 1.0;
+    MORSE_enum jobu  = MorseVec;
+    MORSE_enum jobvt = MorseVec;
+    int info_orthou    = 0;
+    int info_orthovt   = 0;
+    int info_solution  = 0;
+    int info_reduction = 0;
+    int minMN = min(M, N);
+    int mode  = 4;
+    double rcond = (double) minMN;
+    int INFO=-1;
+
+    MORSE_Complex64_t *A1   = (MORSE_Complex64_t *)malloc(LDA*N*sizeof(MORSE_Complex64_t));
+    double *S1              = (double *)           malloc(minMN*sizeof(double));
+    double *S2              = (double *)           malloc(minMN*sizeof(double));
+    MORSE_Complex64_t *work = (MORSE_Complex64_t *)malloc(3*max(M, N)* sizeof(MORSE_Complex64_t));
+    MORSE_Complex64_t *A2 = NULL;
+    MORSE_Complex64_t *U  = NULL;
+    MORSE_Complex64_t *VT = NULL;
+    MORSE_desc_t *T;
+
+    /* Check if unable to allocate memory */
+    if ( (!A1) || (!S1) || (!S2) || (!work) ) {
+        printf("Out of Memory \n ");
+        return -2;
+    }
+
+    /* TODO: check problem with workspace!!! */
+    MORSE_Alloc_Workspace_zgesvd(M, N, &T, 1, 1);
+
+    /*----------------------------------------------------------
+    *  TESTING ZGESVD
+    */
+    /* Initialize A1 */
+    LAPACKE_zlatms_work( LAPACK_COL_MAJOR, M, N,
+                         morse_lapack_const(MorseDistUniform), ISEED,
+                         morse_lapack_const(MorseNonsymPosv), S1, mode, rcond,
+                         dmax, M, N,
+                         morse_lapack_const(MorseNoPacking), A1, LDA, work );
+    free(work);
+
+    /* Copy A1 for check */
+    if ( (jobu == MorseVec) && (jobvt == MorseVec) ) {
+        A2 = (MORSE_Complex64_t *)malloc(LDA*N*sizeof(MORSE_Complex64_t));
+        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, N, A1, LDA, A2, LDA);
+    }
+    if ( jobu == MorseVec ) {
+        U = (MORSE_Complex64_t *)malloc(M*M*sizeof(MORSE_Complex64_t));
+        LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', M, M, 0., 1., U, M);
+    }
+    if ( jobvt == MorseVec ) {
+        VT = (MORSE_Complex64_t *)malloc(N*N*sizeof(MORSE_Complex64_t));
+        LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', N, N, 0., 1., VT, N);
+    }
+
+    /* MORSE ZGESVD */
+    INFO = MORSE_zgesvd(jobu, jobvt, M, N, A1, LDA, S2, T, U, M, VT, N);
+    if( INFO != 0 ){
+        printf(" MORSE_zgesvd returned with error code %d\n",INFO);
+        goto fin;
+    }
+
+    printf("\n");
+    printf("------ TESTS FOR MORSE ZGESVD ROUTINE -------  \n");
+    printf("        Size of the Matrix %d by %d\n", M, N);
+    printf("\n");
+    printf(" The matrix A is randomly generated for each test.\n");
+    printf("============\n");
+    printf(" The relative machine precision (eps) is to be %e \n",eps);
+    printf(" Computational tests pass if scaled residuals are less than 60.\n");
+
+
+    /* Check the orthogonality, reduction and the singular values */
+    if ( jobu == MorseVec )
+        info_orthou = check_orthogonality(MorseLeft, M, M, U, M, eps);
+
+    if ( jobvt == MorseVec )
+        info_orthovt = check_orthogonality(MorseRight, N, N, VT, N, eps);
+
+    if ( (jobu == MorseVec) && (jobvt == MorseVec) )
+        info_reduction = check_reduction(M, N, S2, A2, LDA, U, M, VT, N, eps);
+
+    info_solution = check_solution(minMN, S1, S2, eps);
+
+    if ( (info_solution == 0) & (info_orthou == 0) &
+         (info_orthovt == 0) & (info_reduction == 0) ) {
+        if (M >= N) {
+           printf("***************************************************\n");
+           printf(" ---- TESTING ZGESVD .. M >= N ........... PASSED !\n");
+           printf("***************************************************\n");
+        }
+        else {
+           printf("***************************************************\n");
+           printf(" ---- TESTING ZGESVD .. M < N ............ PASSED !\n");
+           printf("***************************************************\n");
+        }
+    }
+    else {
+        if (M >= N) {
+           printf("************************************************\n");
+           printf(" - TESTING ZGESVD .. M >= N .. FAILED !\n");
+           printf("************************************************\n");
+        }
+        else {
+           printf("************************************************\n");
+           printf(" - TESTING ZGESVD .. M < N .. FAILED !\n");
+           printf("************************************************\n");
+        }
+    }
+
+fin:
+    if ( A2 != NULL ) free(A2);
+    if ( U  != NULL ) free(U);
+    if ( VT != NULL ) free(VT);
+    free(A1); free(S1); free(S2);
+    MORSE_Dealloc_Workspace(&T);
+
+    return 0;
+}
+
+/*-------------------------------------------------------------------
+ * Check the orthogonality of U VT
+ */
+static int check_orthogonality(int side, int M, int N, MORSE_Complex64_t *Q, int LDQ, double eps)
+{
+    double  done =  1.0;
+    double  mdone  = -1.0;
+    double  normQ, result;
+    int     info_ortho;
+    int     minMN = min(M, N);
+    double *work = (double *)malloc(minMN*sizeof(double));
+
+    /* Build the idendity matrix */
+    MORSE_Complex64_t *Id = (MORSE_Complex64_t *) malloc(minMN*minMN*sizeof(MORSE_Complex64_t));
+    LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', minMN, minMN, 0., 1., Id, minMN);
+
+    /* Perform Id - Q'Q */
+    if (M >= N)
+        cblas_zherk(CblasColMajor, CblasUpper, CblasConjTrans, N, M, done, Q, LDQ, mdone, Id, N);
+    else
+        cblas_zherk(CblasColMajor, CblasUpper, CblasNoTrans,   M, N, done, Q, LDQ, mdone, Id, M);
+
+    normQ = LAPACKE_zlansy_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), 'U', minMN, Id, minMN, work);
+
+    if (getenv("MORSE_TESTING_VERBOSE"))
+        printf( "||Q||_oo=%e\n", normQ );
+
+    result = normQ / (M * eps);
+    if (side == MorseLeft)
+    {
+        printf(" ======================================================\n");
+        printf(" ||Id-U'*U||_oo / (M*eps)            : %e \n",  result );
+        printf(" ======================================================\n");
+    }
+    else
+    {
+        printf(" ======================================================\n");
+        printf(" ||Id-VT'*VT||_oo / (M*eps)          : %e \n",  result );
+        printf(" ======================================================\n");
+    }
+
+    if ( isnan(result) || isinf(result) || (result > 60.0) ) {
+        printf("-- Orthogonality is suspicious ! \n");
+        info_ortho = 1;
+    }
+    else {
+        printf("-- Orthogonality is CORRECT ! \n");
+        info_ortho = 0;
+    }
+
+    free(work); free(Id);
+
+    return info_ortho;
+}
+
+/*------------------------------------------------------------
+ *  Check the bidiagonal reduction
+ */
+static int check_reduction(int M, int N, double *S, MORSE_Complex64_t *A, int LDA,
+                           MORSE_Complex64_t *U, int LDU, MORSE_Complex64_t *VT, int LDVT, double eps )
+{
+    MORSE_Complex64_t zone  =  1.0;
+    MORSE_Complex64_t mzone = -1.0;
+    MORSE_Complex64_t zzero = 0.0;
+    double Anorm, Rnorm, result;
+    int info_reduction;
+    int i;
+    int maxMN = max(M, N);
+    int minMN = min(M, N);
+
+    MORSE_Complex64_t *TEMP     = (MORSE_Complex64_t *)malloc(minMN*minMN*sizeof(MORSE_Complex64_t));
+    MORSE_Complex64_t *Residual = (MORSE_Complex64_t *)malloc(M    *N    *sizeof(MORSE_Complex64_t));
+    double *work = (double *)malloc(maxMN*sizeof(double));
+
+    LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseUpperLower), M, N, A, LDA, Residual, M);
+
+    if ( M >= N ) {
+        /* Compute TEMP =  SIGMA * Vt */
+        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, VT, LDVT, TEMP, N);
+        for (i = 0; i < minMN; i++){
+            cblas_zdscal(N, S[i], TEMP + i, N);
+        }
+
+        /* Compute Residual = A - U * (SIGMA * VT) */
+        cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, N,
+                    CBLAS_SADDR(mzone), U,        LDU,
+                                        TEMP,     minMN,
+                    CBLAS_SADDR(zone),  Residual, M);
+    }
+    else {
+        /* Compute TEMP =  U * SIGMA */
+        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', M, M, U, LDU, TEMP, M);
+        for (i = 0; i < minMN; i++){
+            cblas_zdscal(M, S[i], TEMP + i*M, 1);
+        }
+
+        /* Compute Residual = A - (U * SIGMA) * VT */
+        cblas_zgemm(CblasColMajor, CblasNoTrans, CblasNoTrans, M, N, M,
+                    CBLAS_SADDR(mzone), TEMP,     M,
+                                        VT,       LDVT,
+                    CBLAS_SADDR(zone),  Residual, M);
+    }
+
+    /* Compute the norms */
+    Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseOneNorm), M, N, Residual, M,   work);
+    Anorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseOneNorm), M, N, A,        LDA, work);
+
+    if (getenv("MORSE_TESTING_VERBOSE"))
+        printf( "||A||_oo=%e\n||A - U*SIGMA*VT||_oo=%e\n", Anorm, Rnorm );
+
+    result = Rnorm / ( Anorm * maxMN * eps);
+    printf(" ======================================================\n");
+    printf(" ||A-U*SIGMA*V'||_oo/(||A||_oo.N.eps) : %e \n",  result );
+    printf(" ======================================================\n");
+
+    if ( isnan(result) || isinf(result) || (result > 60.0) ) {
+        printf("-- Reduction is suspicious ! \n");
+        info_reduction = 1;
+    }
+    else {
+        printf("-- Reduction is CORRECT ! \n");
+        info_reduction = 0;
+    }
+
+    free(TEMP);
+    free(Residual);
+    free(work);
+
+    return info_reduction;
+}
+
+/*------------------------------------------------------------
+ *  Check the eigenvalues
+ */
+static int check_solution(int N, double *E1, double *E2, double eps)
+{
+    int info_solution, i;
+    double resid;
+    double maxtmp;
+    double maxel = fabs( fabs(E1[0]) - fabs(E2[0]) );
+    double maxeig = max( fabs(E1[0]), fabs(E2[0]) );
+    for (i = 1; i < N; i++){
+        resid   = fabs(fabs(E1[i])-fabs(E2[i]));
+        maxtmp  = max(fabs(E1[i]), fabs(E2[i]));
+
+        /* Update */
+        maxeig = max(maxtmp, maxeig);
+        maxel  = max(resid,  maxel );
+    }
+
+    maxel = maxel / (maxeig * N * eps);
+    printf(" ======================================================\n");
+    printf(" | S - singularcomputed | / (|S| * N * eps) : %e \n",  maxel );
+    printf(" ======================================================\n");
+
+    if ( isnan(maxel) || isinf(maxel) || (maxel > 100) ) {
+        printf("-- The singular values are suspicious ! \n");
+        info_solution = 1;
+    }
+    else{
+        printf("-- The singular values are CORRECT ! \n");
+        info_solution = 0;
+    }
+    return info_solution;
+}
diff --git a/testing/testing_zheevd.c b/testing/testing_zheevd.c
new file mode 100644
index 0000000000000000000000000000000000000000..0837a58c799f38798e21a6b3889e95c030f5c2f8
--- /dev/null
+++ b/testing/testing_zheevd.c
@@ -0,0 +1,293 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @file testing_zheevd.c
+ *
+ *  PLASMA testing routines
+ *  PLASMA is a software package provided by Univ. of Tennessee,
+ *  Univ. of California Berkeley and Univ. of Colorado Denver
+ *
+ * @version 2.8.0
+ * @author Hatem Ltaief
+ * @author Azzam Haidar
+ * @date 2010-11-15
+ * @precisions normal z -> c d s
+ *
+ **/
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <math.h>
+
+#include <morse.h>
+#include <coreblas/include/cblas.h>
+#include <coreblas/include/lapacke.h>
+#include <coreblas/include/coreblas.h>
+#include "testing_zauxiliary.h"
+
+static int check_orthogonality(int, int, MORSE_Complex64_t*, int, double);
+static int check_reduction(int, int, int, MORSE_Complex64_t*, double*, int, MORSE_Complex64_t*, double);
+static int check_solution(int, double*, double*, double);
+
+int testing_zheevd(int argc, char **argv)
+{
+    /* Check for number of arguments*/
+    if (argc != 3) {
+        USAGE("HEEVD", "MODE N LDA",
+              "   - MODE : mode used to generate the matrix\n"
+              "   - N    : size of the matrix A\n"
+              "   - LDA  : leading dimension of the matrix A\n");
+        return -1;
+    }
+
+    int    mode  = atoi(argv[0]);
+    int    N     = atoi(argv[1]);
+    int    LDA   = atoi(argv[2]);
+    double eps   = LAPACKE_dlamch_work('e');
+    double dmax  = 1.0;
+    double rcond = 1.0e6;
+    int INFO     = -1;
+
+    MORSE_enum uplo    = MorseUpper;
+    MORSE_enum vec     = MorseVec;
+    int info_ortho     = 0;
+    int info_solution  = 0;
+    int info_reduction = 0;
+    int LDAxN          = LDA * N;
+
+    MORSE_Complex64_t *A1   = NULL;
+    MORSE_Complex64_t *A2   = (MORSE_Complex64_t *)malloc(LDAxN * sizeof(MORSE_Complex64_t));
+    double            *W1   = (double *)malloc(N * sizeof(double));
+    double            *W2   = (double *)malloc(N * sizeof(double));
+    MORSE_Complex64_t *work = (MORSE_Complex64_t *)malloc(3* N * sizeof(MORSE_Complex64_t));
+    MORSE_desc_t      *T;
+
+    /* Check if unable to allocate memory */
+    if ( (!A2) || (!W1) || (!W2) ){
+        printf("Out of Memory \n ");
+        return -2;
+    }
+
+    MORSE_Alloc_Workspace_zheevd(N, N, &T, 1, 1);
+
+    /*----------------------------------------------------------
+    *  TESTING ZHEEVD
+    */
+    /* Initialize A1 */
+    if (mode == 0){
+        int i;
+        for (i=0; i<N; i++){
+            W1[i] = (double )i+1;
+        }
+    }
+    LAPACKE_zlatms_work( LAPACK_COL_MAJOR, N, N,
+                         morse_lapack_const(MorseDistSymmetric), ISEED,
+                         morse_lapack_const(MorseHermGeev), W1, mode, rcond,
+                         dmax, N, N,
+                         morse_lapack_const(MorseNoPacking), A2, LDA, work );
+
+    /*
+     * Sort the eigenvalue because when computing the tridiag
+     * and then the eigenvalue of the DSTQR are sorted.
+     * So to avoid testing fail when having good results W1 should be sorted
+     */
+    LAPACKE_dlasrt_work( 'I', N, W1 );
+
+    if ( vec == MorseVec ) {
+        A1 = (MORSE_Complex64_t *)malloc(LDAxN*sizeof(MORSE_Complex64_t));
+
+        /* Copy A2 into A1 */
+        LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, 'A', N, N, A2, LDA, A1, LDA);
+    }
+
+    /*
+     * MORSE ZHEEVD
+     */
+    INFO = MORSE_zheevd(vec, uplo, N, A2, LDA, W2, T);
+
+    if (INFO != 0) {
+        printf(" ERROR OCCURED INFO %d\n", INFO);
+        goto fin;
+    }
+
+    printf("\n");
+    printf("------ TESTS FOR MORSE ZHEEVD ROUTINE -------  \n");
+    printf("        Size of the Matrix %d by %d\n", N, N);
+    printf("\n");
+    printf(" The matrix A is randomly generated for each test.\n");
+    printf("============\n");
+    printf(" The relative machine precision (eps) is to be %e \n", eps);
+    printf(" Computational tests pass if scaled residuals are less than 60.\n");
+
+    /* Check the orthogonality, reduction and the eigen solutions */
+    if (vec == MorseVec) {
+        info_ortho = check_orthogonality(N, N, A2, LDA, eps);
+        info_reduction = check_reduction(uplo, N, 1, A1, W2, LDA, A2, eps);
+    }
+    info_solution = check_solution(N, W1, W2, eps);
+
+    if ( (info_solution == 0) & (info_ortho == 0) & (info_reduction == 0) ) {
+        printf("***************************************************\n");
+        printf(" ---- TESTING ZHEEVD ...................... PASSED !\n");
+        printf("***************************************************\n");
+    }
+    else {
+        printf("************************************************\n");
+        printf(" - TESTING ZHEEVD ... FAILED !\n");
+        printf("************************************************\n");
+    }
+
+ fin:
+    MORSE_Dealloc_Workspace(&T);
+    free(A2);
+    free(W1);
+    free(W2);
+    free(work);
+    if (A1 != NULL) free(A1);
+
+    return 0;
+}
+
+/*-------------------------------------------------------------------
+ * Check the orthogonality of Q
+ */
+static int check_orthogonality(int M, int N, MORSE_Complex64_t *Q, int LDQ, double eps)
+{
+    double  done  =  1.0;
+    double  mdone = -1.0;
+    double  normQ, result;
+    int     info_ortho;
+    int     minMN = min(M, N);
+    double *work = (double *)malloc(minMN*sizeof(double));
+
+    /* Build the idendity matrix */
+    MORSE_Complex64_t *Id = (MORSE_Complex64_t *) malloc(minMN*minMN*sizeof(MORSE_Complex64_t));
+    LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', minMN, minMN, 0., 1., Id, minMN);
+
+    /* Perform Id - Q'Q */
+    if (M >= N)
+        cblas_zherk(CblasColMajor, CblasUpper, CblasConjTrans, N, M, done, Q, LDQ, mdone, Id, minMN);
+    else
+        cblas_zherk(CblasColMajor, CblasUpper, CblasNoTrans,   M, N, done, Q, LDQ, mdone, Id, minMN);
+
+    normQ = LAPACKE_zlanhe_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseInfNorm), 'U', minMN, Id, minMN, work);
+
+    result = normQ / (minMN * eps);
+    printf(" ======================================================\n");
+    printf(" ||Id-Q'*Q||_oo / (minMN*eps)          : %15.3E \n",  result );
+    printf(" ======================================================\n");
+
+    if ( isnan(result) || isinf(result) || (result > 60.0) ) {
+        printf("-- Orthogonality is suspicious ! \n");
+        info_ortho=1;
+    }
+    else {
+        printf("-- Orthogonality is CORRECT ! \n");
+        info_ortho=0;
+    }
+    free(work); free(Id);
+    return info_ortho;
+}
+
+/*------------------------------------------------------------
+ *  Check the reduction
+ */
+static int check_reduction(int uplo, int N, int bw, MORSE_Complex64_t *A, double *D, int LDA, MORSE_Complex64_t *Q, double eps )
+{
+    (void) bw;
+    MORSE_Complex64_t zone  =  1.0;
+    MORSE_Complex64_t mzone = -1.0;
+    MORSE_Complex64_t *TEMP     = (MORSE_Complex64_t *)malloc(N*N*sizeof(MORSE_Complex64_t));
+    MORSE_Complex64_t *Residual = (MORSE_Complex64_t *)malloc(N*N*sizeof(MORSE_Complex64_t));
+    double *work = (double *)malloc(N*sizeof(double));
+    double Anorm, Rnorm, result;
+    int info_reduction;
+    int i;
+
+    /* Compute TEMP =  Q * LAMBDA */
+    LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseUpperLower), N, N, Q, LDA, TEMP, N);
+
+    for (i = 0; i < N; i++){
+        cblas_zdscal(N, D[i], &(TEMP[i*N]), 1);
+    }
+    /* Compute Residual = A - Q * LAMBDA * Q^H */
+    /* A is Hermetian but both upper and lower
+     * are assumed valable here for checking
+     * otherwise it need to be symetrized before
+     * checking.
+     */
+    LAPACKE_zlacpy_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseUpperLower), N, N, A, LDA, Residual, N);
+    cblas_zgemm(CblasColMajor, CblasNoTrans, CblasConjTrans, N, N, N, CBLAS_SADDR(mzone), TEMP, N,  Q, LDA, CBLAS_SADDR(zone), Residual,     N);
+
+    Rnorm = LAPACKE_zlange_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseOneNorm), N, N, Residual, N,   work);
+    Anorm = LAPACKE_zlanhe_work(LAPACK_COL_MAJOR, morse_lapack_const(MorseOneNorm), morse_lapack_const(uplo), N, A, LDA, work);
+
+    result = Rnorm / ( Anorm * N * eps);
+    if ( uplo == MorseLower ){
+        printf(" ======================================================\n");
+        printf(" ||A-Q*LAMBDA*Q'||_oo/(||A||_oo.N.eps) : %15.3E \n",  result );
+        printf(" ======================================================\n");
+    }else{
+        printf(" ======================================================\n");
+        printf(" ||A-Q'*LAMBDA*Q||_oo/(||A||_oo.N.eps) : %15.3E \n",  result );
+        printf(" ======================================================\n");
+    }
+
+    if ( isnan(result) || isinf(result) || (result > 60.0) ) {
+        printf("-- Reduction is suspicious ! \n");
+        info_reduction = 1;
+    }
+    else {
+        printf("-- Reduction is CORRECT ! \n");
+        info_reduction = 0;
+    }
+
+    free(TEMP); free(Residual);
+    free(work);
+
+    return info_reduction;
+}
+/*------------------------------------------------------------
+ *  Check the eigenvalues
+ */
+static int check_solution(int N, double *E1, double *E2, double eps)
+{
+    int info_solution, i;
+    double resid;
+    double maxtmp;
+    double maxel = fabs( fabs(E1[0]) - fabs(E2[0]) );
+    double maxeig = max( fabs(E1[0]), fabs(E2[0]) );
+
+    for (i = 1; i < N; i++){
+        resid   = fabs(fabs(E1[i])-fabs(E2[i]));
+        maxtmp  = max(fabs(E1[i]), fabs(E2[i]));
+
+        /* Update */
+        maxeig = max(maxtmp, maxeig);
+        maxel  = max(resid,  maxel );
+    }
+
+    maxel = maxel / (maxeig * N * eps);
+    printf(" ======================================================\n");
+    printf(" | D - eigcomputed | / (|D| * N * eps) : %15.3E \n",  maxel );
+    printf(" ======================================================\n");
+
+    if ( isnan(maxel) || isinf(maxel) || (maxel > 100) ) {
+        printf("-- The eigenvalues are suspicious ! \n");
+        info_solution = 1;
+    }
+    else{
+        printf("-- The eigenvalues are CORRECT ! \n");
+        info_solution = 0;
+    }
+    return info_solution;
+}
diff --git a/timing/CMakeLists.txt b/timing/CMakeLists.txt
index bd29812209892b41796228844ee31a2da5e5368c..d30584ac43575b725abc8d244c92385a3d77bc17 100644
--- a/timing/CMakeLists.txt
+++ b/timing/CMakeLists.txt
@@ -118,10 +118,10 @@ if (NOT CHAMELEON_SIMULATION)
         time_zlange_tile.c
         #time_zgebrd_tile.c
         #time_zgecfi.c
-        #time_zgesvd_tile.c
+        time_zgesvd_tile.c
         #time_zgetrf_reclap.c
         #time_zgetrf_rectil.c
-        #time_zheevd_tile.c
+        time_zheevd_tile.c
         #time_zheev_tile.c
         #time_zhegv_tile.c
         #time_zlapack2tile.c
@@ -237,6 +237,7 @@ if(NOT CHAMELEON_SIMULATION)
     list(APPEND libs_for_timings
     coreblas
     ${LAPACKE_LIBRARIES}
+    ${TMG_LIBRARIES}
     ${CBLAS_LIBRARIES}
     ${LAPACK_SEQ_LIBRARIES}
     ${BLAS_SEQ_LIBRARIES}
@@ -245,6 +246,7 @@ if(NOT CHAMELEON_SIMULATION)
     )
 
     link_directories(${LAPACKE_LIBRARY_DIRS})
+    link_directories(${TMG_LIBRARY_DIRS})
     link_directories(${LAPACK_LIBRARY_DIRS})
     link_directories(${CBLAS_LIBRARY_DIRS})
     link_directories(${BLAS_LIBRARY_DIRS})
diff --git a/timing/time_zgesvd_tile.c b/timing/time_zgesvd_tile.c
new file mode 100644
index 0000000000000000000000000000000000000000..e4cb71a7ee97fb73435b65ec3f5110b69da4e231
--- /dev/null
+++ b/timing/time_zgesvd_tile.c
@@ -0,0 +1,75 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @precisions normal z -> c d s
+ *
+ **/
+#define _TYPE  MORSE_Complex64_t
+#define _PREC  double
+#define _LAMCH LAPACKE_dlamch_work
+
+#define _NAME  "PLASMA_zheev_Tile"
+/* See Lawn 41 page 120 */
+#define _FMULS FMULS_GEBRD( M, N )
+#define _FADDS FADDS_GEBRD( M, N )
+
+#include "./timing.c"
+
+static int
+RunTest(int *iparam, double *dparam, morse_time_t *t_)
+{
+    PASTE_CODE_IPARAM_LOCALS( iparam );
+    MORSE_desc_t *descT;
+    int jobu  = MorseVec;
+    int jobvt = MorseVec;
+    int INFO;
+
+    /* Allocate Data */
+    PASTE_CODE_ALLOCATE_MATRIX_TILE( descA, 1, MORSE_Complex64_t, MorseComplexDouble, LDA, M, N );
+    PASTE_CODE_ALLOCATE_MATRIX( VT, (jobvt == MorseVec), MORSE_Complex64_t, N, N );
+    PASTE_CODE_ALLOCATE_MATRIX( U, (jobu == MorseVec), MORSE_Complex64_t, M, M );
+    PASTE_CODE_ALLOCATE_MATRIX( S, 1, double, N, 1 );
+
+    /* Initialiaze Data */
+    MORSE_zplrnt_Tile(descA, 51 );
+
+    /* Allocate Workspace */
+    MORSE_Alloc_Workspace_zgesvd(N, N, &descT, 1, 1);
+
+    if ( jobu == MorseVec ) {
+        LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', M, M, 0., 1., U, M);
+    }
+    if ( jobvt == MorseVec ) {
+        LAPACKE_zlaset_work(LAPACK_COL_MAJOR, 'A', N, N, 0., 1., VT, N);
+    }
+
+    START_TIMING();
+    INFO = MORSE_zgesvd_Tile(jobu, jobvt, descA, S, descT, U, M, VT, N);
+    STOP_TIMING();
+
+    if(INFO!=0){
+            printf(" ERROR OCCURED INFO %d\n",INFO);
+    }
+
+    /* DeAllocate Workspace */
+    MORSE_Dealloc_Workspace(&descT);
+
+    if (jobu == MorseVec) {
+      free( U );
+    }
+    if (jobvt == MorseVec) {
+      free( VT );
+    }
+    PASTE_CODE_FREE_MATRIX( descA );
+    free( S );
+    return 0;
+}
diff --git a/timing/time_zheevd_tile.c b/timing/time_zheevd_tile.c
new file mode 100644
index 0000000000000000000000000000000000000000..d659b82138de39592c6925f0f2ce709560e15f77
--- /dev/null
+++ b/timing/time_zheevd_tile.c
@@ -0,0 +1,60 @@
+/**
+ *
+ * @copyright (c) 2009-2014 The University of Tennessee and The University
+ *                          of Tennessee Research Foundation.
+ *                          All rights reserved.
+ * @copyright (c) 2012-2014 Inria. All rights reserved.
+ * @copyright (c) 2012-2014 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria, Univ. Bordeaux. All rights reserved.
+ *
+ **/
+
+/**
+ *
+ * @precisions normal z -> c d s
+ *
+ **/
+#define _TYPE  MORSE_Complex64_t
+#define _PREC  double
+#define _LAMCH LAPACKE_dlamch_work
+
+#define _NAME  "PLASMA_zheevd_Tile"
+/* See Lawn 41 page 120 */
+#define _FMULS ((2. / 3.) * ((double)N * (double)N * (double)N))
+#define _FADDS ((2. / 3.) * ((double)N * (double)N * (double)N))
+
+#include "./timing.c"
+/* #include <mkl_service.h> */
+
+static int
+RunTest(int *iparam, double *dparam, morse_time_t *t_)
+{
+    PASTE_CODE_IPARAM_LOCALS( iparam );
+    MORSE_desc_t *descT;
+    int uplo = MorseLower;
+    int vec  = MorseVec;
+    int INFO;
+
+    LDA = max(LDA, N);
+
+    /* Allocate Data */
+    PASTE_CODE_ALLOCATE_MATRIX( A, 1, MORSE_Complex64_t, LDA, N);
+    PASTE_CODE_ALLOCATE_MATRIX( S, 1, double, N, 1 );
+
+    /* Allocate Workspace */
+    MORSE_zplghe( (double)N, MorseUpperLower, N, A, LDA, 51 );
+    MORSE_Alloc_Workspace_zheevd(N, N, &descT, 1, 1);
+
+    START_TIMING();
+    INFO = MORSE_zheevd(vec, uplo, N, A, LDA, S, descT);
+    STOP_TIMING();
+
+    if (INFO != 0){
+        printf(" ERROR OCCURED INFO %d\n", INFO);
+    }
+
+    /* DeAllocate Workspace */
+    MORSE_Dealloc_Workspace(&descT);
+
+    free( A );
+    return 0;
+}
diff --git a/timing/timing.c b/timing/timing.c
index 831f99bbe97f62a2c15c4ecd9549243edcc2e2e8..28bb1c23a6e2b514d101fe3fe44177819492a460 100644
--- a/timing/timing.c
+++ b/timing/timing.c
@@ -491,6 +491,7 @@ main(int argc, char *argv[]) {
     iparam[IPARAM_NX            ] = -1;
     iparam[IPARAM_RHBLK         ] = 0;
     iparam[IPARAM_INPLACE       ] = MORSE_OUTOFPLACE;
+    iparam[IPARAM_MODE          ] = 0;
 
     iparam[IPARAM_INVERSE       ] = 0;
     iparam[IPARAM_NCUDAS        ] = 0;
@@ -613,6 +614,12 @@ main(int argc, char *argv[]) {
             iparam[IPARAM_BOUND] = 1;
         } else if (startswith( argv[i], "--p=" )) {
             sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_P]) );
+        } else if (startswith( argv[i], "--mode=" )) {
+            sscanf( strchr( argv[i], '=' ) + 1, "%d", &(iparam[IPARAM_MODE]) );
+            if (iparam[IPARAM_MODE] < 0 || iparam[IPARAM_MODE] > 20){
+                fprintf( stderr, "Invalid mode: %s from 0 to 20\n", argv[i] );
+                exit(0);
+            }
         } else {
             fprintf( stderr, "Unknown option: %s\n", argv[i] );
         }
diff --git a/timing/timing.h b/timing/timing.h
index 11019ab02268eafa6a9413658cba5bb1176fdf79..948866a778d05dd648a393b2a35b357851d35b2f 100644
--- a/timing/timing.h
+++ b/timing/timing.h
@@ -42,6 +42,7 @@ enum iparam_timing {
     IPARAM_NX,             /* */
     IPARAM_RHBLK,          /* Householder reduction parameter for QR/LQ  */
     IPARAM_INPLACE,        /* InPlace/OutOfPlace translation mode        */
+    IPARAM_MODE,           /* Eigenvalue generation mode                 */
 
     IPARAM_INVERSE,
     IPARAM_NCUDAS,
@@ -101,7 +102,7 @@ enum dparam_timing {
     (void)M;(void)N;(void)K;(void)NRHS;            \
     (void)LDA;(void)LDB;(void)LDC;                 \
     (void)IB;(void)MB;(void)NB;(void)P;(void)Q;    \
-    (void)MT;(void)NT;(void)check;(void)loud;
+    (void)MT;(void)NT;(void)check;(void)loud;(void)bigmat;
 
 /* Paste code to allocate a matrix in desc if cond_init is true */
 #define PASTE_CODE_ALLOCATE_MATRIX_TILE(_desc_, _cond_, _type_, _type2_, _lda_, _m_, _n_) \