diff --git a/compute/pzhetrd_he2hb.c b/compute/pzhetrd_he2hb.c
index 2173faa09def0a8a31440baf84f563dbce432f6f..2230bd0c20244c7d7eb397670cfab357d464fab4 100644
--- a/compute/pzhetrd_he2hb.c
+++ b/compute/pzhetrd_he2hb.c
@@ -41,12 +41,11 @@
  *  Parallel tile BAND Tridiagonal Reduction - dynamic scheduler
  **/
 void morse_pzhetrd_he2hb(MORSE_enum uplo,
-                         MORSE_desc_t *A, MORSE_desc_t *T,
+                         MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *E,
                          MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
     MORSE_option_t options;
-    MORSE_desc_t *E  = NULL;
     MORSE_desc_t *D  = NULL;
     MORSE_desc_t *AT = NULL;
     size_t ws_worker = 0;
@@ -90,12 +89,6 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo,
 
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
-#if defined(CHAMELEON_COPY_DIAG)
-    /* Copy of the extra-diagonal to generate more parallelism by releasing anti-dependencies on UNMQR/TSMQR triangle conflict */
-    E = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-    morse_zdesc_alloc_diag(*E, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q);
-#endif
-
     /* Copy of the diagonal tiles to keep the general version of the tile all along the computation */
     D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
     morse_zdesc_alloc_diag(*D, A->mb, A->nb, chameleon_min(A->m, A->n) - A->mb, A->nb, 0, 0, chameleon_min(A->m, A->n) - A->mb, A->nb, A->p, A->q);
@@ -451,10 +444,4 @@ void morse_pzhetrd_he2hb(MORSE_enum uplo,
 
     morse_desc_mat_free(AT);
     free(AT);
-
-#if defined(CHAMELEON_COPY_DIAG)
-    morse_desc_mat_free(E);
-    free(E);
-#endif
-    (void)E;
 }
diff --git a/compute/pztpgqrt.c b/compute/pztpgqrt.c
index 4f8b5e8b0d09ec1489912fd59518b51782511539..27f2c17018934fee64173c65c5a2dd357d2151f8 100644
--- a/compute/pztpgqrt.c
+++ b/compute/pztpgqrt.c
@@ -31,9 +31,9 @@
 #define Q1(m,n) Q1,  m,  n
 #define Q2(m,n) Q2,  m,  n
 #if defined(CHAMELEON_COPY_DIAG)
-#define DIAG(k) DIAG, k, 0
+#define D(k)    D, k, 0
 #else
-#define DIAG(k) V1, k, k
+#define D(k)    V1, k, k
 #endif
 
 /***************************************************************************//**
@@ -43,19 +43,19 @@ void morse_pztpgqrt( int L,
                      MORSE_desc_t *V1, MORSE_desc_t *T1,
                      MORSE_desc_t *V2, MORSE_desc_t *T2,
                      MORSE_desc_t *Q1, MORSE_desc_t *Q2,
+                     MORSE_desc_t *D,
                      MORSE_sequence_t *sequence, MORSE_request_t *request )
 {
     MORSE_context_t *morse;
     MORSE_option_t options;
     size_t ws_worker = 0;
     size_t ws_host = 0;
-    MORSE_desc_t *DIAG = NULL;
 
     int k, m, n;
     int ldvk, ldvm;
     int ldqk, ldqm;
     int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
-    int ib, minMT;
+    int ib;
 
     /* Dimension of the first column */
     int maxm  = chameleon_max( Q2->m - L, 1 );
@@ -68,13 +68,6 @@ void morse_pztpgqrt( int L,
     RUNTIME_options_init(&options, morse, sequence, request);
 
     ib = MORSE_IB;
-
-    if (V1->m > V1->n) {
-        minMT = V1->nt;
-    } else {
-        minMT = V1->mt;
-    }
-
     /*
      * ztpmqrt = Q1->nb * ib
      */
@@ -94,12 +87,6 @@ void morse_pztpgqrt( int L,
 
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
-#if defined(CHAMELEON_COPY_DIAG)
-    /* necessary to avoid dependencies between tasks regarding the diag tile */
-    DIAG = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-    morse_zdesc_alloc_diag(*DIAG, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
-#endif
-
     for (k = V1->nt-1; k >= 0; k--) {
         RUNTIME_iteration_push(morse, k);
 
@@ -152,13 +139,13 @@ void morse_pztpgqrt( int L,
             &options,
             MorseLower, tempkm, tempkk, V1->nb,
             V1(k, k), ldvk,
-            DIAG(k), ldvk );
+            D(k), ldvk );
 #if defined(CHAMELEON_USE_CUDA)
         MORSE_TASK_zlaset(
             &options,
             MorseUpper, tempkm, tempkk,
             0., 1.,
-            DIAG(k), ldvk );
+            D(k), ldvk );
 #endif
 #endif
         for (n = k; n < Q1->nt; n++) {
@@ -167,7 +154,7 @@ void morse_pztpgqrt( int L,
                 &options,
                 MorseLeft, MorseNoTrans,
                 tempkm, tempnn, tempkk, ib, T1->nb,
-                DIAG(k), ldvk,
+                D(k), ldvk,
                 T1(k, k), T1->mb,
                 Q1(k, n), ldqk);
         }
@@ -178,11 +165,4 @@ void morse_pztpgqrt( int L,
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, morse);
     MORSE_TASK_dataflush_all();
-
-#if defined(CHAMELEON_COPY_DIAG)
-    MORSE_Sequence_Wait(sequence);
-    morse_desc_mat_free(DIAG);
-    free(DIAG);
-#endif
-    (void)DIAG; (void)minMT;
 }
diff --git a/compute/zhetrd.c b/compute/zhetrd.c
index b74a90afc880a8a8f044cb0d421e727077e4db53..e3a6179cc2d1f430bdca9ff15308364c7ea27297 100644
--- a/compute/zhetrd.c
+++ b/compute/zhetrd.c
@@ -334,6 +334,7 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
     MORSE_desc_t descAB;
     int N, NB, LDAB;
     int status;
+    MORSE_desc_t D, *Dptr = NULL;
 
     morse = morse_context_self();
     if (morse == NULL) {
@@ -387,9 +388,14 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
 
     N  = descA.m;
     NB = descA.mb;
-
+#if defined(CHAMELEON_COPY_DIAG)
+    {
+        morse_zdesc_alloc_diag(D, A->mb, A->nb, chameleon_min(A->m, A->n), A->nb, 0, 0, chameleon_min(A->m, A->n), A->nb, A->p, A->q);
+        Dptr = &D;
+    }
+#endif
     /* Reduction to band. On exit, T contains reflectors */
-    morse_pzhetrd_he2hb( uplo, A, T,
+    morse_pzhetrd_he2hb( uplo, A, T, Dptr,
                          sequence, request );
 
     LDAB = NB+1;
@@ -419,7 +425,9 @@ int MORSE_zhetrd_Tile_Async(MORSE_enum jobz,
         morse_error("MORSE_zhetrd_Tile_Async", "LAPACKE_zhbtrd failed");
     }
 #endif /* !defined(CHAMELEON_SIMULATION) */
-
+    if (Dptr != NULL) {
+        morse_desc_mat_free(Dptr);
+    }
     morse_desc_mat_free(&descAB);
     return MORSE_SUCCESS;
 }
diff --git a/compute/ztpgqrt.c b/compute/ztpgqrt.c
index 1cdab39d879bfe7d19355e55a7e60e33a8af37fe..3943a31d8b13e452e163789721c1a7017a1e59f7 100644
--- a/compute/ztpgqrt.c
+++ b/compute/ztpgqrt.c
@@ -341,6 +341,7 @@ int MORSE_ztpgqrt_Tile_Async( int L,
                               MORSE_sequence_t *sequence, MORSE_request_t *request )
 {
     MORSE_context_t *morse;
+    MORSE_desc_t D, *Dptr = NULL;
 
     morse = morse_context_self();
     if (morse == NULL) {
@@ -395,15 +396,32 @@ int MORSE_ztpgqrt_Tile_Async( int L,
         morse_error("MORSE_ztpgqrt_Tile", "Triangular part must be aligned with tiles");
         return morse_request_fail(sequence, request, MORSE_ERR_ILLEGAL_VALUE);
     }
+#if defined(CHAMELEON_COPY_DIAG)
+    {
+        /* D is necessary to avoid dependencies between tasks regarding the diag tile */
+        int minMT;
+        if (V1->m > V1->n) {
+            minMT = V1->nt;
+        } else {
+            minMT = V1->mt;
+        }
+        morse_zdesc_alloc_diag(D, V1->mb, V1->nb, minMT*V1->mb, V1->nb, 0, 0, minMT*V1->mb, V1->nb, V1->p, V1->q);
+        Dptr = &D;
+    }
+#endif
 
     /* if (morse->householder == MORSE_FLAT_HOUSEHOLDER) { */
     morse_pzlaset( MorseUpperLower, 0., 1., Q1, sequence, request );
     morse_pzlaset( MorseUpperLower, 0., 0., Q2, sequence, request );
-    morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, sequence, request );
+    morse_pztpgqrt( L, V1, T1, V2, T2, Q1, Q2, Dptr, sequence, request );
     /* } */
     /* else { */
     /*    morse_pztpgqrtrh(Q1, T, MORSE_RHBLK, sequence, request); */
     /* } */
-
+    if (Dptr != NULL) {
+        /* Submitted tasks may still reference D: wait for the sequence before releasing it */
+        MORSE_Sequence_Wait(sequence);
+        morse_desc_mat_free(Dptr);
+    }
     return MORSE_SUCCESS;
 }
diff --git a/control/compute_z.h b/control/compute_z.h
index a97a78c97198cb5c3750b73087387fd7a6a6a802..4327e583f51dece572a44db85378ee9ff7bea9ee 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -106,7 +106,7 @@ void morse_pzhemm(MORSE_enum side, MORSE_enum uplo, MORSE_Complex64_t alpha, MOR
 void morse_pzherk(MORSE_enum uplo, MORSE_enum trans, double alpha, MORSE_desc_t *A, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzher2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, double beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
 #endif
-void morse_pzhetrd_he2hb(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request);
+void morse_pzhetrd_he2hb(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *T, MORSE_desc_t *E, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzlacpy(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzlag2c(MORSE_desc_t *A, MORSE_desc_t *SB, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzlange(MORSE_enum norm, MORSE_desc_t *A, double *result, MORSE_sequence_t *sequence, MORSE_request_t *request);
@@ -134,7 +134,7 @@ void morse_pzsyrk(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MO
 void morse_pzsyr2k(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_Complex64_t beta, MORSE_desc_t *C, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzsytrf(MORSE_enum uplo, MORSE_desc_t *A, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pztile2band(MORSE_enum uplo, MORSE_desc_t *A, MORSE_desc_t *descAB, MORSE_sequence_t *sequence, MORSE_request_t *request);
-void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_sequence_t *sequence, MORSE_request_t *request );
+void morse_pztpgqrt( int L, MORSE_desc_t *V1, MORSE_desc_t *T1, MORSE_desc_t *V2, MORSE_desc_t *T2, MORSE_desc_t *Q1, MORSE_desc_t *Q2, MORSE_desc_t *D, MORSE_sequence_t *sequence, MORSE_request_t *request );
 void morse_pztpqrt( int L, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *T, MORSE_sequence_t *sequence, MORSE_request_t *request );
 void morse_pztradd(MORSE_enum uplo, MORSE_enum trans, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_Complex64_t beta, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pztrmm(MORSE_enum side, MORSE_enum uplo, MORSE_enum transA, MORSE_enum diag, MORSE_Complex64_t alpha, MORSE_desc_t *A, MORSE_desc_t *B, MORSE_sequence_t *sequence, MORSE_request_t *request);