diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c
index 360a2d408f08fab411be9497289a9cf1428b1129..44fead1c4b9b1a99a46e37985bb7e24240f5e5da 100644
--- a/compute/pzgeqrf_param.c
+++ b/compute/pzgeqrf_param.c
@@ -42,15 +42,15 @@
 /**
  *  Parallel tile QR factorization (reduction Householder) - dynamic scheduling
  */
-void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT,
+void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A,
+                          MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,
                           MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
     MORSE_option_t options;
     size_t ws_worker = 0;
     size_t ws_host = 0;
-    MORSE_desc_t *D = NULL;
-
+    
     int k, m, n, i, p;
     int K;
     int ldap, ldam;
@@ -95,14 +95,6 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de
 
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
-#if defined(CHAMELEON_COPY_DIAG)
-    {
-        /* necessary to avoid dependencies between tasks regarding the diag tile */
-        D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-        morse_zdesc_alloc(*D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, );
-    }
-#endif
-
     K = chameleon_min(A->mt, A->nt);
 
     /* The number of the factorization */
@@ -212,11 +204,5 @@ void morse_pzgeqrf_param( const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_de
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, morse);
     MORSE_TASK_dataflush_all();
-
-#if defined(CHAMELEON_COPY_DIAG)
-    MORSE_Sequence_Wait(sequence);
-    morse_desc_mat_free(D);
-    free(D);
-#endif
     (void)D;
 }
diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c
index 3434c8f6998f4284ed3f52f53b86fedc074850b5..ef0ecd41fd84de44e96282e0ae7fb1939135e959 100644
--- a/compute/pzungqr_param.c
+++ b/compute/pzungqr_param.c
@@ -44,14 +44,14 @@
  *  Parallel construction of Q using tile V (application to identity) - dynamic scheduling
  */
 void morse_pzungqr_param(const libhqr_tree_t *qrtree,
-                         MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *TS, MORSE_desc_t *TT,
+                         MORSE_desc_t *A, MORSE_desc_t *Q,
+                         MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,
                          MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
     MORSE_option_t options;
     size_t ws_worker = 0;
     size_t ws_host = 0;
-    MORSE_desc_t *D = NULL;
 
     int k, m, n, i, p;
     int ldam, ldqm, ldqp;
@@ -98,14 +98,6 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree,
 
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
-#if defined(CHAMELEON_COPY_DIAG)
-    {
-        /* necessary to avoid dependencies between tasks regarding the diag tile */
-        D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-        morse_zdesc_alloc(*D, A->mb, A->nb, A->m, A->n, 0, 0, A->m, A->n, );
-    }
-#endif
-
     for (k = minMT-1; k >= 0; k--) {
         RUNTIME_iteration_push(morse, k);
 
@@ -192,10 +184,5 @@ void morse_pzungqr_param(const libhqr_tree_t *qrtree,
     RUNTIME_options_finalize(&options, morse);
     MORSE_TASK_dataflush_all();
 
-#if defined(CHAMELEON_COPY_DIAG)
-    MORSE_Sequence_Wait(sequence);
-    morse_desc_mat_free(D);
-    free(D);
-#endif
     (void)D;
 }
diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c
index 678172f7e7010fe32131d8e54398ee0f0ac21dca..7e27339855ba5be585439f58be19da65009480ec 100644
--- a/compute/pzunmqr_param.c
+++ b/compute/pzunmqr_param.c
@@ -40,14 +40,13 @@
  */
 void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
                          MORSE_enum side, MORSE_enum trans,
-                         MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT,
+                         MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,
                          MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
     MORSE_option_t options;
     size_t ws_worker = 0;
     size_t ws_host = 0;
-    MORSE_desc_t *D = NULL;
 
     int k, m, n, i, p;
     int ldam, ldan, ldbm, ldbp;
@@ -90,12 +89,6 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
 
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
-    /* necessary to avoid dependencies between tasks regarding the diag tile */
-#if defined(CHAMELEON_COPY_DIAG)
-    D = (MORSE_desc_t*)malloc(sizeof(MORSE_desc_t));
-    morse_zdesc_alloc_diag(*D, A->mb, A->nb, K*A->nb, A->nb, 0, 0, K*A->nb, A->nb, A->p, A->q);
-#endif
-
     if (side == MorseLeft ) {
         if (trans == MorseConjTrans) {
             /*
@@ -442,10 +435,5 @@ void morse_pzunmqr_param(const libhqr_tree_t *qrtree,
     RUNTIME_options_finalize(&options, morse);
     MORSE_TASK_dataflush_all();
 
-#if defined(CHAMELEON_COPY_DIAG)
-    MORSE_Sequence_Wait(sequence);
-    morse_desc_mat_free(D);
-    free(D);
-#endif
     (void)D;
 }
diff --git a/compute/zgels_param.c b/compute/zgels_param.c
index 6d78f62c1a9c17de8ce9b7535886f6e1ecb4aa89..cd76d61b052eb127dbb7a52271ba94c133b11931 100644
--- a/compute/zgels_param.c
+++ b/compute/zgels_param.c
@@ -331,7 +331,7 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans,
     MORSE_desc_t *subA;
     MORSE_desc_t *subB;
     MORSE_context_t *morse;
-
+    MORSE_desc_t D;
     morse = morse_context_self();
     if (morse == NULL) {
         morse_fatal_error("MORSE_zgels_param_Tile", "MORSE not initialized");
@@ -386,15 +386,24 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans,
     }
      */
     if (A->m >= A->n) {
-        morse_pzgeqrf_param(qrtree, A, TS, TT, sequence, request);
 
-        morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request);
+#if defined(CHAMELEON_COPY_DIAG)
+        morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), );
+        morse_pzgeqrf_param(qrtree, A, TS, TT, &D, sequence, request);
+        morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request);
+        /* Keep the wait the morse_p*_param routines performed before releasing
+         * their own D: tasks using D may still be pending on the sequence. */
+        MORSE_Sequence_Wait(sequence);
+        morse_desc_mat_free(&D);
+#else
+        morse_pzgeqrf_param(qrtree, A, TS, TT, NULL, sequence, request);
+        morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request);
+#endif
 
         subB = morse_desc_submatrix(B, 0, 0, A->n, B->n);
         subA = morse_desc_submatrix(A, 0, 0, A->n, A->n);
         morse_pztrsm(MorseLeft, MorseUpper, MorseNoTrans, MorseNonUnit, 1.0, subA, subB, sequence, request);
         free(subA);
         free(subB);
     }
     else {
         /* subB = morse_desc_submatrix(B, A->m, 0, A->n-A->m, B->n);
@@ -409,7 +418,9 @@ int MORSE_zgels_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_enum trans,
         free(subA);
         free(subB);
 
-        morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request);
+        //morse_pzunmlq_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request);
+        morse_pzunmlq(MorseLeft, MorseConjTrans, A, B, TS, sequence, request);
     }
+    (void)D;
     return MORSE_SUCCESS;
 }
diff --git a/compute/zgeqrf_param.c b/compute/zgeqrf_param.c
index 3e3113fb6584d0449e8b1d47e7cba6df1786b467..4525309bc8eec78e436cd8f38dea0c27a7b323ce 100644
--- a/compute/zgeqrf_param.c
+++ b/compute/zgeqrf_param.c
@@ -253,6 +253,7 @@ int MORSE_zgeqrf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A,
                              MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
+    MORSE_desc_t D;
 
     morse = morse_context_self();
     if (morse == NULL) {
@@ -296,7 +297,16 @@ int MORSE_zgeqrf_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A,
     if (chameleon_min(M, N) == 0)
         return MORSE_SUCCESS;
 */
-    morse_pzgeqrf_param(qrtree, A, TS, TT, sequence, request);
-
+#if defined(CHAMELEON_COPY_DIAG)
+    morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), );
+    morse_pzgeqrf_param(qrtree, A, TS, TT, &D, sequence, request);
+    /* Keep the wait morse_pzgeqrf_param performed before releasing its own D:
+     * tasks using D may still be pending on the sequence. */
+    MORSE_Sequence_Wait(sequence);
+    morse_desc_mat_free(&D);
+#else
+    morse_pzgeqrf_param(qrtree, A, TS, TT, NULL, sequence, request);
+#endif
+    (void)D;
     return MORSE_SUCCESS;
 }
diff --git a/compute/zgeqrs_param.c b/compute/zgeqrs_param.c
index 7dddbdcf0e961479e1c9d4c5ad14a49f8be8057e..1b86b10be599c9f03fd4e1994729f8c03c3f039e 100644
--- a/compute/zgeqrs_param.c
+++ b/compute/zgeqrs_param.c
@@ -268,7 +268,8 @@ int MORSE_zgeqrs_param_Tile_Async(const libhqr_tree_t *qrtree,
     MORSE_desc_t *subA;
     MORSE_desc_t *subB;
     MORSE_context_t *morse;
-
+    MORSE_desc_t D;
+    
     morse = morse_context_self();
     if (morse == NULL) {
         morse_fatal_error("MORSE_zgeqrs_param_Tile", "MORSE not initialized");
@@ -316,7 +317,17 @@ int MORSE_zgeqrs_param_Tile_Async(const libhqr_tree_t *qrtree,
      return MORSE_SUCCESS;
      }
      */
-    morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, sequence, request);
+#if defined(CHAMELEON_COPY_DIAG)
+    morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), );
+    morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, &D, sequence, request);
+    /* Keep the wait morse_pzunmqr_param performed before releasing its own D:
+     * tasks using D may still be pending on the sequence. */
+    MORSE_Sequence_Wait(sequence);
+    morse_desc_mat_free(&D);
+#else
+    morse_pzunmqr_param(qrtree, MorseLeft, MorseConjTrans, A, B, TS, TT, NULL, sequence, request);
+#endif
+    (void)D;
 
     subB = morse_desc_submatrix(B, 0, 0, A->n, B->n);
     subA = morse_desc_submatrix(A, 0, 0, A->n, A->n);
diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c
index 24bec0c623a53b11dceef761161a4882f6aef67e..513c50af4b4781758076aaa5112188f20467f3bf 100644
--- a/compute/zungqr_param.c
+++ b/compute/zungqr_param.c
@@ -257,11 +257,10 @@ int MORSE_zungqr_param_Tile(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_
  * @sa MORSE_zgeqrf_Tile_Async
  *
  ******************************************************************************/
-int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q,
-                             MORSE_sequence_t *sequence, MORSE_request_t *request)
+int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *Q, MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
-
+    MORSE_desc_t D;
     morse = morse_context_self();
     if (morse == NULL) {
         morse_fatal_error("MORSE_zungqr_param_Tile", "MORSE not initialized");
@@ -308,8 +307,17 @@ int MORSE_zungqr_param_Tile_Async(const libhqr_tree_t *qrtree, MORSE_desc_t *A,
     if (N <= 0)
         return MORSE_SUCCESS;
 */
     morse_pzlaset(MorseUpperLower, 0., 1., Q, sequence, request);
-    morse_pzungqr_param(qrtree, A, Q, TS, TT, sequence, request);
-
+#if defined(CHAMELEON_COPY_DIAG)
+    morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), );
+    morse_pzungqr_param(qrtree, A, Q, TS, TT, &D, sequence, request);
+    /* Keep the wait morse_pzungqr_param performed before releasing its own D:
+     * tasks using D may still be pending on the sequence. */
+    MORSE_Sequence_Wait(sequence);
+    morse_desc_mat_free(&D);
+#else
+    morse_pzungqr_param(qrtree, A, Q, TS, TT, NULL, sequence, request);
+#endif
+    (void)D;
     return MORSE_SUCCESS;
 }
diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c
index 8a5a42cd47431a9a31ff377e9b172f1338015a03..1e9af1473ea66033988871ec29fe5ed817a2ae1c 100644
--- a/compute/zunmqr_param.c
+++ b/compute/zunmqr_param.c
@@ -317,6 +317,7 @@ int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree,
                                   MORSE_sequence_t *sequence, MORSE_request_t *request)
 {
     MORSE_context_t *morse;
+    MORSE_desc_t D;
 
     morse = morse_context_self();
     if (morse == NULL) {
@@ -372,7 +373,16 @@ int MORSE_zunmqr_param_Tile_Async(const libhqr_tree_t *qrtree,
         return MORSE_SUCCESS;
 */
 
-    morse_pzunmqr_param(qrtree, side, trans, A, C, TS, TT, sequence, request);
-
+#if defined(CHAMELEON_COPY_DIAG)
+    morse_zdesc_alloc(D, A->mb, A->nb, A->m, chameleon_min(A->m, A->n), 0, 0, A->m, chameleon_min(A->m, A->n), );
+    morse_pzunmqr_param(qrtree, side, trans, A, C, TS, TT, &D, sequence, request);
+    /* Keep the wait morse_pzunmqr_param performed before releasing its own D:
+     * tasks using D may still be pending on the sequence. */
+    MORSE_Sequence_Wait(sequence);
+    morse_desc_mat_free(&D);
+#else
+    morse_pzunmqr_param(qrtree, side, trans, A, C, TS, TT, NULL, sequence, request);
+#endif
+    (void)D;
     return MORSE_SUCCESS;
 }
diff --git a/control/compute_z.h b/control/compute_z.h
index a6b77619a63db2fe5118328f1344e7d6157530da..65a8547b0bbc6d16fa14208106bd724ee1f8d335 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -158,16 +158,18 @@ void morse_pzbuild( MORSE_enum uplo, MORSE_desc_t *A, void *user_data, void* use
 #if defined(CHAMELEON_USE_LIBHQR)
 void morse_pzgelqf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT,
                          MORSE_sequence_t *sequence, MORSE_request_t *request);
-void morse_pzgeqrf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT,
+void morse_pzgeqrf_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,
                          MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzunmlq_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans,
                          MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT,
                          MORSE_sequence_t *sequence, MORSE_request_t *request);
 void morse_pzunmqr_param(const libhqr_tree_t *qrtree, MORSE_enum side, MORSE_enum trans,
-                         MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT,
+                         MORSE_desc_t *A, MORSE_desc_t *B, MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,
                          MORSE_sequence_t *sequence, MORSE_request_t *request);
-void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *TS, MORSE_desc_t *TT,
+void morse_pzunglq_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q,
+                         MORSE_desc_t *TS, MORSE_desc_t *TT,
                          MORSE_sequence_t *sequence, MORSE_request_t *request);
-void morse_pzungqr_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q, MORSE_desc_t *TS, MORSE_desc_t *TT,
+void morse_pzungqr_param(const libhqr_tree_t *qrtree, MORSE_desc_t *A, MORSE_desc_t *Q,
+                         MORSE_desc_t *TS, MORSE_desc_t *TT, MORSE_desc_t *D,
                          MORSE_sequence_t *sequence, MORSE_request_t *request);
 #endif /* defined(CHAMELEON_USE_LIBHQR) */
diff --git a/timing/CMakeLists.txt b/timing/CMakeLists.txt
index 61334fa4209b3e44a91e7381968027af4fe67b1e..500dda7f9120fc74ccf8af11bc7f3507d40f7744 100644
--- a/timing/CMakeLists.txt
+++ b/timing/CMakeLists.txt
@@ -85,6 +85,7 @@ if (NOT CHAMELEON_SIMULATION)
         time_zgels.c
         time_zgels_tile.c
         time_zgeqrf.c
+        time_zgeqrf_hqr.c
         time_zgeqrf_tile.c
         time_zgelqf.c
         time_zgelqf_tile.c
@@ -237,6 +238,7 @@ if(NOT CHAMELEON_SIMULATION)
     ${CBLAS_LIBRARIES}
     ${LAPACK_SEQ_LIBRARIES}
     ${BLAS_SEQ_LIBRARIES}
+    ${LIBHQR_LIBRARIES}
     ${HWLOC_LIBRARIES}
     ${EXTRA_LIBRARIES}
     )
@@ -246,6 +248,7 @@ if(NOT CHAMELEON_SIMULATION)
     link_directories(${LAPACK_LIBRARY_DIRS})
     link_directories(${CBLAS_LIBRARY_DIRS})
     link_directories(${BLAS_LIBRARY_DIRS})
+    link_directories(${LIBHQR_LIBRARY_DIRS})
 
 else()
 
diff --git a/timing/time_zgeqrf_hqr.c b/timing/time_zgeqrf_hqr.c
index a475afb035becb399fc5d328678dcab1bdfc9324..5c17848b1df613384050e59e5ad4fc66a8933918 100644
--- a/timing/time_zgeqrf_hqr.c
+++ b/timing/time_zgeqrf_hqr.c
@@ -30,6 +30,9 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_)
 {
     MORSE_desc_t *TS;
     MORSE_desc_t *TT;
+    libhqr_tree_t qrtree;
+    libhqr_tiledesc_t matrix;
+
     PASTE_CODE_IPARAM_LOCALS( iparam );
 
     if ( M != N && check ) {
@@ -46,14 +49,25 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_)
     /* Allocate Workspace */
     MORSE_Alloc_Workspace_zgels(M, N, &TS, P, Q);
     memset(TS->mat, 0, (TS->llm*TS->lln)*sizeof(MorseComplexDouble));
-    MORSE_Alloc_Workspace_zgels(M, N, &TT P, Q);
+    MORSE_Alloc_Workspace_zgels(M, N, &TT, P, Q);
     memset(TT->mat, 0, (TT->llm*TT->lln)*sizeof(MorseComplexDouble));
 
     /* Save AT in lapack layout for check */
     PASTE_CODE_ALLOCATE_COPY( Acpy, check, MORSE_Complex64_t, A, LDA, N );
 
+    /* Initialize matrix */
+    matrix.mt = TS->mt;
+    matrix.nt = TS->nt;
+    matrix.nodes = 1;
+    matrix.p = 1;
+
+    /* Initialize qrtree  */
+    libhqr_hqr_init( &qrtree,
+                     ( matrix.mt >= matrix.nt ) ? LIBHQR_QR : LIBHQR_LQ,
+                     &matrix, -1, -1, 1, -1, 0, 0);
+
     START_TIMING();
-    MORSE_zgeqrf( M, N, A, LDA, TS );
+    MORSE_zgeqrf_param(&qrtree, M, N, A, LDA, TS, TT );
     STOP_TIMING();
 
     /* Check the solution */
@@ -63,7 +77,7 @@ RunTest(int *iparam, double *dparam, morse_time_t *t_)
         MORSE_zplrnt( N, NRHS, X, LDB, 5673 );
         PASTE_CODE_ALLOCATE_COPY( B, 1, MORSE_Complex64_t, X, LDB, NRHS );
 
-        MORSE_zgeqrs(M, N, NRHS, A, LDA, TS, X, LDB);
+        MORSE_zgeqrs_param(&qrtree, M, N, NRHS, A, LDA, TS, TT, X, LDB);
 
         dparam[IPARAM_RES] = z_check_solution(M, N, NRHS, Acpy, LDA, B, X, LDB,
                                               &(dparam[IPARAM_ANORM]),