diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c
index 0831540e19f302dc8477787b3f48ec9ae798e72e..418d9ee8b797ab416109a57d90cc90a331d2fa72 100644
--- a/compute/pzgeqrf_param.c
+++ b/compute/pzgeqrf_param.c
@@ -30,8 +30,148 @@
  *  Parallel tile QR factorization (reduction Householder) - dynamic scheduling
  *
  * @param[in] genD
- *         Indicate if the copies of the geqrt tiles must be done to speedup
- *         computations in updates.
+ *         Indicate if copies of the geqrt tiles must be done in D to speed up the
+ *         updates. No NULL check on D here: chameleon_pzgeqrf_param passes A instead.
+ *
+ * @param[in] uplo
+ *         - ChamLower: Classic QR factorization of the matrix A.
+ *         - ChamUpper: TTQRT-like step: no geqrt on A(k, k), eliminated with L = tempmm.
+ *         - ChamUpperLower: TSQRT-like step; handled as ChamLower in this function.
+ */
+int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
+                                  const libhqr_tree_t *qrtree, int *tiles,
+                                  CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
+                                  RUNTIME_option_t *options, RUNTIME_sequence_t *sequence )
+{
+    CHAM_desc_t *T; /* T descriptor currently in use: TS or TT */
+    int m, n, i, p;
+    int L, nbgeqrt;
+    int tempkmin, tempkn, tempnn, tempmm;
+    int node, nbtiles;
+
+    tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb; /* width of tile column k (the last one may be smaller) */
+
+    /* Number of tiles of panel k on which a geqrt is applied, as given by the tree */
+    nbgeqrt = qrtree->getnbgeqrf( qrtree, k );
+
+    T = TS; /* the geqrt/unmqr loop below works with TS */
+    for (i = 0; i < nbgeqrt; i++) {
+        m = qrtree->getm( qrtree, k, i );
+
+        /* With uplo == ChamUpper, the diagonal tile (m == k) is neither factorized nor applied here */
+        if ( (uplo == ChamUpper) && (m == k) ) {
+            continue;
+        }
+
+        tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; /* height of tile row m (the last one may be smaller) */
+        tempkmin = chameleon_min(tempmm, tempkn);
+
+        INSERT_TASK_zgeqrt(
+            options,
+            tempmm, tempkn, ib, T->nb,
+            A(m, k), T(m, k) );
+
+        if ( genD ) { /* copy the lower part of A(m, k) into D(m, k); sizes are taken from D */
+            int tempDmm = m == D->mt-1 ? D->m-m*D->mb : D->mb;
+            int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
+
+            INSERT_TASK_zlacpy(
+                options,
+                ChamLower, tempDmm, tempDkn, A->nb,
+                A(m, k), D(m, k) );
+#if defined(CHAMELEON_USE_CUDA)
+            INSERT_TASK_zlaset(
+                options,
+                ChamUpper, tempDmm, tempDkn,
+                0., 1.,
+                D(m, k) );
+#endif
+        }
+
+        for (n = k+1; n < A->nt; n++) { /* update the tiles of row m located right of the panel */
+            tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+            INSERT_TASK_zunmqr(
+                options,
+                ChamLeft, ChamConjTrans,
+                tempmm, tempnn, tempkmin, ib, T->nb,
+                D(m, k),
+                T(m, k),
+                A(m, n));
+        }
+
+        if ( genD || ((k+1) < A->nt)) { /* D(m, k) is not flushed when genD == 0 and k is the last tile column */
+            RUNTIME_data_flush( sequence, D(m, k) );
+        }
+        RUNTIME_data_flush( sequence, T(m, k) );
+    }
+
+    /* Fill tiles[] with the rows of panel k, in the order the loop below processes them */
+    nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
+
+    for (i = 0; i < nbtiles; i++) {
+        m = tiles[i];
+        p = qrtree->currpiv( qrtree, k, m ); /* row paired with m in the ztpqrt/ztpmqrt tasks */
+
+        tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+
+        if ( qrtree->gettype( qrtree, k, m ) == LIBHQR_KILLED_BY_TS ) {
+            /* TS kernel: T is taken from TS and L = 0 */
+            T = TS;
+            L = 0;
+
+            /* Diagonal tile with uplo == ChamUpper: L = tempmm as for TT, while T remains TS */
+            if ( (uplo == ChamUpper) && (m == k) ) {
+                L = tempmm;
+            }
+        }
+        else {
+            /* TT kernel: T is taken from TT and L = tempmm */
+            T = TT;
+            L = tempmm;
+        }
+
+        node = A->get_rankof( A, m, k ); /* both panel tiles are migrated to the rank of A(m, k) */
+        RUNTIME_data_migrate( sequence, A(p, k), node );
+        RUNTIME_data_migrate( sequence, A(m, k), node );
+
+        INSERT_TASK_ztpqrt(
+            options,
+            tempmm, tempkn, chameleon_min(L, tempkn), ib, T->nb,
+            A(p, k),
+            A(m, k),
+            T(m, k));
+
+        for (n = k+1; n < A->nt; n++) {
+            tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+
+            node = A->get_rankof( A, m, n ); /* same migration for the pair of tiles of column n */
+            RUNTIME_data_migrate( sequence, A(p, n), node );
+            RUNTIME_data_migrate( sequence, A(m, n), node );
+
+            INSERT_TASK_ztpmqrt(
+                options,
+                ChamLeft, ChamConjTrans,
+                tempmm, tempnn, A->nb, L, ib, T->nb,
+                A(m, k),
+                T(m, k),
+                A(p, n),
+                A(m, n));
+        }
+        RUNTIME_data_flush( sequence, A(m, k) );
+        RUNTIME_data_flush( sequence, T(m, k) );
+    }
+
+    return tiles[nbtiles]; /* NOTE(review): entry right after the nbtiles rows; meaning and bound (nbtiles < array size) to confirm against libhqr_walk_stepk */
+}
+
+
+/**
+ *  Parallel tile QR factorization (reduction Householder) - dynamic scheduling
+ *
+ * @param[in] genD
+ *         Indicate if copies of the geqrt tiles must be done to speed up
+ *         computations in updates. genD is considered only if D is not NULL.
+ *
  */
 void chameleon_pzgeqrf_param( int genD, int K,
                               const libhqr_tree_t *qrtree, CHAM_desc_t *A,
@@ -40,14 +180,11 @@ void chameleon_pzgeqrf_param( int genD, int K,
 {
     CHAM_context_t *chamctxt;
     RUNTIME_option_t options;
-    CHAM_desc_t *T;
     size_t ws_worker = 0;
     size_t ws_host = 0;
 
-    int k, m, n, i, p;
-    int L, nbgeqrt;
-    int tempkmin, tempkn, tempnn, tempmm;
-    int ib, node, nbtiles, *tiles;
+    int k, n;
+    int ib, *tiles;
 
     chamctxt = chameleon_context_self();
     if (sequence->status != CHAMELEON_SUCCESS) {
@@ -57,7 +194,7 @@ void chameleon_pzgeqrf_param( int genD, int K,
 
     ib = CHAMELEON_IB;
 
-    if ( D == NULL ) {
+    if ( (genD == 0) || (D == NULL) ) {
         D    = A;
         genD = 0;
     }
@@ -85,109 +222,13 @@ void chameleon_pzgeqrf_param( int genD, int K,
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
     /* Initialisation of temporary tiles array */
-    tiles = (int*)calloc(qrtree->mt, sizeof(int));
+    tiles = (int*)calloc( qrtree->mt, sizeof(int) );
 
     for (k = 0; k < K; k++) {
-        RUNTIME_iteration_push(chamctxt, k);
-        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-
-        /* The number of geqrt to apply */
-        nbgeqrt = qrtree->getnbgeqrf(qrtree, k);
-
-        T = TS;
-        for (i = 0; i < nbgeqrt; i++) {
-            m = qrtree->getm(qrtree, k, i);
-            tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            tempkmin = chameleon_min(tempmm, tempkn);
-
-            INSERT_TASK_zgeqrt(
-                &options,
-                tempmm, tempkn, ib, T->nb,
-                A(m, k),
-                T(m, k));
-
-            if ( genD ) {
-                int tempDmm = m == D->mt-1 ? D->m-m*D->mb : D->mb;
-                int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
+        RUNTIME_iteration_push( chamctxt, k );
 
-                INSERT_TASK_zlacpy(
-                    &options,
-                    ChamLower, tempDmm, tempDkn, A->nb,
-                    A(m, k),
-                    D(m, k) );
-#if defined(CHAMELEON_USE_CUDA)
-                INSERT_TASK_zlaset(
-                    &options,
-                    ChamUpper, tempDmm, tempDkn,
-                    0., 1.,
-                    D(m, k) );
-#endif
-            }
-
-            for (n = k+1; n < A->nt; n++) {
-                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-                INSERT_TASK_zunmqr(
-                    &options,
-                    ChamLeft, ChamConjTrans,
-                    tempmm, tempnn, tempkmin, ib, T->nb,
-                    D(m, k),
-                    T(m, k),
-                    A(m, n));
-            }
-            RUNTIME_data_flush( sequence, D(m, k) );
-            RUNTIME_data_flush( sequence, T(m, k) );
-        }
-
-        /* Setting the order of the tiles */
-        nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
-
-        for (i = 0; i < nbtiles; i++) {
-            m = tiles[i];
-            p = qrtree->currpiv(qrtree, k, m);
-
-            tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-
-            if ( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
-                /* TS kernel */
-                T = TS;
-                L = 0;
-            }
-            else {
-                /* TT kernel */
-                T = TT;
-                L = tempmm;
-            }
-
-            node = A->get_rankof( A, m, k );
-            RUNTIME_data_migrate( sequence, A(p, k), node );
-            RUNTIME_data_migrate( sequence, A(m, k), node );
-
-            INSERT_TASK_ztpqrt(
-                &options,
-                tempmm, tempkn, chameleon_min(L, tempkn), ib, T->nb,
-                A(p, k),
-                A(m, k),
-                T(m, k));
-
-            for (n = k+1; n < A->nt; n++) {
-                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-
-                node = A->get_rankof( A, m, n );
-                RUNTIME_data_migrate( sequence, A(p, n), node );
-                RUNTIME_data_migrate( sequence, A(m, n), node );
-
-                INSERT_TASK_ztpmqrt(
-                    &options,
-                    ChamLeft, ChamConjTrans,
-                    tempmm, tempnn, A->nb, L, ib, T->nb,
-                    A(m, k),
-                    T(m, k),
-                    A(p, n),
-                    A(m, n));
-            }
-            RUNTIME_data_flush( sequence, A(m, k) );
-            RUNTIME_data_flush( sequence, T(m, k) );
-        }
+        chameleon_pzgeqrf_param_step( genD, ChamLower, k, ib, qrtree, tiles,
+                                      A, TS, TT, D, &options, sequence );
 
         /* Restore the original location of the tiles */
         for (n = k; n < A->nt; n++) {
@@ -195,10 +236,10 @@ void chameleon_pzgeqrf_param( int genD, int K,
                                   A->get_rankof( A, k, n ) );
         }
 
-        RUNTIME_iteration_pop(chamctxt);
+        RUNTIME_iteration_pop( chamctxt );
     }
 
-    free(tiles);
-    RUNTIME_options_ws_free(&options);
-    RUNTIME_options_finalize(&options, chamctxt);
+    free( tiles );
+    RUNTIME_options_ws_free( &options );
+    RUNTIME_options_finalize( &options, chamctxt );
 }
diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c
index e9046ab30581d6f38e35f9514a9070fa36c89802..5ffb77ab7e61ac7dbbfdd80222bf245581f2d744 100644
--- a/compute/pzungqr_param.c
+++ b/compute/pzungqr_param.c
@@ -28,6 +28,131 @@
 
 /**
  *  Parallel construction of Q using tile V (application to identity) - dynamic scheduling
+ *
+ * @param[in] genD
+ *         Indicate if the copies of the A tiles must be done into D to speed up
+ *         computations in updates.
+ *
+ * @param[in] uplo
+ *         Indicate which kind of factorization has been performed on A to apply
+ *         the respective Q generation.
+ *         - ChamLower: Apply Classic QR factorization of the matrix A
+ *         - ChamUpper: Apply the factorization of the upper part from a TT kernel.
+ *         - ChamUpperLower: Apply the factorization of the full tile from a TS kernel.
+ */
+void chameleon_pzungqr_param_step( int genD, cham_uplo_t uplo, int k, int ib,
+                                   const libhqr_tree_t *qrtree, int nbtiles, int *tiles,
+                                   CHAM_desc_t *A, CHAM_desc_t *Q,
+                                   CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
+                                   RUNTIME_option_t *options, RUNTIME_sequence_t *sequence )
+{
+    CHAM_desc_t *T; /* T descriptor currently in use: TS or TT */
+    int m, n, i, p, L;
+    int tempmm, tempnn, tempkmin, tempkn;
+    int nbgeqrt, node;
+
+    tempkn = k == A->nt-1 ? A->n - k * A->nb : A->nb; /* width of tile column k of A (the last one may be smaller) */
+
+    for (i = nbtiles-1; i >= 0; i--) { /* tiles[] is walked from its last entry to its first one */
+        m = tiles[i];
+        p = qrtree->currpiv( qrtree, k, m ); /* row paired with m in the ztpmqrt tasks */
+
+        tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb; /* height of tile row m, taken from Q */
+
+        if( qrtree->gettype( qrtree, k, m ) == LIBHQR_KILLED_BY_TS ) {
+            /* TS kernel: T is taken from TS and L = 0 */
+            T = TS;
+            L = 0;
+
+            /* Diagonal tile with uplo == ChamUpper: L = tempmm as for TT, while T remains TS */
+            if ( (uplo == ChamUpper) && (m == k) ) {
+                L = tempmm;
+            }
+        }
+        else {
+            /* TT kernel: T is taken from TT and L = tempmm */
+            T = TT;
+            L = tempmm;
+        }
+
+        for (n = k; n < Q->nt; n++) {
+            tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
+
+            node = Q->get_rankof( Q, m, n ); /* both Q tiles are migrated to the rank of Q(m, n) */
+            RUNTIME_data_migrate( sequence, Q(p, n), node );
+            RUNTIME_data_migrate( sequence, Q(m, n), node );
+
+            INSERT_TASK_ztpmqrt(
+                options,
+                ChamLeft, ChamNoTrans,
+                tempmm, tempnn, tempkn, L, ib, T->nb,
+                A(m, k),
+                T(m, k),
+                Q(p, n),
+                Q(m, n));
+        }
+        RUNTIME_data_flush( sequence, A(m, k) );
+        RUNTIME_data_flush( sequence, T(m, k) );
+    }
+
+    T = TS; /* the unmqr loop below works with TS */
+
+    /* Number of tiles of panel k concerned by the unmqr loop, as given by the tree */
+    nbgeqrt = qrtree->getnbgeqrf( qrtree, k );
+    for (i = 0; i < nbgeqrt; i++) {
+        m = qrtree->getm( qrtree, k, i );
+
+        /* With uplo == ChamUpper, nothing is applied for the diagonal tile (m == k) */
+        if ( (uplo == ChamUpper) && (m == k) ) {
+            continue;
+        }
+
+        tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb; /* height of tile row m, taken from A */
+        tempkmin = chameleon_min( tempmm, tempkn );
+
+        if ( genD ) { /* copy the lower part of A(m, k) into D(m, k) */
+            int tempDmm = m == D->mt-1 ? D->m - m * D->mb : D->mb;
+            INSERT_TASK_zlacpy(
+                options,
+                ChamLower, tempDmm, tempkmin, A->nb,
+                A(m, k),
+                D(m, k) );
+#if defined(CHAMELEON_USE_CUDA)
+            INSERT_TASK_zlaset(
+                options,
+                ChamUpper, tempDmm, tempkmin,
+                0., 1.,
+                D(m, k) );
+#endif
+        }
+
+        for (n = k; n < Q->nt; n++) {
+            tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
+
+            /* Migrate Q(m, n) back to the rank returned by get_rankof before the update */
+            RUNTIME_data_migrate( sequence, Q(m, n),
+                                  Q->get_rankof( Q, m, n ) );
+
+            INSERT_TASK_zunmqr(
+                options,
+                ChamLeft, ChamNoTrans,
+                tempmm, tempnn, tempkmin, ib, T->nb,
+                D(m, k),
+                T(m, k),
+                Q(m, n));
+        }
+        RUNTIME_data_flush( sequence, D(m, k) );
+        RUNTIME_data_flush( sequence, T(m, k) );
+    }
+}
+
+/**
+ *  Parallel construction of Q using tile V (application to identity) - dynamic scheduling
+ *
+ * @param[in] genD
+ *         Indicate if the copies of the A tiles must be done to speedup
+ *         computations in updates. genD is considered only if D is not NULL.
+ *
  */
 void chameleon_pzungqr_param( int genD, int K,
                               const libhqr_tree_t *qrtree,
@@ -37,13 +162,9 @@ void chameleon_pzungqr_param( int genD, int K,
 {
     CHAM_context_t *chamctxt;
     RUNTIME_option_t options;
-    CHAM_desc_t *T;
     size_t ws_worker = 0;
     size_t ws_host = 0;
-
-    int k, m, n, i, p, L;
-    int tempmm, tempnn, tempkmin, tempkn;
-    int ib, nbgeqrt, node, nbtiles, *tiles;
+    int k, ib, nbtiles, *tiles;
 
     chamctxt = chameleon_context_self();
     if (sequence->status != CHAMELEON_SUCCESS) {
@@ -53,7 +174,7 @@ void chameleon_pzungqr_param( int genD, int K,
 
     ib = CHAMELEON_IB;
 
-    if (D == NULL) {
+    if ( D == NULL ) {
         D    = A;
         genD = 0;
     }
@@ -83,92 +204,13 @@ void chameleon_pzungqr_param( int genD, int K,
     for (k = K-1; k >=0; k--) {
         RUNTIME_iteration_push(chamctxt, k);
 
-        tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-
         /* Setting the order of tiles */
         nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
 
-        for (i = nbtiles-1; i >= 0; i--) {
-            m = tiles[i];
-            p = qrtree->currpiv(qrtree, k, m);
-
-            tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
-
-            if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
-                /* TS kernel */
-                T = TS;
-                L = 0;
-            }
-            else {
-                /* TT kernel */
-                T = TT;
-                L = tempmm;
-            }
-
-            for (n = k; n < Q->nt; n++) {
-                tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
-
-                node = Q->get_rankof( Q, m, n );
-                RUNTIME_data_migrate( sequence, Q(p, n), node );
-                RUNTIME_data_migrate( sequence, Q(m, n), node );
-
-                INSERT_TASK_ztpmqrt(
-                    &options,
-                    ChamLeft, ChamNoTrans,
-                    tempmm, tempnn, tempkn, L, ib, T->nb,
-                    A(m, k),
-                    T(m, k),
-                    Q(p, n),
-                    Q(m, n));
-            }
-            RUNTIME_data_flush( sequence, A(m, k) );
-            RUNTIME_data_flush( sequence, T(m, k) );
-        }
-
-        T = TS;
-
-        /* The number of geqrt to apply */
-        nbgeqrt = qrtree->getnbgeqrf(qrtree, k);
-        for (i = 0; i < nbgeqrt; i++) {
-            m = qrtree->getm(qrtree, k, i);
-
-            tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            tempkmin = chameleon_min(tempmm, tempkn);
-
-            if ( genD ) {
-                int tempDmm = m == D->mt-1 ? D->m-m*D->mb : D->mb;
-                INSERT_TASK_zlacpy(
-                    &options,
-                    ChamLower, tempDmm, tempkmin, A->nb,
-                    A(m, k),
-                    D(m, k) );
-#if defined(CHAMELEON_USE_CUDA)
-                INSERT_TASK_zlaset(
-                    &options,
-                    ChamUpper, tempDmm, tempkmin,
-                    0., 1.,
-                    D(m, k) );
-#endif
-            }
-
-            for (n = k; n < Q->nt; n++) {
-                tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
-
-                /* Restore the original location of the tiles */
-                RUNTIME_data_migrate( sequence, Q(m, n),
-                                      Q->get_rankof( Q, m, n ) );
-
-                INSERT_TASK_zunmqr(
-                    &options,
-                    ChamLeft, ChamNoTrans,
-                    tempmm, tempnn, tempkmin, ib, T->nb,
-                    D(m, k),
-                    T(m, k),
-                    Q(m, n));
-            }
-            RUNTIME_data_flush( sequence, D(m, k) );
-            RUNTIME_data_flush( sequence, T(m, k) );
-        }
+        chameleon_pzungqr_param_step( genD, ChamLower, k, ib,
+                                      qrtree, nbtiles, tiles,
+                                      A, Q, TS, TT, D,
+                                      &options, sequence );
 
         RUNTIME_iteration_pop(chamctxt);
     }
diff --git a/control/compute_z.h b/control/compute_z.h
index 82d3e99ca5858f37f507a32e442ac2504b4c3ae5..70e94325e4ad00cc0e7bdb826cd83bce2d458fe9 100644
--- a/control/compute_z.h
+++ b/control/compute_z.h
@@ -99,6 +99,15 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans, CHAM_des
 void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, CHAM_desc_t *D, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzbuild( cham_uplo_t uplo, CHAM_desc_t *A, void *user_data, void* user_build_callback, RUNTIME_sequence_t *sequence, RUNTIME_request_t *request );
 
+int  chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
+                                   const libhqr_tree_t *qrtree, int *tiles,
+                                   CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
+                                   RUNTIME_option_t *options, RUNTIME_sequence_t *sequence );
+void chameleon_pzungqr_param_step( int genD, cham_uplo_t uplo, int k, int ib,
+                                   const libhqr_tree_t *qrtree, int nbtiles, int *tiles,
+                                   CHAM_desc_t *A, CHAM_desc_t *Q,
+                                   CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
+                                   RUNTIME_option_t *options, RUNTIME_sequence_t *sequence );
 void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t *A, CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
                               RUNTIME_sequence_t *sequence, RUNTIME_request_t *request);
 void chameleon_pzgeqrf_param( int genD, int K, const libhqr_tree_t *qrtree,