diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c
index 0aa1054dce5da9ffd09a83d7a36ecb0f25aea1bf..ac2cb0aa76116c1cf70f6cd3efaa0f9621ff5b9b 100644
--- a/compute/pzunmlq.c
+++ b/compute/pzunmlq.c
@@ -27,7 +27,7 @@
 #include "control/common.h"
 
 #define A(m,n) A,  m,  n
-#define B(m,n) B,  m,  n
+#define C(m,n) C,  m,  n
 #define T(m,n) T,  m,  n
 #define D(k)   D,  k,  k
 
@@ -35,7 +35,7 @@
  *  Parallel application of Q using tile V - LQ factorization - dynamic scheduling
  */
 void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
-                        CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, CHAM_desc_t *D,
+                        CHAM_desc_t *A, CHAM_desc_t *C, CHAM_desc_t *T, CHAM_desc_t *D,
                         RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
 {
     CHAM_context_t *chamctxt;
@@ -44,9 +44,9 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldbk, ldbm, lddk;
-    int tempmm, tempnn, tempkn, tempkm, tempkmin;
-    int ib, minMT, minM;
+    int ldak, ldck, ldcm, lddk;
+    int tempkm, tempkn, tempkmin, tempmm, tempnn;
+    int ib, KT, K;
 
     chamctxt = chameleon_context_self();
     if (sequence->status != CHAMELEON_SUCCESS) {
@@ -57,11 +57,11 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
     ib = CHAMELEON_IB;
 
     if (A->m > A->n) {
-        minM  = A->n;
-        minMT = A->nt;
+        KT = A->nt;
+        K  = A->n;
     } else {
-        minM  = A->m;
-        minMT = A->mt;
+        KT = A->mt;
+        K  = A->m;
     }
 
     if ( D == NULL ) {
@@ -94,13 +94,14 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
             /*
              *  ChamLeft / ChamNoTrans
              */
-            for (k = 0; k < minMT; k++) {
+            for (k = 0; k < KT; k++) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
-                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
+                tempkm   = k == C->mt - 1 ? C->m - k * C->mb : C->mb;
+                tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
+
                 ldak = BLKLDD(A, k);
-                ldbk = BLKLDD(B, k);
+                ldck = BLKLDD(C, k);
                 lddk = BLKLDD(D, k);
 
                 if ( genD ) {
@@ -118,28 +119,28 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         D(k), lddk );
 #endif
                 }
-                for (n = 0; n < B->nt; n++) {
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                for (n = 0; n < C->nt; n++) {
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                     INSERT_TASK_zunmlq(
                         &options,
                         side, trans,
                         tempkm, tempnn, tempkmin, ib, T->nb,
                         D(k),    lddk,
                         T(k, k), T->mb,
-                        B(k, n), ldbk);
+                        C(k, n), ldck);
                 }
 
                 RUNTIME_data_flush( sequence, D(k)    );
                 RUNTIME_data_flush( sequence, T(k, k) );
 
-                for (m = k+1; m < B->mt; m++) {
-                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                    ldbm = BLKLDD(B, m);
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                for (m = k+1; m < C->mt; m++) {
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                    ldcm = BLKLDD(C, m);
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                        RUNTIME_data_migrate( sequence, B(k, n),
-                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, C(k, n),
+                                              C->get_rankof( C, m, n ) );
 
                         /* TS kernel */
                         INSERT_TASK_ztpmlqt(
@@ -148,8 +149,8 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(k, m), ldak,
                             T(k, m), T->mb,
-                            B(k, n), ldbk,
-                            B(m, n), ldbm);
+                            C(k, n), ldck,
+                            C(m, n), ldcm);
                     }
 
                     RUNTIME_data_flush( sequence, A(k, m) );
@@ -157,9 +158,9 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                 }
 
                 /* Restore the original location of the tiles */
-                for (n = 0; n < B->nt; n++) {
-                    RUNTIME_data_migrate( sequence, B(k, n),
-                                          B->get_rankof( B, k, n ) );
+                for (n = 0; n < C->nt; n++) {
+                    RUNTIME_data_migrate( sequence, C(k, n),
+                                          C->get_rankof( C, k, n ) );
                 }
 
                 RUNTIME_iteration_pop(chamctxt);
@@ -169,23 +170,24 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
          *  ChamLeft / ChamConjTrans
          */
         else {
-            for (k = minMT-1; k >= 0; k--) {
+            for (k = KT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
-                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
+                tempkm   = k == C->mt - 1 ? C->m - k * C->mb : C->mb;
+                tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
+
                 ldak = BLKLDD(A, k);
-                ldbk = BLKLDD(B, k);
+                ldck = BLKLDD(C, k);
                 lddk = BLKLDD(D, k);
 
-                for (m = B->mt-1; m > k; m--) {
-                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                    ldbm = BLKLDD(B, m);
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                for (m = C->mt-1; m > k; m--) {
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                    ldcm = BLKLDD(C, m);
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                        RUNTIME_data_migrate( sequence, B(k, n),
-                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, C(k, n),
+                                              C->get_rankof( C, m, n ) );
 
                         /* TS kernel */
                         INSERT_TASK_ztpmlqt(
@@ -194,13 +196,14 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(k, m), ldak,
                             T(k, m), T->mb,
-                            B(k, n), ldbk,
-                            B(m, n), ldbm);
+                            C(k, n), ldck,
+                            C(m, n), ldcm);
                     }
 
                     RUNTIME_data_flush( sequence, A(k, m) );
                     RUNTIME_data_flush( sequence, T(k, m) );
                 }
+
                 if ( genD ) {
                     int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
                     INSERT_TASK_zlacpy(
@@ -216,11 +219,11 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         D(k), lddk );
 #endif
                 }
-                for (n = 0; n < B->nt; n++) {
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                for (n = 0; n < C->nt; n++) {
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                    RUNTIME_data_migrate( sequence, B(k, n),
-                                          B->get_rankof( B, k, n ) );
+                    RUNTIME_data_migrate( sequence, C(k, n),
+                                          C->get_rankof( C, k, n ) );
 
                     INSERT_TASK_zunmlq(
                         &options,
@@ -228,7 +231,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         tempkm, tempnn, tempkmin, ib, T->nb,
                         D(k),    lddk,
                         T(k, k), T->mb,
-                        B(k, n), ldbk);
+                        C(k, n), ldck);
                 }
                 RUNTIME_data_flush( sequence, D(k)    );
                 RUNTIME_data_flush( sequence, T(k, k) );
@@ -241,22 +244,22 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
      */
     else {
         if (trans == ChamNoTrans) {
-            for (k = minMT-1; k >= 0; k--) {
+            for (k = KT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkn   = k == B->nt - 1 ? B->n - k * B->nb : B->nb;
-                tempkmin = k == minMT - 1 ? minM - k * A->nb : A->nb;
+                tempkn   = k == C->nt - 1 ? C->n - k * C->nb : C->nb;
+                tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
                 ldak = BLKLDD(A, k);
                 lddk = BLKLDD(D, k);
 
-                for (n = B->nt-1; n > k; n--) {
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                    for (m = 0; m < B->mt; m++) {
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
+                for (n = C->nt-1; n > k; n--) {
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+                    for (m = 0; m < C->mt; m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
 
-                        RUNTIME_data_migrate( sequence, B(m, k),
-                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, C(m, k),
+                                              C->get_rankof( C, m, n ) );
 
                         /* TS kernel */
                         INSERT_TASK_ztpmlqt(
@@ -265,13 +268,14 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(k, n), ldak,
                             T(k, n), T->mb,
-                            B(m, k), ldbm,
-                            B(m, n), ldbm);
+                            C(m, k), ldcm,
+                            C(m, n), ldcm);
                     }
 
                     RUNTIME_data_flush( sequence, A(k, n) );
                     RUNTIME_data_flush( sequence, T(k, n) );
                 }
+
                 if ( genD ) {
                     int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
                     INSERT_TASK_zlacpy(
@@ -287,12 +291,12 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         D(k), lddk );
 #endif
                 }
-                for (m = 0; m < B->mt; m++) {
-                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                    ldbm = BLKLDD(B, m);
+                for (m = 0; m < C->mt; m++) {
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                    ldcm = BLKLDD(C, m);
 
-                    RUNTIME_data_migrate( sequence, B(m, k),
-                                          B->get_rankof( B, m, k ) );
+                    RUNTIME_data_migrate( sequence, C(m, k),
+                                          C->get_rankof( C, m, k ) );
 
                     INSERT_TASK_zunmlq(
                         &options,
@@ -300,7 +304,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         tempmm, tempkn, tempkmin, ib, T->nb,
                         D(k),    lddk,
                         T(k, k), T->mb,
-                        B(m, k), ldbm);
+                        C(m, k), ldcm);
                 }
 
                 RUNTIME_data_flush( sequence, D(k)    );
@@ -313,16 +317,17 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
          *  ChamRight / ChamConjTrans
          */
         else {
-            for (k = 0; k < minMT; k++) {
+            for (k = 0; k < KT; k++) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkn   = k == B->nt-1 ? B->n-k*B->nb : B->nb;
-                tempkmin = k == minMT-1 ? minM-k*A->mb : A->mb;
+                tempkn   = k == C->nt - 1 ? C->n - k * C->nb : C->nb;
+                tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
                 ldak = BLKLDD(A, k);
                 lddk = BLKLDD(D, k);
 
                 if ( genD ) {
                     int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
+
                     INSERT_TASK_zlacpy(
                         &options,
                         ChamUpper, tempkmin, tempDkn, A->nb,
@@ -336,29 +341,29 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         D(k), lddk );
 #endif
                 }
-                for (m = 0; m < B->mt; m++) {
-                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                    ldbm = BLKLDD(B, m);
+                for (m = 0; m < C->mt; m++) {
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                    ldcm = BLKLDD(C, m);
                     INSERT_TASK_zunmlq(
                         &options,
                         side, trans,
                         tempmm, tempkn, tempkmin, ib, T->nb,
                         D(k),    lddk,
                         T(k, k), T->mb,
-                        B(m, k), ldbm);
+                        C(m, k), ldcm);
                 }
 
                 RUNTIME_data_flush( sequence, D(k)    );
                 RUNTIME_data_flush( sequence, T(k, k) );
 
-                for (n = k+1; n < B->nt; n++) {
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                    for (m = 0; m < B->mt; m++) {
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
+                for (n = k+1; n < C->nt; n++) {
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+                    for (m = 0; m < C->mt; m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
 
-                        RUNTIME_data_migrate( sequence, B(m, k),
-                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, C(m, k),
+                                              C->get_rankof( C, m, n ) );
 
                         /* TS kernel */
                         INSERT_TASK_ztpmlqt(
@@ -367,8 +372,8 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(k, n), ldak,
                             T(k, n), T->mb,
-                            B(m, k), ldbm,
-                            B(m, n), ldbm);
+                            C(m, k), ldcm,
+                            C(m, n), ldcm);
                     }
 
                     RUNTIME_data_flush( sequence, A(k, n) );
@@ -376,9 +381,9 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                 }
 
                 /* Restore the original location of the tiles */
-                for (m = 0; m < B->mt; m++) {
-                    RUNTIME_data_migrate( sequence, B(m, k),
-                                          B->get_rankof( B, m, k ) );
+                for (m = 0; m < C->mt; m++) {
+                    RUNTIME_data_migrate( sequence, C(m, k),
+                                          C->get_rankof( C, m, k ) );
                 }
 
                 RUNTIME_iteration_pop(chamctxt);
diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c
index fe122e0b4798aa795b881a94173c82df250d9336..6e27ca3f610cbb139e0f37edc127e69097fcedab 100644
--- a/compute/pzunmlq_param.c
+++ b/compute/pzunmlq_param.c
@@ -22,7 +22,7 @@
 #include <stdlib.h>
 
 #define A(m,n) A,  m,  n
-#define B(m,n) B,  m,  n
+#define C(m,n) C,  m,  n
 #define T(m,n) T,  m,  n
 #define D(m,n) D,  m,  n
 
@@ -31,7 +31,7 @@
  */
 void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                               cham_side_t side, cham_trans_t trans,
-                              CHAM_desc_t *A, CHAM_desc_t *B,
+                              CHAM_desc_t *A, CHAM_desc_t *C,
                               CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
                               RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
 {
@@ -42,9 +42,9 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
     size_t ws_host = 0;
 
     int k, m, n, i, p;
-    int ldbm, ldak, ldbp, lddk;
-    int tempnn, temppn, tempkmin, tempmm, tempkm;
-    int ib, K, L;
+    int ldak, lddk, ldcp, ldcm;
+    int temppm, temppn, tempmm, tempnn, tempkm,tempkmin;
+    int ib, KT, L;
     int node, nbtiles, *tiles;
 
     chamctxt = chameleon_context_self();
@@ -55,8 +55,6 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
 
     ib = CHAMELEON_IB;
 
-    K = chameleon_min(A->mt, A->nt);
-
     if ( D == NULL ) {
         D    = A;
         genD = 0;
@@ -85,15 +83,16 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
     /* Initialisation of tiles */
     tiles = (int*)calloc( qrtree->mt, sizeof(int) );
 
+    KT = chameleon_min( A->mt, A->nt );
     if (side == ChamLeft ) {
         if (trans == ChamNoTrans) {
             /*
              *  ChamLeft / ChamNoTrans
              */
-            for (k = 0; k < K; k++) {
+            for (k = 0; k < KT; k++) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
+                tempkm = k == A->mt - 1 ? A->m - k * A->mb : A->mb;
                 ldak = BLKLDD(A, k);
                 lddk = BLKLDD(D, k);
 
@@ -101,12 +100,14 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
                     p = qrtree->getm(qrtree, k, i);
 
-                    temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb;
-                    tempkmin = chameleon_min(tempkm, temppn);
-                    ldbp = BLKLDD(B, p);
+                    temppm   = p == C->mt-1 ? C->m - p * C->mb : C->mb;
+                    tempkmin = chameleon_min( temppm, tempkm );
+
+                    ldcp = BLKLDD(C, p);
 
                     if ( genD ) {
                         int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
+
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamUpper, tempkmin, tempDpn, A->nb,
@@ -120,17 +121,15 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                             D(k, p), lddk );
 #endif
                     }
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                         INSERT_TASK_zunmlq(
-                            &options,
-                            side, trans,
-                            temppn, tempnn, tempkmin, ib, T->nb,
+                            &options, side, trans,
+                            temppm, tempnn, tempkmin, ib, T->nb,
                             D(k, p), lddk,
                             T(k, p), T->mb,
-                            B(p, n), ldbp);
+                            C(p, n), ldcp);
                     }
-
                     RUNTIME_data_flush( sequence, D(k, p) );
                     RUNTIME_data_flush( sequence, T(k, p) );
                 }
@@ -142,9 +141,9 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                     m = tiles[i];
                     p = qrtree->currpiv(qrtree, k, m);
 
-                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                    ldbp = BLKLDD(B, p);
-                    ldbm = BLKLDD(B, m);
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                    ldcm = BLKLDD(C, m);
+                    ldcp = BLKLDD(C, p);
 
                     if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
                         /* TS kernel */
@@ -156,30 +155,29 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         L = A->nb;
                         T = TT;
                     }
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                        node = B->get_rankof( B, m, n );
-                        RUNTIME_data_migrate( sequence, B(p, n), node );
-                        RUNTIME_data_migrate( sequence, B(m, n), node );
+                        node = C->get_rankof( C, m, n );
+                        RUNTIME_data_migrate( sequence, C(p, n), node );
+                        RUNTIME_data_migrate( sequence, C(m, n), node );
 
                         INSERT_TASK_ztpmlqt(
-                            &options,
-                            side, trans,
+                            &options, side, trans,
                             tempmm, tempnn, tempkm, chameleon_min( L, tempnn ), ib, T->nb,
                             A(k, m), ldak,
                             T(k, m), T->mb,
-                            B(p, n), ldbp,
-                            B(m, n), ldbm);
+                            C(p, n), ldcp,
+                            C(m, n), ldcm);
                     }
                     RUNTIME_data_flush( sequence, A(k, m) );
                     RUNTIME_data_flush( sequence, T(k, m) );
                 }
 
                 /* Restore the original location of the tiles */
-                for (n = 0; n < B->nt; n++) {
-                    RUNTIME_data_migrate( sequence, B(k, n),
-                                          B->get_rankof( B, k, n ) );
+                for (n = 0; n < C->nt; n++) {
+                    RUNTIME_data_migrate( sequence, C(k, n),
+                                          C->get_rankof( C, k, n ) );
                 }
 
                 RUNTIME_iteration_pop(chamctxt);
@@ -189,7 +187,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
          *  ChamLeft / ChamConjTrans
          */
         else {
-            for (k = K-1; k >= 0; k--) {
+            for (k = KT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
@@ -203,9 +201,9 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                     m = tiles[i];
                     p = qrtree->currpiv(qrtree, k, m);
 
-                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                    ldbp = BLKLDD(B, p);
-                    ldbm = BLKLDD(B, m);
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                    ldcp = BLKLDD(C, p);
+                    ldcm = BLKLDD(C, m);
 
                     if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
                         /* TS kernel */
@@ -217,21 +215,20 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         L = A->nb;
                         T = TT;
                     }
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                        node = B->get_rankof( B, m, n );
-                        RUNTIME_data_migrate( sequence, B(p, n), node );
-                        RUNTIME_data_migrate( sequence, B(m, n), node );
+                        node = C->get_rankof( C, m, n );
+                        RUNTIME_data_migrate( sequence, C(p, n), node );
+                        RUNTIME_data_migrate( sequence, C(m, n), node );
 
                         INSERT_TASK_ztpmlqt(
-                            &options,
-                            side, trans,
+                            &options, side, trans,
                             tempmm, tempnn, tempkm, chameleon_min(L, tempnn), ib, T->nb,
                             A(k, m), ldak,
                             T(k, m), T->mb,
-                            B(p, n), ldbp,
-                            B(m, n), ldbm);
+                            C(p, n), ldcp,
+                            C(m, n), ldcm);
                     }
                     RUNTIME_data_flush( sequence, A(k, m) );
                     RUNTIME_data_flush( sequence, T(k, m) );
@@ -241,12 +238,14 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
                     p = qrtree->getm(qrtree, k, i);
 
-                    temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb;
-                    tempkmin = chameleon_min(tempkm, temppn);
-                    ldbp = BLKLDD(B, p);
+                    temppm   = p == C->mt-1 ? C->m-p*C->mb : C->mb;
+                    tempkmin = chameleon_min( temppm, tempkm );
+
+                    ldcp = BLKLDD(C, p);
 
                     if ( genD ) {
                         int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
+
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamUpper, tempkmin, tempDpn, A->nb,
@@ -260,25 +259,24 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                             D(k, p), lddk );
 #endif
                     }
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
 
-                        RUNTIME_data_migrate( sequence, B(p, n),
-                                              B->get_rankof( B, p, n ) );
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                        RUNTIME_data_migrate( sequence, C(p, n),
+                                              C->get_rankof( C, p, n ) );
 
                         INSERT_TASK_zunmlq(
-                            &options,
-                            side, trans,
-                            temppn, tempnn, tempkmin, ib, T->nb,
+                            &options, side, trans,
+                            temppm, tempnn, tempkmin, ib, T->nb,
                             D(k, p), lddk,
                             T(k, p), T->mb,
-                            B(p, n), ldbp);
+                            C(p, n), ldcp);
                     }
 
                     RUNTIME_data_flush( sequence, D(k, p) );
                     RUNTIME_data_flush( sequence, T(k, p) );
                 }
-
                 RUNTIME_iteration_pop(chamctxt);
             }
         }
@@ -288,7 +286,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
      */
     else {
         if (trans == ChamNoTrans) {
-            for (k = K-1; k >= 0; k--) {
+            for (k = KT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
@@ -302,7 +300,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                     n = tiles[i];
                     p = qrtree->currpiv(qrtree, k, n);
 
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
                     if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) {
                         /* TS kernel */
@@ -314,22 +312,22 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         L = tempnn;
                         T = TT;
                     }
-                    for (m = 0; m < B->mt; m++) {
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
 
-                        node = B->get_rankof( B, m, n );
-                        RUNTIME_data_migrate( sequence, B(m, p), node );
-                        RUNTIME_data_migrate( sequence, B(m, n), node );
+                    for (m = 0; m < C->mt; m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
+
+                        node = C->get_rankof( C, m, n );
+                        RUNTIME_data_migrate( sequence, C(m, p), node );
+                        RUNTIME_data_migrate( sequence, C(m, n), node );
 
                         INSERT_TASK_ztpmlqt(
-                            &options,
-                            side, trans,
+                            &options, side, trans,
                             tempmm, tempnn, tempkm, L, ib, T->nb,
                             A(k, n), ldak,
                             T(k, n), T->mb,
-                            B(m, p), ldbm,
-                            B(m, n), ldbm);
+                            C(m, p), ldcm,
+                            C(m, n), ldcm);
                     }
                     RUNTIME_data_flush( sequence, A(k, n) );
                     RUNTIME_data_flush( sequence, T(k, n) );
@@ -339,11 +337,12 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
                     p = qrtree->getm(qrtree, k, i);
 
-                    temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb;
-                    tempkmin = chameleon_min(tempkm, temppn);
+                    temppn   = p == C->nt-1 ? C->n - p * C->nb : C->nb;
+                    tempkmin = chameleon_min( temppn, tempkm );
 
                     if ( genD ) {
                         int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
+
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamUpper, tempkmin, tempDpn, A->nb,
@@ -357,26 +356,24 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                             D(k, p), lddk );
 #endif
                     }
-                    for (m = 0; m < B->mt; m++) {
-                        ldbm = BLKLDD(B, m);
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
 
-                        RUNTIME_data_migrate( sequence, B(m, p),
-                                              B->get_rankof( B, m, p ) );
+                    for (m = 0; m < C->mt; m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
+
+                        RUNTIME_data_migrate( sequence, C(m, p),
+                                              C->get_rankof( C, m, p ) );
 
                         INSERT_TASK_zunmlq(
-                            &options,
-                            side, trans,
+                            &options, side, trans,
                             tempmm, temppn, tempkmin, ib, T->nb,
                             D(k, p), lddk,
                             T(k, p), T->mb,
-                            B(m, p), ldbm);
+                            C(m, p), ldcm);
                     }
-
                     RUNTIME_data_flush( sequence, D(k, p) );
                     RUNTIME_data_flush( sequence, T(k, p) );
                 }
-
                 RUNTIME_iteration_pop(chamctxt);
             }
         }
@@ -384,7 +381,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
          *  ChamRight / ChamConjTrans
          */
         else {
-            for (k = 0; k < K; k++) {
+            for (k = 0; k < KT; k++) {
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
@@ -395,11 +392,12 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
                     p = qrtree->getm(qrtree, k, i);
 
-                    temppn = p == A->nt-1 ? A->n-p*A->nb : A->nb;
-                    tempkmin = chameleon_min(tempkm, temppn);
+                    temppn   = p == C->nt - 1 ? C->n - p * C->nb : C->nb;
+                    tempkmin = chameleon_min( temppn, tempkm );
 
                     if ( genD ) {
                         int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
+
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamUpper, tempkmin, tempDpn, A->nb,
@@ -413,18 +411,17 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                             D(k, p), lddk );
 #endif
                     }
-                    for (m = 0; m < B->mt; m++) {
-                        ldbm = BLKLDD(B, m);
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+
+                    for (m = 0; m < C->mt; m++) {
+                        ldcm = BLKLDD(C, m);
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                         INSERT_TASK_zunmlq(
-                            &options,
-                            side, trans,
+                            &options, side, trans,
                             tempmm, temppn, tempkmin, ib, T->nb,
                             D(k, p), lddk,
                             T(k, p), TS->mb,
-                            B(m, p), ldbm);
+                            C(m, p), ldcm);
                     }
-
                     RUNTIME_data_flush( sequence, D(k, p) );
                     RUNTIME_data_flush( sequence, T(k, p) );
                 }
@@ -436,7 +433,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                     n = tiles[i];
                     p = qrtree->currpiv(qrtree, k, n);
 
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
                     if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) {
                         /* TS kernel */
@@ -449,31 +446,30 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         T = TT;
                     }
 
-                    for (m = 0; m < B->mt; m++) {
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
+                    for (m = 0; m < C->mt; m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
 
-                        node = B->get_rankof( B, m, n );
-                        RUNTIME_data_migrate( sequence, B(m, p), node );
-                        RUNTIME_data_migrate( sequence, B(m, n), node );
+                        node = C->get_rankof( C, m, n );
+                        RUNTIME_data_migrate( sequence, C(m, p), node );
+                        RUNTIME_data_migrate( sequence, C(m, n), node );
 
                         INSERT_TASK_ztpmlqt(
-                            &options,
-                            side, trans,
+                            &options, side, trans,
                             tempmm, tempnn, tempkm, L, ib, T->nb,
                             A(k, n), ldak,
                             T(k, n), T->mb,
-                            B(m, p), ldbm,
-                            B(m, n), ldbm);
+                            C(m, p), ldcm,
+                            C(m, n), ldcm);
                     }
                     RUNTIME_data_flush( sequence, A(k, n) );
                     RUNTIME_data_flush( sequence, T(k, n) );
                 }
 
                 /* Restore the original location of the tiles */
-                for (m = 0; m < B->mt; m++) {
-                    RUNTIME_data_migrate( sequence, B( m, k ),
-                                          B->get_rankof( B, m, k ) );
+                for (m = 0; m < C->mt; m++) {
+                    RUNTIME_data_migrate( sequence, C( m, k ),
+                                          C->get_rankof( C, m, k ) );
                 }
 
                 RUNTIME_iteration_pop(chamctxt);
diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c
index c3b7d0c1d0f36636d42c1d2fa67296c283df3181..8f2931ccdba43f7f7d89f8f5f1b962a28567c4bf 100644
--- a/compute/pzunmlqrh.c
+++ b/compute/pzunmlqrh.c
@@ -27,7 +27,7 @@
 #include "control/common.h"
 
 #define A(m,n)  A, (m), (n)
-#define B(m,n)  B, (m), (n)
+#define C(m,n)  C, (m), (n)
 #define T(m,n)  T, (m), (n)
 #define T2(m,n) T, (m), ((n)+A->nt)
 #define D(m,n)  D, (m), (n)
@@ -37,7 +37,7 @@
  *  Householder) - dynamic scheduling
  */
 void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans,
-                          CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, CHAM_desc_t *D,
+                          CHAM_desc_t *A, CHAM_desc_t *C, CHAM_desc_t *T, CHAM_desc_t *D,
                           RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
 {
     CHAM_context_t *chamctxt;
@@ -45,10 +45,11 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
     size_t ws_worker = 0;
     size_t ws_host = 0;
 
-    int k, m, n;
-    int K, N, RD, lastRD;
-    int ldak, lddk, ldbN, ldbm, ldbNRD;
-    int tempNn, tempkm, tempnn, tempmm, tempNRDn, tempkmin;
+    int k, m, n, p;
+    int KT, RD, lastRD;
+    int ldak, lddk;
+    int ldcp, ldcm;
+    int temppm, temppn, tempkm, tempnn, tempmm, tempkmin;
     int ib, node;
 
     chamctxt = chameleon_context_self();
@@ -84,62 +85,66 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
-    K = chameleon_min(A->mt, A->nt);
+    KT = chameleon_min( A->mt, A->nt );
     if (side == ChamLeft ) {
         if (trans == ChamNoTrans) {
             /*
              *  ChamLeft / ChamNoTrans
              */
-            for (k = 0; k < K; k++) {
+            for (k = 0; k < KT; k++) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
+                tempkm = k == A->mt - 1 ? A->m - k * A->mb : A->mb;
+
                 ldak = BLKLDD(A, k);
                 lddk = BLKLDD(D, k);
 
-                for (N = k; N < A->nt; N += BS) {
-                    tempNn   = N == A->nt-1 ? A->n-N*A->nb : A->nb;
-                    tempkmin = chameleon_min(tempkm,tempNn);
-                    ldbN = BLKLDD(B, N);
+                for (p = k; p < C->mt; p += BS) {
+
+                    temppm   = p == C->mt-1 ? C->m - p * C->mb : C->mb;
+                    tempkmin = chameleon_min( temppm, tempkm );
+
+                    ldcp = BLKLDD(C, p);
+
                     if ( genD ) {
-                        int tempDNn = N == D->nt-1 ? D->n-N*D->nb : D->nb;
+                        int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamUpper, tempkmin, tempDNn, A->nb,
-                            A(k, N), ldak,
-                            D(k, N), lddk );
+                            ChamUpper, tempkmin, tempDpn, A->nb,
+                            A(k, p), ldak,
+                            D(k, p), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
-                            ChamLower, tempkmin, tempDNn,
+                            ChamLower, tempkmin, tempDpn,
                             0., 1.,
-                            D(k, N), lddk );
+                            D(k, p), lddk );
 #endif
                     }
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                         INSERT_TASK_zunmlq(
                             &options,
                             side, trans,
-                            tempNn, tempnn,
-                            tempkmin, ib, T->nb,
-                            D(k, N), lddk,
-                            T(k, N), T->mb,
-                            B(N, n), ldbN);
+                            temppm, tempnn, tempkmin, ib, T->nb,
+                            D(k, p), lddk,
+                            T(k, p), T->mb,
+                            C(p, n), ldcp);
                     }
-                    RUNTIME_data_flush( sequence, D(k, N) );
-                    RUNTIME_data_flush( sequence, T(k, N) );
+                    RUNTIME_data_flush( sequence, D(k, p) );
+                    RUNTIME_data_flush( sequence, T(k, p) );
 
-                    for (m = N+1; m < chameleon_min(N+BS, A->nt); m++) {
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    for (m = p+1; m < chameleon_min(p+BS, C->mt); m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
 
-                            node = B->get_rankof( B, m, n );
-                            RUNTIME_data_migrate( sequence, B(N, n), node );
-                            RUNTIME_data_migrate( sequence, B(m, n), node );
+                        for (n = 0; n < C->nt; n++) {
+                            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(p, n), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TS kernel */
                             INSERT_TASK_ztpmlqt(
@@ -147,350 +152,369 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                                 tempmm, tempnn, tempkm, 0, ib, T->nb,
                                 A(k, m), ldak,
                                 T(k, m), T->mb,
-                                B(N, n), ldbN,
-                                B(m, n), ldbm);
+                                C(p, n), ldcp,
+                                C(m, n), ldcm);
                         }
                         RUNTIME_data_flush( sequence, A(k, m) );
                         RUNTIME_data_flush( sequence, T(k, m) );
                     }
                 }
-                for (RD = BS; RD < A->nt-k; RD *= 2) {
-                    for (N = k; N+RD < A->nt; N += 2*RD) {
-                        tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb;
-                        ldbN   = BLKLDD(B, N   );
-                        ldbNRD = BLKLDD(B, N+RD);
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-
-                            node = B->get_rankof( B, N+RD, n );
-                            RUNTIME_data_migrate( sequence, B(N, n),    node );
-                            RUNTIME_data_migrate( sequence, B(N+RD, n), node );
+                for (RD = BS; RD < C->mt-k; RD *= 2) {
+                    for (p = k; p+RD < C->mt; p += 2*RD) {
+                        m = p+RD;
+
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
+                        ldcp = BLKLDD(C, p);
+
+                        for (n = 0; n < C->nt; n++) {
+                            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(p, n), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TT kernel */
                             INSERT_TASK_ztpmlqt(
                                 &options,
                                 side, trans,
-                                tempNRDn, tempnn, tempkm, tempnn, ib, T->nb,
-                                A (k, N+RD), ldak,
-                                T2(k, N+RD), T->mb,
-                                B (N,    n), ldbN,
-                                B (N+RD, n), ldbNRD);
+                                tempmm, tempnn, tempkm, tempnn, ib, T->nb,
+                                A (k, m), ldak,
+                                T2(k, m), T->mb,
+                                C (p, n), ldcp,
+                                C (m, n), ldcm);
                         }
-                        RUNTIME_data_flush( sequence, A (k, N+RD) );
-                        RUNTIME_data_flush( sequence, T2(k, N+RD) );
+                        RUNTIME_data_flush( sequence, A (k, m) );
+                        RUNTIME_data_flush( sequence, T2(k, m) );
                     }
                 }
 
                 /* Restore the original location of the tiles */
-                for (n = 0; n < B->nt; n++) {
-                    RUNTIME_data_migrate( sequence, B(k, n),
-                                          B->get_rankof( B, k, n ) );
+                for (n = 0; n < C->nt; n++) {
+                    RUNTIME_data_migrate( sequence, C(k, n),
+                                          C->get_rankof( C, k, n ) );
                 }
 
                 RUNTIME_iteration_pop(chamctxt);
             }
-        } else {
-            /*
-             *  ChamLeft / ChamConjTrans
-             */
-            for (k = K-1; k >= 0; k--) {
+        }
+        /*
+         *  ChamLeft / ChamConjTrans
+         */
+        else {
+            for (k = KT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
                 lddk = BLKLDD(D, k);
+
                 lastRD = 0;
-                for (RD = BS; RD < A->nt-k; RD *= 2)
+                for (RD = BS; RD < C->mt-k; RD *= 2)
                     lastRD = RD;
                 for (RD = lastRD; RD >= BS; RD /= 2) {
-                    for (N = k; N+RD < A->nt; N += 2*RD) {
-                        tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb;
-                        ldbN   = BLKLDD(B, N   );
-                        ldbNRD = BLKLDD(B, N+RD);
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    for (p = k; p+RD < C->mt; p += 2*RD) {
+                        m = p+RD;
 
-                            node = B->get_rankof( B, N+RD, n );
-                            RUNTIME_data_migrate( sequence, B(N, n),    node );
-                            RUNTIME_data_migrate( sequence, B(N+RD, n), node );
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
+                        ldcp = BLKLDD(C, p);
+
+                        for (n = 0; n < C->nt; n++) {
+                            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(p, n), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TT kernel */
                             INSERT_TASK_ztpmlqt(
-                                &options,
-                                side, trans,
-                                tempNRDn, tempnn, tempkm, tempnn, ib, T->nb,
-                                A (k, N+RD), ldak,
-                                T2(k, N+RD), T->mb,
-                                B (N,    n), ldbN,
-                                B (N+RD, n), ldbNRD);
+                                &options, side, trans,
+                                tempmm, tempnn, tempkm, tempnn, ib, T->nb,
+                                A (k, m), ldak,
+                                T2(k, m), T->mb,
+                                C (p, n), ldcp,
+                                C (m, n), ldcm);
                         }
-                        RUNTIME_data_flush( sequence, A (k, N+RD) );
-                        RUNTIME_data_flush( sequence, T2(k, N+RD) );
+                        RUNTIME_data_flush( sequence, A (k, m) );
+                        RUNTIME_data_flush( sequence, T2(k, m) );
                     }
                 }
-                for (N = k; N < A->nt; N += BS) {
-                    tempNn   = N == A->nt-1 ? A->n-N*A->nb : A->nb;
-                    tempkmin = chameleon_min(tempkm,tempNn);
-                    ldbN = BLKLDD(B, N);
-                    for (m = chameleon_min(N+BS, A->nt)-1; m > N; m--) {
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-
-                            node = B->get_rankof( B, m, n );
-                            RUNTIME_data_migrate( sequence, B(N, n), node );
-                            RUNTIME_data_migrate( sequence, B(m, n), node );
+                for (p = k; p < C->mt; p += BS) {
+                    ldcp = BLKLDD(C, p);
+
+                    for (m = chameleon_min(p+BS, C->mt)-1; m > p; m--) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
+
+                        for (n = 0; n < C->nt; n++) {
+                            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(p, n), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TS kernel */
                             INSERT_TASK_ztpmlqt(
-                                &options,
-                                side, trans,
+                                &options, side, trans,
                                 tempmm, tempnn, tempkm, 0, ib, T->nb,
                                 A(k, m), ldak,
                                 T(k, m), T->mb,
-                                B(N, n), ldbN,
-                                B(m, n), ldbm);
+                                C(p, n), ldcp,
+                                C(m, n), ldcm);
                         }
                         RUNTIME_data_flush( sequence, A(k, m) );
                         RUNTIME_data_flush( sequence, T(k, m) );
                     }
+
+                    temppm   = p == C->mt-1 ? C->m-p*C->mb : C->mb;
+                    tempkmin = chameleon_min( temppm, tempkm );
+
                     if ( genD ) {
-                        int tempDNn = N == D->nt-1 ? D->n-N*D->nb : D->nb;
+                        int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
+
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamUpper, tempkmin, tempDNn, A->nb,
-                            A(k, N), ldak,
-                            D(k, N), lddk );
+                            ChamUpper, tempkmin, tempDpn, A->nb,
+                            A(k, p), ldak,
+                            D(k, p), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
-                            ChamLower, tempkmin, tempDNn,
+                            ChamLower, tempkmin, tempDpn,
                             0., 1.,
-                            D(k, N), lddk );
+                            D(k, p), lddk );
 #endif
                     }
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
 
-                        RUNTIME_data_migrate( sequence, B(N, n),
-                                              B->get_rankof( B, N, n ) );
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                        RUNTIME_data_migrate( sequence, C(p, n),
+                                              C->get_rankof( C, p, n ) );
 
                         INSERT_TASK_zunmlq(
-                            &options,
-                            side, trans,
-                            tempNn, tempnn,
-                            tempkmin, ib, T->nb,
-                            D(k, N), lddk,
-                            T(k, N), T->mb,
-                            B(N, n), ldbN);
+                            &options, side, trans,
+                            temppm, tempnn, tempkmin, ib, T->nb,
+                            D(k, p), lddk,
+                            T(k, p), T->mb,
+                            C(p, n), ldcp);
                     }
-                    RUNTIME_data_flush( sequence, D(k, N) );
-                    RUNTIME_data_flush( sequence, T(k, N) );
+                    RUNTIME_data_flush( sequence, D(k, p) );
+                    RUNTIME_data_flush( sequence, T(k, p) );
                 }
                 RUNTIME_iteration_pop(chamctxt);
             }
         }
     }
+    /*
+     *  ChamRight / ChamNoTrans
+     */
     else {
         if (trans == ChamNoTrans) {
-            /*
-             *  ChamRight / ChamNoTrans
-             */
-            for (k = K-1; k >= 0; k--) {
+            for (k = KT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
                 lddk = BLKLDD(D, k);
                 lastRD = 0;
-                for (RD = BS; RD < A->nt-k; RD *= 2)
+                for (RD = BS; RD < C->nt-k; RD *= 2)
                     lastRD = RD;
                 for (RD = lastRD; RD >= BS; RD /= 2) {
-                    for (N = k; N+RD < A->nt; N += 2*RD) {
-                        tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb;
-                        for (m = 0; m < B->mt; m++) {
-                            ldbm   = BLKLDD(B, m);
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                    for (p = k; p+RD < C->nt; p += 2*RD) {
+                        n = p+RD;
+
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                        for (m = 0; m < C->mt; m++) {
+                            tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                            ldcm   = BLKLDD(C, m);
 
-                            node = B->get_rankof( B, m, N+RD );
-                            RUNTIME_data_migrate( sequence, B(m, N),    node );
-                            RUNTIME_data_migrate( sequence, B(m, N+RD), node );
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(m, p), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TT kernel */
                             INSERT_TASK_ztpmlqt(
-                                &options,
-                                side, trans,
-                                tempmm, tempNRDn, tempkm, tempNRDn, ib, T->nb,
-                                A (k, N+RD), ldak,
-                                T2(k, N+RD), T->mb,
-                                B (m, N   ), ldbm,
-                                B (m, N+RD), ldbm);
+                                &options, side, trans,
+                                tempmm, tempnn, tempkm, tempnn, ib, T->nb,
+                                A (k, n), ldak,
+                                T2(k, n), T->mb,
+                                C (m, p), ldcm,
+                                C (m, n), ldcm);
                         }
-                        RUNTIME_data_flush( sequence, A (k, N+RD) );
-                        RUNTIME_data_flush( sequence, T2(k, N+RD) );
+                        RUNTIME_data_flush( sequence, A (k, n) );
+                        RUNTIME_data_flush( sequence, T2(k, n) );
                     }
                 }
-                for (N = k; N < A->nt; N += BS) {
-                    tempNn   = N == A->nt-1 ? A->n-N*A->nb : A->nb;
-                    tempkmin = chameleon_min(tempkm,tempNn);
-                    for (n = chameleon_min(N+BS, A->nt)-1; n > N; n--) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm = BLKLDD(B, m);
-
-                            node = B->get_rankof( B, m, n );
-                            RUNTIME_data_migrate( sequence, B(m, N), node );
-                            RUNTIME_data_migrate( sequence, B(m, n), node );
+                for (p = k; p < C->nt; p += BS) {
+
+                    for (n = chameleon_min(p+BS, C->nt)-1; n > p; n--) {
+
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                        for (m = 0; m < C->mt; m++) {
+                            tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                            ldcm = BLKLDD(C, m);
+
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(m, p), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TS kernel */
                             INSERT_TASK_ztpmlqt(
-                                &options,
-                                side, trans,
+                                &options, side, trans,
                                 tempmm, tempnn, tempkm, 0, ib, T->nb,
                                 A(k, n), ldak,
                                 T(k, n), T->mb,
-                                B(m, N), ldbm,
-                                B(m, n), ldbm);
+                                C(m, p), ldcm,
+                                C(m, n), ldcm);
                         }
                         RUNTIME_data_flush( sequence, A(k, n) );
                         RUNTIME_data_flush( sequence, T(k, n) );
                     }
+
+                    temppn   = p == C->nt-1 ? C->n - p * C->nb : C->nb;
+                    tempkmin = chameleon_min( temppn, tempkm );
+
                     if ( genD ) {
-                        int tempDNn = N == D->nt-1 ? D->n-N*D->nb : D->nb;
+                        int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
+
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamUpper, tempkmin, tempDNn, A->nb,
-                            A(k, N), ldak,
-                            D(k, N), lddk );
+                            ChamUpper, tempkmin, tempDpn, A->nb,
+                            A(k, p), ldak,
+                            D(k, p), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
-                            ChamLower, tempkmin, tempDNn,
+                            ChamLower, tempkmin, tempDpn,
                             0., 1.,
-                            D(k, N), lddk );
+                            D(k, p), lddk );
 #endif
                     }
-                    for (m = 0; m < B->mt; m++) {
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
 
-                        RUNTIME_data_migrate( sequence, B(m, N),
-                                              B->get_rankof( B, m, N ) );
+                    for (m = 0; m < C->mt; m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
+
+                        RUNTIME_data_migrate( sequence, C(m, p),
+                                              C->get_rankof( C, m, p ) );
 
                         INSERT_TASK_zunmlq(
-                            &options,
-                            side, trans,
-                            tempmm, tempNn,
-                            tempkmin, ib, T->nb,
-                            D(k, N), lddk,
-                            T(k, N), T->mb,
-                            B(m, N), ldbm);
+                            &options, side, trans,
+                            tempmm, temppn, tempkmin, ib, T->nb,
+                            D(k, p), lddk,
+                            T(k, p), T->mb,
+                            C(m, p), ldcm);
                     }
-                    RUNTIME_data_flush( sequence, D(k, N) );
-                    RUNTIME_data_flush( sequence, T(k, N) );
+                    RUNTIME_data_flush( sequence, D(k, p) );
+                    RUNTIME_data_flush( sequence, T(k, p) );
                 }
-
                 RUNTIME_iteration_pop(chamctxt);
             }
-        } else {
-            /*
-             *  ChamRight / ChamConjTrans
-             */
-            for (k = 0; k < K; k++) {
+        }
+        /*
+         *  ChamRight / ChamConjTrans
+         */
+        else {
+            for (k = 0; k < KT; k++) {
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
                 lddk = BLKLDD(D, k);
-                for (N = k; N < A->nt; N += BS) {
-                    tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb;
-                    tempkmin = chameleon_min(tempkm,tempNn);
+
+                for (p = k; p < C->nt; p += BS) {
+                    temppn   = p == C->nt - 1 ? C->n - p * C->nb : C->nb;
+                    tempkmin = chameleon_min( temppn, tempkm );
+
                     if ( genD ) {
-                        int tempDNn = N == D->nt-1 ? D->n-N*D->nb : D->nb;
+                        int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
+
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamUpper, tempkmin, tempDNn, A->nb,
-                            A(k, N), ldak,
-                            D(k, N), lddk );
+                            ChamUpper, tempkmin, tempDpn, A->nb,
+                            A(k, p), ldak,
+                            D(k, p), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
-                            ChamLower, tempkmin, tempDNn,
+                            ChamLower, tempkmin, tempDpn,
                             0., 1.,
-                            D(k, N), lddk );
+                            D(k, p), lddk );
 #endif
                     }
-                    for (m = 0; m < B->mt; m++) {
-                        ldbm = BLKLDD(B, m);
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+
+                    for (m = 0; m < C->mt; m++) {
+                        ldcm = BLKLDD(C, m);
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                         INSERT_TASK_zunmlq(
-                            &options,
-                            side, trans,
-                            tempmm, tempNn,
-                            tempkmin, ib, T->nb,
-                            D(k, N), lddk,
-                            T(k, N), T->mb,
-                            B(m, N), ldbm);
+                            &options, side, trans,
+                            tempmm, temppn, tempkmin, ib, T->nb,
+                            D(k, p), lddk,
+                            T(k, p), T->mb,
+                            C(m, p), ldcm);
                     }
-                    RUNTIME_data_flush( sequence, D(k, N) );
-                    RUNTIME_data_flush( sequence, T(k, N) );
+                    RUNTIME_data_flush( sequence, D(k, p) );
+                    RUNTIME_data_flush( sequence, T(k, p) );
 
-                    for (n = N+1; n < chameleon_min(N+BS, A->nt); n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm = BLKLDD(B, m);
+                    for (n = p+1; n < chameleon_min(p+BS, C->nt); n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+                        for (m = 0; m < C->mt; m++) {
+                            tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                            ldcm = BLKLDD(C, m);
 
-                            node = B->get_rankof( B, m, n );
-                            RUNTIME_data_migrate( sequence, B(m, N), node );
-                            RUNTIME_data_migrate( sequence, B(m, n), node );
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(m, p), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TS kernel */
                             INSERT_TASK_ztpmlqt(
-                                &options,
-                                side, trans,
+                                &options, side, trans,
                                 tempmm, tempnn, tempkm, 0, ib, T->nb,
                                 A(k, n), ldak,
                                 T(k, n), T->mb,
-                                B(m, N), ldbm,
-                                B(m, n), ldbm);
+                                C(m, p), ldcm,
+                                C(m, n), ldcm);
                         }
                         RUNTIME_data_flush( sequence, A(k, n) );
                         RUNTIME_data_flush( sequence, T(k, n) );
                     }
                 }
-                for (RD = BS; RD < A->nt-k; RD *= 2) {
-                    for (N = k; N+RD < A->nt; N += 2*RD) {
-                        tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb;
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm   = BLKLDD(B, m);
+                for (RD = BS; RD < C->nt-k; RD *= 2) {
+                    for (p = k; p+RD < C->nt; p += 2*RD) {
+                        n = p + RD;
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                        for (m = 0; m < C->mt; m++) {
+                            tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                            ldcm   = BLKLDD(C, m);
 
-                            node = B->get_rankof( B, m, N+RD );
-                            RUNTIME_data_migrate( sequence, B(m, N),    node );
-                            RUNTIME_data_migrate( sequence, B(m, N+RD), node );
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(m, p), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TT kernel */
                             INSERT_TASK_ztpmlqt(
                                 &options,
                                 side, trans,
-                                tempmm, tempNRDn, tempkm, tempNRDn, ib, T->nb,
-                                A (k, N+RD), ldak,
-                                T2(k, N+RD), T->mb,
-                                B (m, N   ), ldbm,
-                                B (m, N+RD), ldbm);
+                                tempmm, tempnn, tempkm, tempnn, ib, T->nb,
+                                A (k, n), ldak,
+                                T2(k, n), T->mb,
+                                C (m, p), ldcm,
+                                C (m, n), ldcm);
                         }
-                        RUNTIME_data_flush( sequence, A (k, N+RD) );
-                        RUNTIME_data_flush( sequence, T2(k, N+RD) );
+                        RUNTIME_data_flush( sequence, A (k, n) );
+                        RUNTIME_data_flush( sequence, T2(k, n) );
                     }
                 }
 
                 /* Restore the original location of the tiles */
-                for (m = 0; m < B->mt; m++) {
-                    RUNTIME_data_migrate( sequence, B(m, k),
-                                          B->get_rankof( B, m, k ) );
+                for (m = 0; m < C->mt; m++) {
+                    RUNTIME_data_migrate( sequence, C(m, k),
+                                          C->get_rankof( C, m, k ) );
                 }
 
                 RUNTIME_iteration_pop(chamctxt);
diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c
index c872ea0de59ea6122a7cd358cbd210d147c5da90..3a9e93bbee000aab330e562ee82bb6befd44d5db 100644
--- a/compute/pzunmqr.c
+++ b/compute/pzunmqr.c
@@ -27,7 +27,7 @@
 #include "control/common.h"
 
 #define A(m,n) A,  m,  n
-#define B(m,n) B,  m,  n
+#define C(m,n) C,  m,  n
 #define T(m,n) T,  m,  n
 #define D(k)   D,  k,  k
 
@@ -35,7 +35,7 @@
  *  Parallel application of Q using tile V - QR factorization - dynamic scheduling
  */
 void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
-                        CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, CHAM_desc_t *D,
+                        CHAM_desc_t *A, CHAM_desc_t *C, CHAM_desc_t *T, CHAM_desc_t *D,
                         RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
 {
     CHAM_context_t *chamctxt;
@@ -44,9 +44,9 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldbk, ldam, ldan, ldbm, lddk;
-    int tempkm, tempnn, tempkmin, tempmm, tempkn;
-    int ib, minMT, minM;
+    int ldak, ldck, ldam, ldan, ldcm, lddk;
+    int tempkm, tempkn, tempkmin, tempmm, tempnn;
+    int ib, KT, K;
 
     chamctxt = chameleon_context_self();
     if (sequence->status != CHAMELEON_SUCCESS) {
@@ -57,11 +57,11 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
     ib = CHAMELEON_IB;
 
     if (A->m > A->n) {
-        minM  = A->n;
-        minMT = A->nt;
+        KT = A->nt;
+        K  = A->n;
     } else {
-        minM  = A->m;
-        minMT = A->mt;
+        KT = A->mt;
+        K  = A->m;
     }
 
     if ( D == NULL ) {
@@ -94,14 +94,16 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
             /*
              *  ChamLeft / ChamConjTrans
              */
-            for (k = 0; k < minMT; k++) {
+            for (k = 0; k < KT; k++) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
-                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
+                tempkm   = k == C->mt - 1 ? C->m - k * C->mb : C->mb;
+                tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
+
                 ldak = BLKLDD(A, k);
+                ldck = BLKLDD(C, k);
                 lddk = BLKLDD(D, k);
-                ldbk = BLKLDD(B, k);
+
                 if ( genD ) {
                     int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
 
@@ -118,29 +120,29 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                         D(k), lddk );
 #endif
                 }
-                for (n = 0; n < B->nt; n++) {
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                for (n = 0; n < C->nt; n++) {
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                     INSERT_TASK_zunmqr(
                         &options,
                         side, trans,
                         tempkm, tempnn, tempkmin, ib, T->nb,
                         D(k),    lddk,
                         T(k, k), T->mb,
-                        B(k, n), ldbk);
+                        C(k, n), ldck);
                 }
 
                 RUNTIME_data_flush( sequence, D(k)    );
                 RUNTIME_data_flush( sequence, T(k, k) );
 
-                for (m = k+1; m < B->mt; m++) {
-                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                for (m = k+1; m < C->mt; m++) {
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                     ldam = BLKLDD(A, m);
-                    ldbm = BLKLDD(B, m);
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    ldcm = BLKLDD(C, m);
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                        RUNTIME_data_migrate( sequence, B(k, n),
-                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, C(k, n),
+                                              C->get_rankof( C, m, n ) );
 
                         /* TS kernel */
                         INSERT_TASK_ztpmqrt(
@@ -149,8 +151,8 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(m, k), ldam,
                             T(m, k), T->mb,
-                            B(k, n), ldbk,
-                            B(m, n), ldbm);
+                            C(k, n), ldck,
+                            C(m, n), ldcm);
                     }
 
                     RUNTIME_data_flush( sequence, A(m, k) );
@@ -158,9 +160,9 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                 }
 
                 /* Restore the original location of the tiles */
-                for (n = 0; n < B->nt; n++) {
-                    RUNTIME_data_migrate( sequence, B(k, n),
-                                          B->get_rankof( B, k, n ) );
+                for (n = 0; n < C->nt; n++) {
+                    RUNTIME_data_migrate( sequence, C(k, n),
+                                          C->get_rankof( C, k, n ) );
                 }
 
                 RUNTIME_iteration_pop(chamctxt);
@@ -170,23 +172,25 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
          *  ChamLeft / ChamNoTrans
          */
         else {
-            for (k = minMT-1; k >= 0; k--) {
+            for (k = KT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
-                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
+                tempkm   = k == C->mt - 1 ? C->m - k * C->mb : C->mb;
+                tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
+
                 ldak = BLKLDD(A, k);
-                ldbk = BLKLDD(B, k);
+                ldck = BLKLDD(C, k);
                 lddk = BLKLDD(D, k);
-                for (m = B->mt-1; m > k; m--) {
-                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+
+                for (m = C->mt-1; m > k; m--) {
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                     ldam = BLKLDD(A, m);
-                    ldbm = BLKLDD(B, m);
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    ldcm = BLKLDD(C, m);
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                        RUNTIME_data_migrate( sequence, B(k, n),
-                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, C(k, n),
+                                              C->get_rankof( C, m, n ) );
 
                         /* TS kernel */
                         INSERT_TASK_ztpmqrt(
@@ -195,32 +199,34 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(m, k), ldam,
                             T(m, k), T->mb,
-                            B(k, n), ldbk,
-                            B(m, n), ldbm);
+                            C(k, n), ldck,
+                            C(m, n), ldcm);
                     }
                     RUNTIME_data_flush( sequence, A(m, k) );
                     RUNTIME_data_flush( sequence, T(m, k) );
                 }
 
                 if ( genD ) {
+                    int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
+
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamLower, tempkm, tempkmin, A->nb,
+                        ChamLower, tempDkm, tempkmin, A->nb,
                         A(k, k), ldak,
                         D(k),    lddk );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
-                        ChamUpper, tempkm, tempkmin,
+                        ChamUpper, tempDkm, tempkmin,
                         0., 1.,
                         D(k), lddk );
 #endif
                 }
-                for (n = 0; n < B->nt; n++) {
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                for (n = 0; n < C->nt; n++) {
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                    RUNTIME_data_migrate( sequence, B(k, n),
-                                          B->get_rankof( B, k, n ) );
+                    RUNTIME_data_migrate( sequence, C(k, n),
+                                          C->get_rankof( C, k, n ) );
 
                     INSERT_TASK_zunmqr(
                         &options,
@@ -228,7 +234,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                         tempkm, tempnn, tempkmin, ib, T->nb,
                         D(k),    lddk,
                         T(k, k), T->mb,
-                        B(k, n), ldbk);
+                        C(k, n), ldck);
                 }
                 RUNTIME_data_flush( sequence, D(k)    );
                 RUNTIME_data_flush( sequence, T(k, k) );
@@ -241,22 +247,23 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
      */
     else {
         if (trans == ChamConjTrans) {
-            for (k = minMT-1; k >= 0; k--) {
+            for (k = KT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkn   = k == B->nt - 1 ? B->n - k * B->nb : B->nb;
-                tempkmin = k == minMT - 1 ? minM - k * A->nb : A->nb;
+                tempkn   = k == C->nt - 1 ? C->n - k * C->nb : C->nb;
+                tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
                 ldak = BLKLDD(A, k);
                 lddk = BLKLDD(D, k);
-                for (n = B->nt-1; n > k; n--) {
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+
+                for (n = C->nt-1; n > k; n--) {
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                     ldan = BLKLDD(A, n);
-                    for (m = 0; m < B->mt; m++) {
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
+                    for (m = 0; m < C->mt; m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
 
-                        RUNTIME_data_migrate( sequence, B(m, k),
-                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, C(m, k),
+                                              C->get_rankof( C, m, n ) );
 
                         /* TS kernel */
                         INSERT_TASK_ztpmqrt(
@@ -265,41 +272,44 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(n, k), ldan,
                             T(n, k), T->mb,
-                            B(m, k), ldbm,
-                            B(m, n), ldbm);
+                            C(m, k), ldcm,
+                            C(m, n), ldcm);
                     }
 
                     RUNTIME_data_flush( sequence, A(n, k) );
                     RUNTIME_data_flush( sequence, T(n, k) );
                 }
+
                 if ( genD ) {
+                    int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
+
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamLower, tempkn, tempkmin, A->nb,
+                        ChamLower, tempDkm, tempkmin, A->nb,
                         A(k, k), ldak,
                         D(k),    lddk );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
-                        ChamUpper, tempkn, tempkmin,
+                        ChamUpper, tempDkm, tempkmin,
                         0., 1.,
                         D(k), lddk );
 #endif
                 }
-                for (m = 0; m < B->mt; m++) {
-                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                    ldbm = BLKLDD(B, m);
+                for (m = 0; m < C->mt; m++) {
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                    ldcm = BLKLDD(C, m);
 
-                    RUNTIME_data_migrate( sequence, B(m, k),
-                                          B->get_rankof( B, m, k ) );
+                    RUNTIME_data_migrate( sequence, C(m, k),
+                                          C->get_rankof( C, m, k ) );
 
                     INSERT_TASK_zunmqr(
                         &options,
                         side, trans,
                         tempmm, tempkn, tempkmin, ib, T->nb,
-                        D(k), lddk,
+                        D(k),    lddk,
                         T(k, k), T->mb,
-                        B(m, k), ldbm);
+                        C(m, k), ldcm);
                 }
 
                 RUNTIME_data_flush( sequence, D(k)    );
@@ -312,51 +322,54 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
          *  ChamRight / ChamNoTrans
          */
         else {
-            for (k = 0; k < minMT; k++) {
+            for (k = 0; k < KT; k++) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkn   = k == B->nt-1 ? B->n-k*B->nb : B->nb;
-                tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
+                tempkn   = k == C->nt - 1 ? C->n - k * C->nb : C->nb;
+                tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
                 ldak = BLKLDD(A, k);
                 lddk = BLKLDD(D, k);
+
                 if ( genD ) {
+                    int tempDkm = k == D->mt - 1 ? D->m - k * D->mb : D->mb;
+
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamLower, tempkn, tempkmin, A->nb,
+                        ChamLower, tempDkm, tempkmin, A->nb,
                         A(k, k), ldak,
-                        D(k), lddk );
+                        D(k),    lddk );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
-                        ChamUpper, tempkn, tempkmin,
+                        ChamUpper, tempDkm, tempkmin,
                         0., 1.,
                         D(k), lddk );
 #endif
                 }
-                for (m = 0; m < B->mt; m++) {
-                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                    ldbm = BLKLDD(B, m);
+                for (m = 0; m < C->mt; m++) {
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                    ldcm = BLKLDD(C, m);
                     INSERT_TASK_zunmqr(
                         &options,
                         side, trans,
                         tempmm, tempkn, tempkmin, ib, T->nb,
-                        D(k), lddk,
+                        D(k),    lddk,
                         T(k, k), T->mb,
-                        B(m, k), ldbm);
+                        C(m, k), ldcm);
                 }
 
                 RUNTIME_data_flush( sequence, D(k)    );
                 RUNTIME_data_flush( sequence, T(k, k) );
 
-                for (n = k+1; n < B->nt; n++) {
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                for (n = k+1; n < C->nt; n++) {
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                     ldan = BLKLDD(A, n);
-                    for (m = 0; m < B->mt; m++) {
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
+                    for (m = 0; m < C->mt; m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
 
-                        RUNTIME_data_migrate( sequence, B(m, k),
-                                              B->get_rankof( B, m, n ) );
+                        RUNTIME_data_migrate( sequence, C(m, k),
+                                              C->get_rankof( C, m, n ) );
 
                         /* TS kernel */
                         INSERT_TASK_ztpmqrt(
@@ -365,8 +378,8 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
                             A(n, k), ldan,
                             T(n, k), T->mb,
-                            B(m, k), ldbm,
-                            B(m, n), ldbm);
+                            C(m, k), ldcm,
+                            C(m, n), ldcm);
                     }
 
                     RUNTIME_data_flush( sequence, A(n, k) );
@@ -374,9 +387,9 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                 }
 
                 /* Restore the original location of the tiles */
-                for (m = 0; m < B->mt; m++) {
-                    RUNTIME_data_migrate( sequence, B(m, k),
-                                          B->get_rankof( B, m, k ) );
+                for (m = 0; m < C->mt; m++) {
+                    RUNTIME_data_migrate( sequence, C(m, k),
+                                          C->get_rankof( C, m, k ) );
                 }
 
                 RUNTIME_iteration_pop(chamctxt);
diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c
index 923a3f8a73a5dc53f07804fe23e357d3ae14c708..a3905647c0767cbaad4f4a8be98e1397b6996924 100644
--- a/compute/pzunmqr_param.c
+++ b/compute/pzunmqr_param.c
@@ -22,7 +22,7 @@
 #include <stdlib.h>
 
 #define A(m,n) A,  m,  n
-#define B(m,n) B,  m,  n
+#define C(m,n) C,  m,  n
 #define T(m,n) T,  m,  n
 #define D(m,n) D,  m,  n
 
@@ -31,7 +31,7 @@
  */
 void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                               cham_side_t side, cham_trans_t trans,
-                              CHAM_desc_t *A, CHAM_desc_t *B,
+                              CHAM_desc_t *A, CHAM_desc_t *C,
                               CHAM_desc_t *TS, CHAM_desc_t *TT, CHAM_desc_t *D,
                               RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
 {
@@ -42,9 +42,9 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
     size_t ws_host = 0;
 
     int k, m, n, i, p;
-    int ldam, ldan, ldbm, ldbp, lddn, lddm;
-    int tempnn, tempkmin, tempmm, tempkn;
-    int ib, K, L;
+    int ldap, ldam, ldan, lddp, ldcp, ldcm;
+    int temppm, temppn, tempmm, tempnn, tempkn,tempkmin;
+    int ib, KT, L;
     int node, nbtiles, *tiles;
 
     chamctxt = chameleon_context_self();
@@ -55,8 +55,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
 
     ib = CHAMELEON_IB;
 
-    K = chameleon_min(A->mt, A->nt);
-
     if ( D == NULL ) {
         D    = A;
         genD = 0;
@@ -71,7 +69,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
 #if defined(CHAMELEON_USE_CUDA)
     /* Worker space
      *
-     * zunmqr  =      A->nb * ib
+     * zunmqr  =     A->nb * ib
      * ztpmqrt = 3 * A->nb * ib
      */
     ws_worker = chameleon_max( ws_worker, ib * A->nb * 3 );
@@ -85,54 +83,55 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
     /* Initialisation of tiles */
     tiles = (int*)calloc( qrtree->mt, sizeof(int) );
 
+    KT = chameleon_min( A->mt, A->nt );
     if (side == ChamLeft ) {
         if (trans == ChamConjTrans) {
             /*
              *  ChamLeft / ChamConjTrans
              */
-            for (k = 0; k < K; k++) {
+            for (k = 0; k < KT; k++) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
+                tempkn = k == A->nt - 1 ? A->n - k * A->nb : A->nb;
 
                 T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
-                    m = qrtree->getm(qrtree, k, i);
+                    p = qrtree->getm(qrtree, k, i);
 
-                    tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                    tempkmin = chameleon_min(tempmm, tempkn);
-                    ldam = BLKLDD(A, m);
-                    lddm = BLKLDD(D, m);
-                    ldbm = BLKLDD(B, m);
+                    temppm   = p == C->mt-1 ? C->m - p * C->mb : C->mb;
+                    tempkmin = chameleon_min( temppm, tempkn );
+
+                    ldap = BLKLDD(A, p);
+                    lddp = BLKLDD(D, p);
+                    ldcp = BLKLDD(C, p);
 
                     if ( genD ) {
-                        int tempDmm = m == D->mt-1 ? D->m-m*D->mb : D->mb;
+                        int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDmm, tempkmin, A->nb,
-                            A(m, k), ldam,
-                            D(m, k), lddm );
+                            ChamLower, tempDpm, tempkmin, A->nb,
+                            A(p, k), ldap,
+                            D(p, k), lddp );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
-                            ChamUpper, tempDmm, tempkmin,
+                            ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(m, k), lddm );
+                            D(p, k), lddp );
 #endif
                     }
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                         INSERT_TASK_zunmqr(
-                            &options,
-                            side, trans,
-                            tempmm, tempnn, tempkmin, ib, T->nb,
-                            D(m, k), lddm,
-                            T(m, k), T->mb,
-                            B(m, n), ldbm);
+                            &options, side, trans,
+                            temppm, tempnn, tempkmin, ib, T->nb,
+                            D(p, k), lddp,
+                            T(p, k), T->mb,
+                            C(p, n), ldcp);
                     }
-                    RUNTIME_data_flush( sequence, D(m, k) );
-                    RUNTIME_data_flush( sequence, T(m, k) );
+                    RUNTIME_data_flush( sequence, D(p, k) );
+                    RUNTIME_data_flush( sequence, T(p, k) );
                 }
 
                 /* Setting the order of the tiles*/
@@ -142,10 +141,10 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     m = tiles[i];
                     p = qrtree->currpiv(qrtree, k, m);
 
-                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                     ldam = BLKLDD(A, m);
-                    ldbm = BLKLDD(B, m);
-                    ldbp = BLKLDD(B, p);
+                    ldcp = BLKLDD(C, p);
+                    ldcm = BLKLDD(C, m);
 
                     if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
                         /* TS kernel */
@@ -157,30 +156,29 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         L = tempmm;
                         T = TT;
                     }
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                        node = B->get_rankof( B, m, n );
-                        RUNTIME_data_migrate( sequence, B(p, n), node );
-                        RUNTIME_data_migrate( sequence, B(m, n), node );
+                        node = C->get_rankof( C, m, n );
+                        RUNTIME_data_migrate( sequence, C(p, n), node );
+                        RUNTIME_data_migrate( sequence, C(m, n), node );
 
                         INSERT_TASK_ztpmqrt(
-                            &options,
-                            side, trans,
+                            &options, side, trans,
                             tempmm, tempnn, tempkn, L, ib, T->nb,
                             A(m, k), ldam,
                             T(m, k), T->mb,
-                            B(p, n), ldbp,
-                            B(m, n), ldbm);
+                            C(p, n), ldcp,
+                            C(m, n), ldcm);
                     }
                     RUNTIME_data_flush( sequence, A(m, k) );
                     RUNTIME_data_flush( sequence, T(m, k) );
                 }
 
                 /* Restore the original location of the tiles */
-                for (n = 0; n < B->nt; n++) {
-                    RUNTIME_data_migrate( sequence, B(k, n),
-                                          B->get_rankof( B, k, n ) );
+                for (n = 0; n < C->nt; n++) {
+                    RUNTIME_data_migrate( sequence, C(k, n),
+                                          C->get_rankof( C, k, n ) );
                 }
 
                 RUNTIME_iteration_pop(chamctxt);
@@ -190,7 +188,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
          *  ChamLeft / ChamNoTrans
          */
         else {
-            for (k = K-1; k >= 0; k--) {
+            for (k = KT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
@@ -198,14 +196,14 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                 /* Setting the order of the tiles*/
                 nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
 
-                for (i = nbtiles-1; i >=0; i--) {
+                for (i = nbtiles-1; i >= 0; i--) {
                     m = tiles[i];
                     p = qrtree->currpiv(qrtree, k, m);
 
-                    tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                    tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                     ldam = BLKLDD(A, m);
-                    ldbm = BLKLDD(B, m);
-                    ldbp = BLKLDD(B, p);
+                    ldcp = BLKLDD(C, p);
+                    ldcm = BLKLDD(C, m);
 
                     if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
                         /* TS kernel */
@@ -217,21 +215,20 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         L = tempmm;
                         T = TT;
                     }
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                        node = B->get_rankof( B, m, n );
-                        RUNTIME_data_migrate( sequence, B(p, n), node );
-                        RUNTIME_data_migrate( sequence, B(m, n), node );
+                        node = C->get_rankof( C, m, n );
+                        RUNTIME_data_migrate( sequence, C(p, n), node );
+                        RUNTIME_data_migrate( sequence, C(m, n), node );
 
                         INSERT_TASK_ztpmqrt(
-                            &options,
-                            side, trans,
+                            &options, side, trans,
                             tempmm, tempnn, tempkn, L, ib, T->nb,
                             A(m, k), ldam,
                             T(m, k), T->mb,
-                            B(p, n), ldbp,
-                            B(m, n), ldbm);
+                            C(p, n), ldcp,
+                            C(m, n), ldcm);
                     }
                     RUNTIME_data_flush( sequence, A(m, k) );
                     RUNTIME_data_flush( sequence, T(m, k) );
@@ -239,49 +236,48 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
 
                 T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
-                    m = qrtree->getm(qrtree, k, i);
+                    p = qrtree->getm(qrtree, k, i);
 
-                    tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                    tempkmin = chameleon_min(tempmm, tempkn);
-                    ldam = BLKLDD(A, m);
-                    lddm = BLKLDD(D, m);
-                    ldbm = BLKLDD(B, m);
+                    temppm   = p == C->mt-1 ? C->m-p*C->mb : C->mb;
+                    tempkmin = chameleon_min( temppm, tempkn );
+
+                    ldap = BLKLDD(A, p);
+                    lddp = BLKLDD(D, p);
+                    ldcp = BLKLDD(C, p);
 
                     if ( genD ) {
-                        int tempDmm = m == D->mt-1 ? D->m-m*D->mb : D->mb;
+                        int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDmm, tempkmin, A->nb,
-                            A(m, k), ldam,
-                            D(m, k), lddm );
+                            ChamLower, tempDpm, tempkmin, A->nb,
+                            A(p, k), ldap,
+                            D(p, k), lddp );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
-                            ChamUpper, tempDmm, tempkmin,
+                            ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(m, k), lddm );
+                            D(p, k), lddp );
 #endif
                     }
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
 
-                        RUNTIME_data_migrate( sequence, B(m, n),
-                                              B->get_rankof( B, m, n ) );
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                        RUNTIME_data_migrate( sequence, C(p, n),
+                                              C->get_rankof( C, p, n ) );
 
                         INSERT_TASK_zunmqr(
-                            &options,
-                            side, trans,
-                            tempmm, tempnn, tempkmin, ib, T->nb,
-                            D(m, k), lddm,
-                            T(m, k), T->mb,
-                            B(m, n), ldbm);
+                            &options, side, trans,
+                            temppm, tempnn, tempkmin, ib, T->nb,
+                            D(p, k), lddp,
+                            T(p, k), T->mb,
+                            C(p, n), ldcp);
                     }
-
-                    RUNTIME_data_flush( sequence, D(m, k) );
-                    RUNTIME_data_flush( sequence, T(m, k) );
+                    RUNTIME_data_flush( sequence, D(p, k) );
+                    RUNTIME_data_flush( sequence, T(p, k) );
                 }
-
                 RUNTIME_iteration_pop(chamctxt);
             }
         }
@@ -291,10 +287,10 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
      */
     else {
         if (trans == ChamConjTrans) {
-            for (k = K-1; k >= 0; k--) {
+            for (k = KT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkn = k == A->nt-1 ? A->n - k*A->nb : A->nb;
+                tempkn = k == A->nt-1 ? A->n - k * A->nb : A->nb;
 
                 /* Setting the order of the tiles*/
                 nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
@@ -303,7 +299,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     n = tiles[i];
                     p = qrtree->currpiv(qrtree, k, n);
 
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                     ldan = BLKLDD(A, n);
 
                     if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) {
@@ -317,22 +313,21 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         T = TT;
                     }
 
-                    for (m = 0; m < B->mt; m++) {
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
+                    for (m = 0; m < C->mt; m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
 
-                        node = B->get_rankof( B, m, n );
-                        RUNTIME_data_migrate( sequence, B(m, p), node );
-                        RUNTIME_data_migrate( sequence, B(m, n), node );
+                        node = C->get_rankof( C, m, n );
+                        RUNTIME_data_migrate( sequence, C(m, p), node );
+                        RUNTIME_data_migrate( sequence, C(m, n), node );
 
                         INSERT_TASK_ztpmqrt(
-                            &options,
-                            side, trans,
+                            &options, side, trans,
                             tempmm, tempnn, tempkn, chameleon_min( L, tempmm ), ib, T->nb,
                             A(n, k), ldan,
                             T(n, k), T->mb,
-                            B(m, p), ldbm,
-                            B(m, n), ldbm);
+                            C(m, p), ldcm,
+                            C(m, n), ldcm);
                     }
                     RUNTIME_data_flush( sequence, A(n, k) );
                     RUNTIME_data_flush( sequence, T(n, k) );
@@ -340,46 +335,46 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
 
                 T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
-                    n = qrtree->getm(qrtree, k, i);
+                    p = qrtree->getm(qrtree, k, i);
 
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                    tempkmin = chameleon_min(tempnn, tempkn);
-                    ldan = BLKLDD(A, n);
-                    lddn = BLKLDD(D, n);
+                    temppn   = p == C->nt-1 ? C->n - p * C->nb : C->nb;
+                    tempkmin = chameleon_min(temppn, tempkn);
+                    ldap = BLKLDD(A, p);
+                    lddp = BLKLDD(D, p);
 
                     if ( genD ) {
-                        int tempDnn = n == D->nt-1 ? D->n-n*D->nb : D->nb;
+                        int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDnn, tempkmin, A->nb,
-                            A(n, k), ldan,
-                            D(n, k), lddn );
+                            ChamLower, tempDpm, tempkmin, A->nb,
+                            A(p, k), ldap,
+                            D(p, k), lddp );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
-                            ChamUpper, tempDnn, tempkmin,
+                            ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(n, k), lddn );
+                            D(p, k), lddp );
 #endif
                     }
-                    for (m = 0; m < B->mt; m++) {
-                        ldbm = BLKLDD(B, m);
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
 
-                        RUNTIME_data_migrate( sequence, B(m, n),
-                                              B->get_rankof( B, m, n ) );
+                    for (m = 0; m < C->mt; m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
+
+                        RUNTIME_data_migrate( sequence, C(m, p),
+                                              C->get_rankof( C, m, p ) );
 
                         INSERT_TASK_zunmqr(
-                            &options,
-                            side, trans,
-                            tempmm, tempnn, tempkmin, ib, T->nb,
-                            D(n, k), lddn,
-                            T(n, k), T->mb,
-                            B(m, n), ldbm);
+                            &options, side, trans,
+                            tempmm, temppn, tempkmin, ib, T->nb,
+                            D(p, k), lddp,
+                            T(p, k), T->mb,
+                            C(m, p), ldcm);
                     }
-                    RUNTIME_data_flush( sequence, D(n, k) );
-                    RUNTIME_data_flush( sequence, T(n, k) );
+                    RUNTIME_data_flush( sequence, D(p, k) );
+                    RUNTIME_data_flush( sequence, T(p, k) );
                 }
                 RUNTIME_iteration_pop(chamctxt);
             }
@@ -388,49 +383,49 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
          *  ChamRight / ChamNoTrans
          */
         else {
-            for (k = 0; k < K; k++) {
+            for (k = 0; k < KT; k++) {
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
 
                 T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
-                    n = qrtree->getm(qrtree, k, i);
+                    p = qrtree->getm(qrtree, k, i);
 
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                    tempkmin = chameleon_min(tempnn, tempkn);
-                    ldan = BLKLDD(A, n);
-                    lddn = BLKLDD(D, n);
+                    temppn   = p == C->nt - 1 ? C->n - p * C->nb : C->nb;
+                    tempkmin = chameleon_min( temppn, tempkn );
+                    ldap = BLKLDD(A, p);
+                    lddp = BLKLDD(D, p);
 
                     if ( genD ) {
-                        int tempDnn = n == D->nt-1 ? D->n-n*D->nb : D->nb;
+                        int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDnn, tempkmin, A->nb,
-                            A(n, k), ldan,
-                            D(n, k), lddn );
+                            ChamLower, tempDpm, tempkmin, A->nb,
+                            A(p, k), ldap,
+                            D(p, k), lddp );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
-                            ChamUpper, tempDnn, tempkmin,
+                            ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(n, k), lddn );
+                            D(p, k), lddp );
 #endif
                     }
-                    for (m = 0; m < B->mt; m++) {
-                        ldbm = BLKLDD(B, m);
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+
+                    for (m = 0; m < C->mt; m++) {
+                        ldcm = BLKLDD(C, m);
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                         INSERT_TASK_zunmqr(
-                            &options,
-                            side, trans,
-                            tempmm, tempnn, tempkmin, ib, T->nb,
-                            D(n, k), lddn,
-                            T(n, k), T->mb,
-                            B(m, n), ldbm);
+                            &options, side, trans,
+                            tempmm, temppn, tempkmin, ib, T->nb,
+                            D(p, k), lddp,
+                            T(p, k), T->mb,
+                            C(m, p), ldcm);
                     }
-                    RUNTIME_data_flush( sequence, D(n, k) );
-                    RUNTIME_data_flush( sequence, T(n, k) );
+                    RUNTIME_data_flush( sequence, D(p, k) );
+                    RUNTIME_data_flush( sequence, T(p, k) );
                 }
 
                 /* Setting the order of tiles */
@@ -440,7 +435,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     n = tiles[i];
                     p = qrtree->currpiv(qrtree, k, n);
 
-                    tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                     ldan = BLKLDD(A, n);
 
                     if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) {
@@ -454,31 +449,30 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         T = TT;
                     }
 
-                    for (m = 0; m < B->mt; m++) {
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
+                    for (m = 0; m < C->mt; m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
 
-                        node = B->get_rankof( B, m, n );
-                        RUNTIME_data_migrate( sequence, B(m, p), node );
-                        RUNTIME_data_migrate( sequence, B(m, n), node );
+                        node = C->get_rankof( C, m, n );
+                        RUNTIME_data_migrate( sequence, C(m, p), node );
+                        RUNTIME_data_migrate( sequence, C(m, n), node );
 
                         INSERT_TASK_ztpmqrt(
-                            &options,
-                            side, trans,
+                            &options, side, trans,
                             tempmm, tempnn, tempkn, chameleon_min( L, tempmm ), ib, T->nb,
                             A(n, k), ldan,
                             T(n, k), T->mb,
-                            B(m, p), ldbm,
-                            B(m, n), ldbm);
+                            C(m, p), ldcm,
+                            C(m, n), ldcm);
                     }
                     RUNTIME_data_flush( sequence, A(n, k) );
                     RUNTIME_data_flush( sequence, T(n, k) );
                 }
 
                 /* Restore the original location of the tiles */
-                for (m = 0; m < B->mt; m++) {
-                    RUNTIME_data_migrate( sequence, B(m, k),
-                                          B->get_rankof( B, m, k ) );
+                for (m = 0; m < C->mt; m++) {
+                    RUNTIME_data_migrate( sequence, C( m, k ),
+                                          C->get_rankof( C, m, k ) );
                 }
 
                 RUNTIME_iteration_pop(chamctxt);
diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c
index 7ba65c846a48672612bc5554cd6aa8ca7b367b94..1d4500f849c3072d7bbf9e54e0f25c72363a5fbd 100644
--- a/compute/pzunmqrrh.c
+++ b/compute/pzunmqrrh.c
@@ -27,7 +27,7 @@
 #include "control/common.h"
 
 #define A(m,n)  A, (m), (n)
-#define B(m,n)  B, (m), (n)
+#define C(m,n)  C, (m), (n)
 #define T(m,n)  T, (m), (n)
 #define T2(m,n) T, (m), ((n)+A->nt)
 #define D(m,n)  D, (m), (n)
@@ -37,7 +37,7 @@
  *  Householder) - dynamic scheduling
  */
 void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans,
-                          CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T, CHAM_desc_t *D,
+                          CHAM_desc_t *A, CHAM_desc_t *C, CHAM_desc_t *T, CHAM_desc_t *D,
                           RUNTIME_sequence_t *sequence, RUNTIME_request_t *request )
 {
     CHAM_context_t *chamctxt;
@@ -45,11 +45,12 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
     size_t ws_worker = 0;
     size_t ws_host = 0;
 
-    int k, m, n;
-    int K, M, RD, lastRD;
-    int ldaM, ldam, ldan, ldaMRD, lddM;
-    int ldbM, ldbm, ldbMRD;
-    int tempMm, tempkn, tempnn, tempmm, tempMRDm, tempkmin;
+    int k, m, n, p;
+    int KT, RD, lastRD;
+    int ldap, ldam, ldan;
+    int ldcp, ldcm;
+    int lddp;
+    int temppm, temppn, tempkn, tempnn, tempmm, tempkmin;
     int ib, node;
 
     chamctxt = chameleon_context_self();
@@ -85,61 +86,66 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
     RUNTIME_options_ws_alloc( &options, ws_worker, ws_host );
 
-    K = chameleon_min(A->mt, A->nt);
+    KT = chameleon_min( A->mt, A->nt );
     if (side == ChamLeft ) {
         if (trans == ChamConjTrans) {
             /*
              *  ChamLeft / ChamConjTrans
              */
-            for (k = 0; k < K; k++) {
+            for (k = 0; k < KT; k++) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-                for (M = k; M < A->mt; M += BS) {
-                    tempMm   = M == A->mt-1 ? A->m-M*A->mb : A->mb;
-                    tempkmin = chameleon_min(tempMm, tempkn);
-                    ldaM = BLKLDD(A, M);
-                    lddM = BLKLDD(D, M);
-                    ldbM = BLKLDD(B, M);
+                tempkn = k == A->nt - 1 ? A->n - k * A->nb : A->nb;
+
+                for (p = k; p < C->mt; p += BS) {
+
+                    temppm   = p == C->mt-1 ? C->m - p * C->mb : C->mb;
+                    tempkmin = chameleon_min( temppm, tempkn );
+
+                    ldap = BLKLDD(A, p);
+                    lddp = BLKLDD(D, p);
+                    ldcp = BLKLDD(C, p);
+
                     if ( genD ) {
-                        int tempDMm = M == D->mt-1 ? D->m-M*D->mb : D->mb;
+                        int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDMm, tempkmin, A->nb,
-                            A(M, k), ldaM,
-                            D(M, k), lddM );
+                            ChamLower, tempDpm, tempkmin, A->nb,
+                            A(p, k), ldap,
+                            D(p, k), lddp );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
-                            ChamUpper, tempDMm, tempkmin,
+                            ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(M, k), lddM );
+                            D(p, k), lddp );
 #endif
                     }
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                         INSERT_TASK_zunmqr(
                             &options,
                             side, trans,
-                            tempMm, tempnn, tempkmin, ib, T->nb,
-                            D(M, k), lddM,
-                            T(M, k), T->mb,
-                            B(M, n), ldbM);
+                            temppm, tempnn, tempkmin, ib, T->nb,
+                            D(p, k), lddp,
+                            T(p, k), T->mb,
+                            C(p, n), ldcp);
                     }
-                    RUNTIME_data_flush( sequence, D(M, k) );
-                    RUNTIME_data_flush( sequence, T(M, k) );
+                    RUNTIME_data_flush( sequence, D(p, k) );
+                    RUNTIME_data_flush( sequence, T(p, k) );
 
-                    for (m = M+1; m < chameleon_min(M+BS, A->mt); m++) {
-                        tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                        ldbm = BLKLDD(B, m);
+                    for (m = p+1; m < chameleon_min(p+BS, C->mt); m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                         ldam = BLKLDD(A, m);
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                        ldcm = BLKLDD(C, m);
+
+                        for (n = 0; n < C->nt; n++) {
+                            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
-                            node = B->get_rankof( B, m, n );
-                            RUNTIME_data_migrate( sequence, B(M, n), node );
-                            RUNTIME_data_migrate( sequence, B(m, n), node );
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(p, n), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TS kernel */
                             INSERT_TASK_ztpmqrt(
@@ -147,101 +153,108 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                                 tempmm, tempnn, tempkn, 0, ib, T->nb,
                                 A(m, k), ldam,
                                 T(m, k), T->mb,
-                                B(M, n), ldbM,
-                                B(m, n), ldbm);
+                                C(p, n), ldcp,
+                                C(m, n), ldcm);
                         }
                         RUNTIME_data_flush( sequence, A(m, k) );
                         RUNTIME_data_flush( sequence, T(m, k) );
                     }
                 }
-                for (RD = BS; RD < A->mt-k; RD *= 2) {
-                    for (M = k; M+RD < A->mt; M += 2*RD) {
-                        tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
-                        ldbM   = BLKLDD(B, M   );
-                        ldbMRD = BLKLDD(B, M+RD);
-                        ldaMRD = BLKLDD(A, M+RD);
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-
-                            node = B->get_rankof( B, M+RD, n );
-                            RUNTIME_data_migrate( sequence, B(M, n),    node );
-                            RUNTIME_data_migrate( sequence, B(M+RD, n), node );
+                for (RD = BS; RD < C->mt-k; RD *= 2) {
+                    for (p = k; p+RD < C->mt; p += 2*RD) {
+                        m = p+RD;
+
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldam = BLKLDD(A, m);
+                        ldcm = BLKLDD(C, m);
+                        ldcp = BLKLDD(C, p);
+
+                        for (n = 0; n < C->nt; n++) {
+                            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(p, n), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TT kernel */
                             INSERT_TASK_ztpmqrt(
                                 &options, side, trans,
-                                tempMRDm, tempnn, tempkn, tempMRDm, ib, T->nb,
-                                A (M+RD, k), ldaMRD,
-                                T2(M+RD, k), T->mb,
-                                B (M,    n), ldbM,
-                                B (M+RD, n), ldbMRD);
+                                tempmm, tempnn, tempkn, tempmm, ib, T->nb,
+                                A (m, k), ldam,
+                                T2(m, k), T->mb,
+                                C (p, n), ldcp,
+                                C (m, n), ldcm);
                         }
-                        RUNTIME_data_flush( sequence, A (M+RD, k) );
-                        RUNTIME_data_flush( sequence, T2(M+RD, k) );
+                        RUNTIME_data_flush( sequence, A (m, k) );
+                        RUNTIME_data_flush( sequence, T2(m, k) );
                     }
                 }
 
                 /* Restore the original location of the tiles */
-                for (n = 0; n < B->nt; n++) {
-                    RUNTIME_data_migrate( sequence, B(k, n),
-                                          B->get_rankof( B, k, n ) );
+                for (n = 0; n < C->nt; n++) {
+                    RUNTIME_data_migrate( sequence, C(k, n),
+                                          C->get_rankof( C, k, n ) );
                 }
 
                 RUNTIME_iteration_pop(chamctxt);
             }
-        } else {
-            /*
-             *  ChamLeft / ChamNoTrans
-             */
-            for (k = K-1; k >= 0; k--) {
+        }
+        /*
+         *  ChamLeft / ChamNoTrans
+         */
+        else {
+            for (k = KT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                 lastRD = 0;
-                for (RD = BS; RD < A->mt-k; RD *= 2)
+                for (RD = BS; RD < C->mt-k; RD *= 2)
                     lastRD = RD;
                 for (RD = lastRD; RD >= BS; RD /= 2) {
-                    for (M = k; M+RD < A->mt; M += 2*RD) {
-                        tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
-                        ldbM   = BLKLDD(B, M   );
-                        ldbMRD = BLKLDD(B, M+RD);
-                        ldaMRD = BLKLDD(A, M+RD);
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-
-                            node = B->get_rankof( B, M+RD, n );
-                            RUNTIME_data_migrate( sequence, B(M, n),    node );
-                            RUNTIME_data_migrate( sequence, B(M+RD, n), node );
+                    for (p = k; p+RD < C->mt; p += 2*RD) {
+                        m = p+RD;
+
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldam = BLKLDD(A, m);
+                        ldcm = BLKLDD(C, m);
+                        ldcp = BLKLDD(C, p);
+
+                        for (n = 0; n < C->nt; n++) {
+                            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(p, n), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TT kernel */
                             INSERT_TASK_ztpmqrt(
                                 &options, side, trans,
-                                tempMRDm, tempnn, tempkn, tempMRDm, ib, T->nb,
-                                A (M+RD, k), ldaMRD,
-                                T2(M+RD, k), T->mb,
-                                B (M,    n), ldbM,
-                                B (M+RD, n), ldbMRD);
+                                tempmm, tempnn, tempkn, tempmm, ib, T->nb,
+                                A (m, k), ldam,
+                                T2(m, k), T->mb,
+                                C (p, n), ldcp,
+                                C (m, n), ldcm);
                         }
-                        RUNTIME_data_flush( sequence, A (M+RD, k) );
-                        RUNTIME_data_flush( sequence, T2(M+RD, k) );
+                        RUNTIME_data_flush( sequence, A (m, k) );
+                        RUNTIME_data_flush( sequence, T2(m, k) );
                     }
                 }
-                for (M = k; M < A->mt; M += BS) {
-                    tempMm   = M == A->mt-1 ? A->m-M*A->mb : A->mb;
-                    tempkmin = chameleon_min(tempMm, tempkn);
-                    ldaM = BLKLDD(A, M);
-                    lddM = BLKLDD(D, M);
-                    ldbM = BLKLDD(B, M);
-                    for (m = chameleon_min(M+BS, A->mt)-1; m > M; m--) {
-                        tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                        ldbm = BLKLDD(B, m);
+                for (p = k; p < C->mt; p += BS) {
+                    ldap = BLKLDD(A, p);
+                    lddp = BLKLDD(D, p);
+                    ldcp = BLKLDD(C, p);
+
+                    for (m = chameleon_min(p+BS, C->mt)-1; m > p; m--) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                         ldam = BLKLDD(A, m);
-                        for (n = 0; n < B->nt; n++) {
-                            tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                        ldcm = BLKLDD(C, m);
 
-                            node = B->get_rankof( B, m, n );
-                            RUNTIME_data_migrate( sequence, B(M, n), node );
-                            RUNTIME_data_migrate( sequence, B(m, n), node );
+                        for (n = 0; n < C->nt; n++) {
+                            tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(p, n), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TS kernel */
                             INSERT_TASK_ztpmqrt(
@@ -249,100 +262,108 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                                 tempmm, tempnn, tempkn, 0, ib, T->nb,
                                 A(m, k), ldam,
                                 T(m, k), T->mb,
-                                B(M, n), ldbM,
-                                B(m, n), ldbm);
+                                C(p, n), ldcp,
+                                C(m, n), ldcm);
                         }
                         RUNTIME_data_flush( sequence, A(m, k) );
                         RUNTIME_data_flush( sequence, T(m, k) );
                     }
+
+                    temppm   = p == C->mt-1 ? C->m-p*C->mb : C->mb;
+                    tempkmin = chameleon_min( temppm, tempkn );
+
                     if ( genD ) {
-                        int tempDMm = M == D->mt-1 ? D->m-M*D->mb : D->mb;
+                        int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDMm, tempkmin, A->nb,
-                            A(M, k), ldaM,
-                            D(M, k), lddM );
+                            ChamLower, tempDpm, tempkmin, A->nb,
+                            A(p, k), ldap,
+                            D(p, k), lddp );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
-                            ChamUpper, tempDMm, tempkmin,
+                            ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(M, k), lddM );
+                            D(p, k), lddp );
 #endif
                     }
-                    for (n = 0; n < B->nt; n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
 
-                        RUNTIME_data_migrate( sequence, B(M, n),
-                                              B->get_rankof( B, M, n ) );
+                    for (n = 0; n < C->nt; n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+
+                        RUNTIME_data_migrate( sequence, C(p, n),
+                                              C->get_rankof( C, p, n ) );
 
                         INSERT_TASK_zunmqr(
                             &options, side, trans,
-                            tempMm, tempnn, tempkmin, ib, T->nb,
-                            D(M, k), lddM,
-                            T(M, k), T->mb,
-                            B(M, n), ldbM);
+                            temppm, tempnn, tempkmin, ib, T->nb,
+                            D(p, k), lddp,
+                            T(p, k), T->mb,
+                            C(p, n), ldcp);
                     }
-                    RUNTIME_data_flush( sequence, D(M, k) );
-                    RUNTIME_data_flush( sequence, T(M, k) );
+                    RUNTIME_data_flush( sequence, D(p, k) );
+                    RUNTIME_data_flush( sequence, T(p, k) );
                 }
                 RUNTIME_iteration_pop(chamctxt);
             }
         }
     }
+    /*
+     *  ChamRight / ChamConjTrans
+     */
     else {
         if (trans == ChamConjTrans) {
-            /*
-             *  ChamRight / ChamConjTrans
-             */
-            for (k = K-1; k >= 0; k--) {
+            for (k = KT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
-                tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
+                tempkn = k == A->nt-1 ? A->n - k * A->nb : A->nb;
+
                 lastRD = 0;
-                for (RD = BS; RD < A->mt-k; RD *= 2)
+                for (RD = BS; RD < C->nt-k; RD *= 2)
                     lastRD = RD;
                 for (RD = lastRD; RD >= BS; RD /= 2) {
-                    for (M = k; M+RD < A->mt; M += 2*RD) {
-                        tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
-                        ldaMRD = BLKLDD(A, M+RD);
-                        for (m = 0; m < B->mt; m++) {
-                            ldbm   = BLKLDD(B, m);
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+                    for (p = k; p+RD < C->nt; p += 2*RD) {
+                        n = p+RD;
+
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
+                        ldan = BLKLDD(A, n);
+
+                        for (m = 0; m < C->mt; m++) {
+                            tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                            ldcm   = BLKLDD(C, m);
 
-                            node = B->get_rankof( B, m, M+RD );
-                            RUNTIME_data_migrate( sequence, B(m, M),    node );
-                            RUNTIME_data_migrate( sequence, B(m, M+RD), node );
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(m, p), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TT kernel */
                             INSERT_TASK_ztpmqrt(
                                 &options, side, trans,
-                                tempmm, tempMRDm, tempkn, tempmm, ib, T->nb,
-                                A (M+RD, k), ldaMRD,
-                                T2(M+RD, k), T->mb,
-                                B (m, M), ldbm,
-                                B (m, M+RD), ldbm);
+                                tempmm, tempnn, tempkn, tempmm, ib, T->nb,
+                                A (n, k), ldan,
+                                T2(n, k), T->mb,
+                                C (m, p), ldcm,
+                                C (m, n), ldcm);
                         }
-                        RUNTIME_data_flush( sequence, A (M+RD, k) );
-                        RUNTIME_data_flush( sequence, T2(M+RD, k) );
+                        RUNTIME_data_flush( sequence, A (n, k) );
+                        RUNTIME_data_flush( sequence, T2(n, k) );
                     }
                 }
-                for (M = k; M < A->mt; M += BS) {
-                    tempMm   = M == A->mt-1 ? A->m-M*A->mb : A->mb;
-                    tempkmin = chameleon_min(tempMm, tempkn);
-                    ldaM = BLKLDD(A, M);
-                    lddM = BLKLDD(D, M);
-                    for (n = chameleon_min(M+BS, A->mt)-1; n > M; n--) {
+                for (p = k; p < C->nt; p += BS) {
+
+                    for (n = chameleon_min(p+BS, C->nt)-1; n > p; n--) {
+
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                         ldan = BLKLDD(A, n);
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                        for (m = 0; m < B->mt; m++) {
-                            ldbm = BLKLDD(B, m);
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
 
-                            node = B->get_rankof( B, m, n );
-                            RUNTIME_data_migrate( sequence, B(m, M), node );
-                            RUNTIME_data_migrate( sequence, B(m, m), node );
+                        for (m = 0; m < C->mt; m++) {
+                            tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                            ldcm = BLKLDD(C, m);
+
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(m, p), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TS kernel */
                             INSERT_TASK_ztpmqrt(
@@ -350,102 +371,111 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                                 tempmm, tempnn, tempkn, 0, ib, T->nb,
                                 A(n, k), ldan,
                                 T(n, k), T->mb,
-                                B(m, M), ldbm,
-                                B(m, n), ldbm);
+                                C(m, p), ldcm,
+                                C(m, n), ldcm);
                         }
                         RUNTIME_data_flush( sequence, A(n, k) );
                         RUNTIME_data_flush( sequence, T(n, k) );
                     }
+
+                    temppn   = p == C->nt-1 ? C->n - p * C->nb : C->nb;
+                    tempkmin = chameleon_min( temppn, tempkn );
+                    ldap = BLKLDD(A, p);
+                    lddp = BLKLDD(D, p);
+
                     if ( genD ) {
-                        int tempDMm = M == D->mt-1 ? D->m-M*D->mb : D->mb;
+                        int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDMm, tempkmin, A->nb,
-                            A(M, k), ldaM,
-                            D(M, k), lddM );
+                            ChamLower, tempDpm, tempkmin, A->nb,
+                            A(p, k), ldap,
+                            D(p, k), lddp );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
-                            ChamUpper, tempDMm, tempkmin,
+                            ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(M, k), lddM );
+                            D(p, k), lddp );
 #endif
                     }
-                    for (m = 0; m < B->mt; m++) {
-                        ldbm = BLKLDD(B, m);
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
 
-                        RUNTIME_data_migrate( sequence, B(m, M),
-                                              B->get_rankof( B, m, M ) );
+                    for (m = 0; m < C->mt; m++) {
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                        ldcm = BLKLDD(C, m);
+
+                        RUNTIME_data_migrate( sequence, C(m, p),
+                                              C->get_rankof( C, m, p ) );
 
                         INSERT_TASK_zunmqr(
-                            &options,
-                            side, trans,
-                            tempmm, tempMm, tempkmin, ib, T->nb,
-                            D(M, k), lddM,
-                            T(M, k), T->mb,
-                            B(m, M), ldbm);
+                            &options, side, trans,
+                            tempmm, temppn, tempkmin, ib, T->nb,
+                            D(p, k), lddp,
+                            T(p, k), T->mb,
+                            C(m, p), ldcm);
                     }
-                    RUNTIME_data_flush( sequence, D(M, k) );
-                    RUNTIME_data_flush( sequence, T(M, k) );
+                    RUNTIME_data_flush( sequence, D(p, k) );
+                    RUNTIME_data_flush( sequence, T(p, k) );
                 }
-
                 RUNTIME_iteration_pop(chamctxt);
             }
-        } else {
-            /*
-             *  ChamRight / ChamNoTrans
-             */
-            for (k = 0; k < K; k++) {
+        }
+        /*
+         *  ChamRight / ChamNoTrans
+         */
+        else {
+            for (k = 0; k < KT; k++) {
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-                for (M = k; M < A->mt; M += BS) {
-                    tempMm   = M == A->mt-1 ? A->m-M*A->mb : A->mb;
-                    tempkmin = chameleon_min(tempMm, tempkn);
-                    ldaM = BLKLDD(A, M);
-                    lddM = BLKLDD(D, M);
+
+                for (p = k; p < C->nt; p += BS) {
+                    temppn   = p == C->nt - 1 ? C->n - p * C->nb : C->nb;
+                    tempkmin = chameleon_min( temppn, tempkn );
+
+                    ldap = BLKLDD(A, p);
+                    lddp = BLKLDD(D, p);
+
                     if ( genD ) {
-                        int tempDMm = M == D->mt-1 ? D->m-M*D->mb : D->mb;
+                        int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDMm, tempkmin, A->nb,
-                            A(M, k), ldaM,
-                            D(M, k), lddM );
+                            ChamLower, tempDpm, tempkmin, A->nb,
+                            A(p, k), ldap,
+                            D(p, k), lddp );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
-                            ChamUpper, tempDMm, tempkmin,
+                            ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(M, k), lddM );
+                            D(p, k), lddp );
 #endif
                     }
-                    for (m = 0; m < B->mt; m++) {
-                        ldbm = BLKLDD(B, m);
-                        tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
+
+                    for (m = 0; m < C->mt; m++) {
+                        ldcm = BLKLDD(C, m);
+                        tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                         INSERT_TASK_zunmqr(
-                            &options,
-                            side, trans,
-                            tempmm, tempMm, tempkmin, ib, T->nb,
-                            D(M, k), lddM,
-                            T(M, k), T->mb,
-                            B(m, M), ldbm);
+                            &options, side, trans,
+                            tempmm, temppn, tempkmin, ib, T->nb,
+                            D(p, k), lddp,
+                            T(p, k), T->mb,
+                            C(m, p), ldcm);
                     }
-                    RUNTIME_data_flush( sequence, D(M, k) );
-                    RUNTIME_data_flush( sequence, T(M, k) );
+                    RUNTIME_data_flush( sequence, D(p, k) );
+                    RUNTIME_data_flush( sequence, T(p, k) );
 
-                    for (n = M+1; n < chameleon_min(M+BS,  A->mt); n++) {
-                        tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
+                    for (n = p+1; n < chameleon_min(p+BS,  C->nt); n++) {
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                         ldan = BLKLDD(A, n);
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm = BLKLDD(B, m);
+                        for (m = 0; m < C->mt; m++) {
+                            tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                            ldcm = BLKLDD(C, m);
 
-                            node = B->get_rankof( B, m, n );
-                            RUNTIME_data_migrate( sequence, B(m, M), node );
-                            RUNTIME_data_migrate( sequence, B(m, n), node );
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(m, p), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TS kernel */
                             INSERT_TASK_ztpmqrt(
@@ -453,43 +483,45 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                                 tempmm, tempnn, tempkn, 0, ib, T->nb,
                                 A(n, k), ldan,
                                 T(n, k), T->mb,
-                                B(m, M), ldbm,
-                                B(m, n), ldbm);
+                                C(m, p), ldcm,
+                                C(m, n), ldcm);
                         }
                         RUNTIME_data_flush( sequence, A(n, k) );
                         RUNTIME_data_flush( sequence, T(n, k) );
                     }
                 }
-                for (RD = BS; RD < A->mt-k; RD *= 2) {
-                    for (M = k; M+RD < A->mt; M += 2*RD) {
-                        tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
-                        ldaMRD = BLKLDD(A, M+RD);
-                        for (m = 0; m < B->mt; m++) {
-                            tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                            ldbm   = BLKLDD(B, m);
-
-                            node = B->get_rankof( B, m, M+RD );
-                            RUNTIME_data_migrate( sequence, B(m, M),    node );
-                            RUNTIME_data_migrate( sequence, B(m, M+RD), node );
+                for (RD = BS; RD < C->nt-k; RD *= 2) {
+                    for (p = k; p+RD < C->nt; p += 2*RD) {
+                        n = p + RD;
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb; /* n indexes a column tile of C */
+                        ldan = BLKLDD(A, n);
+
+                        for (m = 0; m < C->mt; m++) {
+                            tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
+                            ldcm   = BLKLDD(C, m);
+
+                            node = C->get_rankof( C, m, n );
+                            RUNTIME_data_migrate( sequence, C(m, p), node );
+                            RUNTIME_data_migrate( sequence, C(m, n), node );
 
                             /* TT kernel */
                             INSERT_TASK_ztpmqrt(
                                 &options, side, trans,
-                                tempmm, tempMRDm, tempkn, tempmm, ib, T->nb,
-                                A (M+RD, k), ldaMRD,
-                                T2(M+RD, k), T->mb,
-                                B (m, M   ), ldbm,
-                                B (m, M+RD), ldbm);
+                                tempmm, tempnn, tempkn, tempmm, ib, T->nb,
+                                A (n, k), ldan,
+                                T2(n, k), T->mb,
+                                C (m, p), ldcm,
+                                C (m, n), ldcm);
                         }
-                        RUNTIME_data_flush( sequence, A (M+RD, k) );
-                        RUNTIME_data_flush( sequence, T2(M+RD, k) );
+                        RUNTIME_data_flush( sequence, A (n, k) );
+                        RUNTIME_data_flush( sequence, T2(n, k) );
                     }
                 }
 
                 /* Restore the original location of the tiles */
-                for (m = 0; m < B->mt; m++) {
-                    RUNTIME_data_migrate( sequence, B(m, k),
-                                          B->get_rankof( B, m, k ) );
+                for (m = 0; m < C->mt; m++) {
+                    RUNTIME_data_migrate( sequence, C(m, k),
+                                          C->get_rankof( C, m, k ) );
                 }
 
                 RUNTIME_iteration_pop(chamctxt);