diff --git a/CTestConfig.cmake b/CTestConfig.cmake
index e79cbcc92dc25229e5b07e7c33c9d07f2b89de25..c3b485e175923b43ba70a11f6270ec5098d28b97 100644
--- a/CTestConfig.cmake
+++ b/CTestConfig.cmake
@@ -49,7 +49,7 @@ if(NOT BUILDNAME)
 
   if(CHAMELEON_SCHED_PARSEC)
     set(BUILDNAME "${BUILDNAME}-PaRSEC")
-  endif(CHAMELEON_SCHED_STARPU)
+  endif(CHAMELEON_SCHED_PARSEC)
 
   if(CHAMELEON_SIMULATION)
     set(BUILDNAME "${BUILDNAME}-SimGrid")
diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c
index 31b8bff089877521a0749982fdf0aa7c7c7bdf94..55e16627424850f10510d91a56296b0422bd6326 100644
--- a/compute/pzgelqf.c
+++ b/compute/pzgelqf.c
@@ -41,7 +41,7 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldam;
+    int ldak, ldam, lddk;
     int tempkm, tempkn, tempmm, tempnn;
     int ib, minMNT;
 
@@ -92,6 +92,7 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
         ldak = BLKLDD(A, k);
+        lddk = BLKLDD(D, k);
         INSERT_TASK_zgelqt(
             &options,
             tempkm, tempkn, ib, T->nb,
@@ -100,15 +101,15 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
         if ( genD ) {
             INSERT_TASK_zlacpy(
                 &options,
-                ChamUpper, A->mb, A->nb, A->nb,
+                ChamUpper, tempkm, tempkn, A->nb,
                 A(k, k), ldak,
-                D(k), ldak );
+                D(k), lddk );
 #if defined(CHAMELEON_USE_CUDA)
             INSERT_TASK_zlaset(
                 &options,
-                ChamLower, A->mb, A->nb,
+                ChamLower, tempkm, tempkn,
                 0., 1.,
-                D(k), ldak );
+                D(k), lddk );
 #endif
         }
         for (m = k+1; m < A->mt; m++) {
@@ -118,7 +119,7 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
                 &options,
                 ChamRight, ChamConjTrans,
                 tempmm, tempkn, tempkn, ib, T->nb,
-                D(k), ldak,
+                D(k), lddk,
                 T(k, k), T->mb,
                 A(m, k), ldam);
         }
diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c
index b1415f2bbd8ea53ff88d6daf2e301ea3a31d2254..9f853ef7ae1ef6bb9d86c86b586d335ea3ebd18d 100644
--- a/compute/pzgelqf_param.c
+++ b/compute/pzgelqf_param.c
@@ -42,7 +42,7 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
 
     int k, m, n, i, p;
     int K, L;
-    int ldak, ldam;
+    int ldak, ldam, lddk;
     int tempkmin, tempkm, tempnn, tempmm, temppn;
     int ib;
     int *tiles;
@@ -96,6 +96,7 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         ldak = BLKLDD(A, k);
+        lddk = BLKLDD(D, k);
 
         T = TS;
         /* The number of geqrt to apply */
@@ -114,13 +115,13 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
                     &options,
                     ChamUpper, tempkm, temppn, A->nb,
                     A(k, p), ldak,
-                    D(k, p), ldak );
+                    D(k, p), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamLower, tempkm, temppn,
                     0., 1.,
-                    D(k, p), ldak );
+                    D(k, p), lddk );
 #endif
             }
             for (m = k+1; m < A->mt; m++) {
@@ -130,7 +131,7 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
                     &options,
                     ChamRight, ChamConjTrans,
                    tempmm, temppn, tempkmin, ib, T->nb,
-                    D(k, p), ldak,
+                    D(k, p), lddk,
                     T(k, p), T->mb,
                     A(m, p), ldam);
             }
diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c
index f6ac81f523ddbf3f3923936b99a825eab23591aa..6dd19a90eec6ae92ad7a49ce97b7dab86466a938 100644
--- a/compute/pzgelqfrh.c
+++ b/compute/pzgelqfrh.c
@@ -44,7 +44,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
 
     int k, m, n;
     int K, N, RD;
-    int ldak, ldam;
+    int ldak, ldam, lddk;
     int tempkmin, tempkm, tempNn, tempnn, tempmm, tempNRDn;
     int ib;
 
@@ -91,6 +91,8 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         ldak = BLKLDD(A, k);
+        lddk = BLKLDD(D, k);
+
         for (N = k; N < A->nt; N += BS) {
             tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb;
             tempkmin = chameleon_min(tempkm, tempNn);
@@ -104,13 +106,13 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                     &options,
                     ChamUpper, tempkm, tempNn, A->nb,
                     A(k, N), ldak,
-                    D(k, N), ldak );
+                    D(k, N), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamLower, tempkm, tempNn,
                     0., 1.,
-                    D(k, N), ldak );
+                    D(k, N), lddk );
 #endif
             }
             for (m = k+1; m < A->mt; m++) {
@@ -120,7 +122,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                     &options,
                     ChamRight, ChamConjTrans,
                     tempmm, tempNn, tempkmin, ib, T->nb,
-                    D(k, N), ldak,
+                    D(k, N), lddk,
                     T(k, N), T->mb,
                     A(m, N), ldam);
             }
diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c
index 2bf10f0f9375ff740dd214f9e2219ea2d7831805..7c38e4773c99b8214d8b82776f63604061851890 100644
--- a/compute/pzgeqrf.c
+++ b/compute/pzgeqrf.c
@@ -41,7 +41,7 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldam;
+    int ldak, ldam, lddk;
     int tempkm, tempkn, tempnn, tempmm;
     int ib;
     int minMNT = chameleon_min(A->mt, A->nt);
@@ -87,6 +87,7 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
         ldak = BLKLDD(A, k);
+        lddk = BLKLDD(D, k);
         INSERT_TASK_zgeqrt(
             &options,
             tempkm, tempkn, ib, T->nb,
@@ -95,15 +96,15 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
         if ( genD ) {
             INSERT_TASK_zlacpy(
                 &options,
-                ChamLower, A->mb, A->nb, A->nb,
+                ChamLower, tempkm, tempkn, A->nb,
                 A(k, k), ldak,
-                D(k), ldak );
+                D(k), lddk );
 #if defined(CHAMELEON_USE_CUDA)
             INSERT_TASK_zlaset(
                 &options,
-                ChamUpper, A->mb, A->nb,
+                ChamUpper, tempkm, tempkn,
                 0., 1.,
-                D(k), ldak );
+                D(k), lddk );
 #endif
         }
         for (n = k+1; n < A->nt; n++) {
@@ -112,7 +113,7 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
                 &options,
                 ChamLeft, ChamConjTrans,
                 tempkm, tempnn, tempkm, ib, T->nb,
-                D(k), ldak,
+                D(k),    lddk,
                 T(k, k), T->mb,
                 A(k, n), ldak);
         }
diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c
index 8afa3938ada8bb59dbfa0215b95181d8c0925fcd..01c0a816f24440ea4cb830e39a909504f117aa36 100644
--- a/compute/pzgeqrf_param.c
+++ b/compute/pzgeqrf_param.c
@@ -41,7 +41,7 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
 
     int k, m, n, i, p;
     int K, L, nbgeqrt;
-    int ldap, ldam;
+    int ldap, ldam, lddm;
     int tempkmin, tempkn, tempnn, tempmm;
     int ib;
     int *tiles;
@@ -97,6 +97,7 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
             tempkmin = chameleon_min(tempmm, tempkn);
             ldam = BLKLDD(A, m);
+            lddm = BLKLDD(D, m);
 
             T = TS;
 
@@ -108,15 +109,15 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
             if ( genD ) {
                 INSERT_TASK_zlacpy(
                     &options,
-                    ChamLower, tempmm, A->nb, A->nb,
+                    ChamLower, tempmm, tempkn, A->nb,
                     A(m, k), ldam,
-                    D(m, k), ldam );
+                    D(m, k), lddm );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
-                    ChamUpper, tempmm, A->nb,
+                    ChamUpper, tempmm, tempkn,
                     0., 1.,
-                    D(m, k), ldam );
+                    D(m, k), lddm );
 #endif
             }
             for (n = k+1; n < A->nt; n++) {
@@ -125,7 +126,7 @@ void chameleon_pzgeqrf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
                     &options,
                     ChamLeft, ChamConjTrans,
                     tempmm, tempnn, tempkmin, ib, T->nb,
-                    D(m, k), ldam,
+                    D(m, k), lddm,
                     T(m, k), T->mb,
                     A(m, n), ldam);
             }
diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c
index fb10c11c1af6327ca87198437553738afd495b30..472e77e7e96654023aaba277cda5d30e28543feb 100644
--- a/compute/pzgeqrfrh.c
+++ b/compute/pzgeqrfrh.c
@@ -44,7 +44,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
 
     int k, m, n;
     int K, M, RD;
-    int ldaM, ldam, ldaMRD;
+    int ldaM, ldam, ldaMRD, lddM;
     int tempkmin, tempkn, tempMm, tempnn, tempmm, tempMRDm;
     int ib;
 
@@ -92,6 +92,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
             tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb;
             tempkmin = chameleon_min(tempMm, tempkn);
             ldaM = BLKLDD(A, M);
+            lddM = BLKLDD(D, M);
 
             INSERT_TASK_zgeqrt(
                 &options,
@@ -101,15 +102,15 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
             if ( genD ) {
                 INSERT_TASK_zlacpy(
                     &options,
-                    ChamLower, tempMm, A->nb, A->nb,
+                    ChamLower, tempMm, tempkn, A->nb,
                     A(M, k), ldaM,
-                    D(M, k), ldaM );
+                    D(M, k), lddM );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
-                    ChamUpper, tempMm, A->nb,
+                    ChamUpper, tempMm, tempkn,
                     0., 1.,
-                    D(M, k), ldaM );
+                    D(M, k), lddM );
 #endif
             }
             for (n = k+1; n < A->nt; n++) {
@@ -118,7 +119,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                     &options,
                     ChamLeft, ChamConjTrans,
                     tempMm, tempnn, tempkmin, ib, T->nb,
-                    D(M, k), ldaM,
+                    D(M, k), lddM,
                     T(M, k), T->mb,
                     A(M, n), ldaM);
             }
diff --git a/compute/pzlantr.c b/compute/pzlantr.c
index cbdefa954a5d97293bf9277895a499de3300a6a2..acf27965580c15df4fd4535249a8112e47e07fdb 100644
--- a/compute/pzlantr.c
+++ b/compute/pzlantr.c
@@ -24,18 +24,18 @@
 #include "control/common.h"
 
 #define A(m, n) A, m, n
-#define VECNORMS_STEP1(m, n) VECNORMS_STEP1, m, n
-#define VECNORMS_STEP2(m, n) VECNORMS_STEP2, m, n
+#define W1(m, n) W1, m, n
+#define W2(m, n) W2, m, n
 #define RESULT(m, n) RESULT, m, n
 /**
  *
  */
 void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
-                   CHAM_desc_t *A, double *result,
-                   RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
+                       CHAM_desc_t *A, double *result,
+                       RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
 {
-    CHAM_desc_t *VECNORMS_STEP1 = NULL;
-    CHAM_desc_t *VECNORMS_STEP2 = NULL;
+    CHAM_desc_t *W1 = NULL;
+    CHAM_desc_t *W2 = NULL;
     CHAM_desc_t *RESULT         = NULL;
     CHAM_context_t *chamctxt;
     RUNTIME_option_t options;
@@ -58,46 +58,47 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
 
     *result = 0.0;
     switch ( norm ) {
-    /*
-     *  ChamOneNorm
-     */
+        /*
+         *  ChamOneNorm
+         */
     case ChamOneNorm:
         /* Init workspace handle for the call to zlange but unused */
         RUNTIME_options_ws_alloc( &options, 1, 0 );
 
         workm = chameleon_max( A->mt, A->p );
-        workn = A->n;
-        CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, A->nb, A->nb,
-                          workm, workn, 0, 0, workm, workn, A->p, A->q);
+        workn = ( uplo == ChamLower ) ? chameleon_min( A->m, A->n ) : A->n;
+
+        CHAMELEON_Desc_Create(&(W1), NULL, ChamRealDouble, 1, A->nb, A->nb,
+                              workm, workn, 0, 0, workm, workn, A->p, A->q);
 
-        CHAMELEON_Desc_Create(&(VECNORMS_STEP2), NULL, ChamRealDouble, 1, A->nb, A->nb,
-                          1, workn, 0, 0, 1, workn, A->p, A->q);
+        CHAMELEON_Desc_Create(&(W2), NULL, ChamRealDouble, 1, A->nb, A->nb,
+                              1, workn, 0, 0, 1, workn, A->p, A->q);
 
         CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
-                          1, 1, 0, 0, 1, 1, 1, 1);
+                              1, 1, 0, 0, 1, 1, 1, 1);
 
         /*
          *  ChamUpper
          */
         if (uplo == ChamUpper) {
             /* Zeroes intermediate vector */
-            for(n = 0; n < A->nt; n++) {
-                tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+            for(n = 0; n < W2->nt; n++) {
+                tempkn = n == W2->nt-1 ? W2->n-n*W2->nb : W2->nb;
                 INSERT_TASK_dlaset(
                     &options,
                     ChamUpperLower, 1, tempkn,
                     0., 0.,
-                    VECNORMS_STEP2(0, n), 1);
+                    W2(0, n), 1);
             }
             for(m = 0; m < minMNT; m++) {
                 /* Zeroes intermediate vectors */
-                for(n = m; n < A->nt; n++) {
-                    tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+                for(n = m; n < W1->nt; n++) {
+                    tempkn = n == W1->nt-1 ? W1->n-n*W1->nb : W1->nb;
                     INSERT_TASK_dlaset(
                         &options,
                         ChamUpperLower, 1, tempkn,
                         0., 0.,
-                        VECNORMS_STEP1(m, n), 1);
+                        W1(m, n), 1);
                 }
                 tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                 tempkn = m == A->nt-1 ? A->n-m*A->nb : A->nb;
@@ -107,7 +108,7 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     &options,
                     ChamColumnwise, uplo, diag, tempkm, tempkn,
                     A(m, m), ldam,
-                    VECNORMS_STEP1(m, m));
+                    W1(m, m));
 
                 /* compute sums of absolute values on columns of each tile */
                 for(n = m+1; n < A->nt; n++) {
@@ -115,17 +116,17 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     INSERT_TASK_dzasum(
                         &options,
                         ChamColumnwise, ChamUpperLower, tempkm, tempkn,
-                        A(m, n), ldam, VECNORMS_STEP1(m, n));
+                        A(m, n), ldam, W1(m, n));
                 }
 
                 /* Compute vector sums between tiles in columns */
-                for(n = m; n < A->nt; n++) {
-                    tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+                for(n = m; n < W1->nt; n++) {
+                    tempkn = n == W1->nt-1 ? W1->n-n*W1->nb : W1->nb;
                     INSERT_TASK_dgeadd(
                         &options,
-                        ChamNoTrans, 1, tempkn, A->mb,
-                        1.0, VECNORMS_STEP1(m, n), 1,
-                        1.0, VECNORMS_STEP2(0, n), 1);
+                        ChamNoTrans, 1, tempkn, W1->mb,
+                        1.0, W1(m, n), 1,
+                        1.0, W2(0, n), 1);
                 }
             }
         }
@@ -143,21 +144,21 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                         &options,
                         ChamUpperLower, 1, tempkn,
                         0., 0.,
-                        VECNORMS_STEP1(m, n), 1);
+                        W1(m, n), 1);
                 }
                 /* Zeroes the second intermediate vector */
                 INSERT_TASK_dlaset(
                     &options,
                     ChamUpperLower, 1, tempkn,
                     0., 0.,
-                    VECNORMS_STEP2(0, n), 1);
+                    W2(0, n), 1);
 
                 /* compute sums of absolute values on columns of diag tile */
                 INSERT_TASK_ztrasm(
                     &options,
                     ChamColumnwise, uplo, diag, tempkm, tempkn,
                     A(n, n), ldan,
-                    VECNORMS_STEP1(n, n));
+                    W1(n, n));
 
                 /* compute sums of absolute values on columns of each tile */
                 for(m = n+1; m < A->mt; m++) {
@@ -166,7 +167,7 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     INSERT_TASK_dzasum(
                         &options,
                         ChamColumnwise, ChamUpperLower, tempkm, tempkn,
-                        A(m, n), ldam, VECNORMS_STEP1(m, n));
+                        A(m, n), ldam, W1(m, n));
                 }
 
                 /* Compute vector sums between tiles in columns */
@@ -174,8 +175,8 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     INSERT_TASK_dgeadd(
                         &options,
                         ChamNoTrans, 1, tempkn, A->mb,
-                        1.0, VECNORMS_STEP1(m, n), 1,
-                        1.0, VECNORMS_STEP2(0, n), 1);
+                        1.0, W1(m, n), 1,
+                        1.0, W2(0, n), 1);
                 }
             }
         }
@@ -189,8 +190,8 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
             INSERT_TASK_dlange(
                 &options,
                 ChamMaxNorm, 1, tempkn, A->nb,
-                VECNORMS_STEP2(0, n), 1,
-                VECNORMS_STEP1(0, n));
+                W2(0, n), 1,
+                W1(0, n));
         }
 
         /* Initialize RESULT array */
@@ -205,7 +206,7 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
             for(n = 0; n < A->nt; n++) {
                 INSERT_TASK_dlange_max(
                     &options,
-                    VECNORMS_STEP1(0, n),
+                    W1(0, n),
                     RESULT(0,0));
             }
         }
@@ -217,35 +218,35 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     &options,
                     ChamUpperLower, 1, 1, 1,
                     RESULT(0,0), 1,
-                    VECNORMS_STEP1(m, n), 1 );
+                    W1(m, n), 1 );
             }
         }
-        CHAMELEON_Desc_Flush( VECNORMS_STEP2, sequence );
-        CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
+        CHAMELEON_Desc_Flush( W2, sequence );
+        CHAMELEON_Desc_Flush( W1, sequence );
         CHAMELEON_Desc_Flush( RESULT, sequence );
         RUNTIME_sequence_wait(chamctxt, sequence);
-        *result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
-        CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
-        CHAMELEON_Desc_Destroy( &(VECNORMS_STEP2) );
+        *result = *(double *)W1->get_blkaddr(W1, A->myrank / A->q, A->myrank % A->q );
+        CHAMELEON_Desc_Destroy( &(W1) );
+        CHAMELEON_Desc_Destroy( &(W2) );
         CHAMELEON_Desc_Destroy( &(RESULT) );
         break;
-    /*
-     *  ChamInfNorm
-     */
+        /*
+         *  ChamInfNorm
+         */
     case ChamInfNorm:
         /* Init workspace handle for the call to zlange */
         RUNTIME_options_ws_alloc( &options, A->mb, 0 );
 
-        workm = A->m;
+        workm = ( uplo == ChamUpper ) ? chameleon_min( A->m, A->n ) : A->m;
         workn = chameleon_max( A->nt, A->q );
-        CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, A->mb, 1, A->mb,
-                          workm, workn, 0, 0, workm, workn, A->p, A->q);
+        CHAMELEON_Desc_Create(&(W1), NULL, ChamRealDouble, A->mb, 1, A->mb,
+                              workm, workn, 0, 0, workm, workn, A->p, A->q);
 
-        CHAMELEON_Desc_Create(&(VECNORMS_STEP2), NULL, ChamRealDouble, A->mb, 1, A->mb,
-                          workm, 1, 0, 0, workm, 1, A->p, A->q);
+        CHAMELEON_Desc_Create(&(W2), NULL, ChamRealDouble, A->mb, 1, A->mb,
+                              workm, 1, 0, 0, workm, 1, A->p, A->q);
 
         CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
-                          1, 1, 0, 0, 1, 1, 1, 1);
+                              1, 1, 0, 0, 1, 1, 1, 1);
 
         /*
          *  ChamUpper
@@ -261,21 +262,21 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                         &options,
                         ChamUpperLower, tempkm, 1,
                         0., 0.,
-                        VECNORMS_STEP1(m, n), 1);
+                        W1(m, n), 1);
                 }
                 /* Zeroes intermediate vector */
                 INSERT_TASK_dlaset(
                     &options,
                     ChamUpperLower, tempkm, 1,
                     0., 0.,
-                    VECNORMS_STEP2(m, 0), 1);
+                    W2(m, 0), 1);
 
                 /* compute sums of absolute values on rows of diag tile */
                 INSERT_TASK_ztrasm(
                     &options,
                     ChamRowwise, uplo, diag, tempkm, tempkn,
                     A(m, m), ldam,
-                    VECNORMS_STEP1(m, m));
+                    W1(m, m));
 
                 /* compute sums of absolute values on rows of each tile */
                 for(n = m+1; n < A->nt; n++) {
@@ -283,7 +284,7 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     INSERT_TASK_dzasum(
                         &options,
                         ChamRowwise, ChamUpperLower, tempkm, tempkn,
-                        A(m, n), ldam, VECNORMS_STEP1(m, n));
+                        A(m, n), ldam, W1(m, n));
                 }
 
                 /* Compute vector sums between tiles in rows */
@@ -291,8 +292,8 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     INSERT_TASK_dgeadd(
                         &options,
                         ChamNoTrans, tempkm, 1, A->mb,
-                        1.0, VECNORMS_STEP1(m, n), tempkm,
-                        1.0, VECNORMS_STEP2(m, 0), tempkm);
+                        1.0, W1(m, n), tempkm,
+                        1.0, W2(m, 0), tempkm);
                 }
 
             }
@@ -308,7 +309,7 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     &options,
                     ChamUpperLower, tempkm, 1,
                     0., 0.,
-                    VECNORMS_STEP2(m, 0), 1);
+                    W2(m, 0), 1);
             }
             for(n = 0; n < minMNT; n++) {
                 /* Zeroes intermediate vectors */
@@ -318,7 +319,7 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                         &options,
                         ChamUpperLower, tempkm, 1,
                         0., 0.,
-                        VECNORMS_STEP1(m, n), tempkm);
+                        W1(m, n), tempkm);
                 }
                 tempkm = n == A->mt-1 ? A->m-n*A->mb : A->mb;
                 tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -328,7 +329,7 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     &options,
                     ChamRowwise, uplo, diag, tempkm, tempkn,
                     A(n, n), ldan,
-                    VECNORMS_STEP1(n, n));
+                    W1(n, n));
 
                 /* compute sums of absolute values on rows of each tile */
                 for(m = n+1; m < A->mt; m++) {
@@ -337,7 +338,7 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     INSERT_TASK_dzasum(
                         &options,
                         ChamRowwise, ChamUpperLower, tempkm, tempkn,
-                        A(m, n), ldam, VECNORMS_STEP1(m, n));
+                        A(m, n), ldam, W1(m, n));
                 }
 
                 /* Compute vector sums between tiles in rows */
@@ -346,8 +347,8 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     INSERT_TASK_dgeadd(
                         &options,
                         ChamNoTrans, tempkm, 1, A->mb,
-                        1.0, VECNORMS_STEP1(m, n), tempkm,
-                        1.0, VECNORMS_STEP2(m, 0), tempkm);
+                        1.0, W1(m, n), tempkm,
+                        1.0, W2(m, 0), tempkm);
                 }
             }
         }
@@ -356,13 +357,13 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
          * Compute max norm of each segment of the final vector in the
          * previous workspace
          */
-        for(m = 0; m < A->mt; m++) {
+        for(m = 0; m < W1->mt; m++) {
             tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
             INSERT_TASK_dlange(
                 &options,
                 ChamMaxNorm, tempkm, 1, A->nb,
-                VECNORMS_STEP2(m, 0), 1,
-                VECNORMS_STEP1(m, 0));
+                W2(m, 0), 1,
+                W1(m, 0));
         }
 
         /* Initialize RESULT array */
@@ -374,10 +375,10 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
 
         /* compute max norm between tiles in the column */
         if (A->myrank % A->q == 0) {
-            for(m = 0; m < A->mt; m++) {
+            for(m = 0; m < W1->mt; m++) {
                 INSERT_TASK_dlange_max(
                     &options,
-                    VECNORMS_STEP1(m, 0),
+                    W1(m, 0),
                     RESULT(0,0));
             }
         }
@@ -389,29 +390,29 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     &options,
                     ChamUpperLower, 1, 1, 1,
                     RESULT(0,0), 1,
-                    VECNORMS_STEP1(m, n), 1 );
+                    W1(m, n), 1 );
             }
         }
-        CHAMELEON_Desc_Flush( VECNORMS_STEP2, sequence );
-        CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
+        CHAMELEON_Desc_Flush( W2, sequence );
+        CHAMELEON_Desc_Flush( W1, sequence );
         CHAMELEON_Desc_Flush( RESULT, sequence );
         RUNTIME_sequence_wait(chamctxt, sequence);
-        *result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
-        CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
-        CHAMELEON_Desc_Destroy( &(VECNORMS_STEP2) );
+        *result = *(double *)W1->get_blkaddr(W1, A->myrank / A->q, A->myrank % A->q );
+        CHAMELEON_Desc_Destroy( &(W1) );
+        CHAMELEON_Desc_Destroy( &(W2) );
         CHAMELEON_Desc_Destroy( &(RESULT) );
         break;
-    /*
-     *  ChamFrobeniusNorm
-     */
+        /*
+         *  ChamFrobeniusNorm
+         */
     case ChamFrobeniusNorm:
         workm = chameleon_max( A->mt, A->p );
         workn = chameleon_max( A->nt, A->q );
 
-        CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 2, 2,
-                          workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q);
+        CHAMELEON_Desc_Create(&(W1), NULL, ChamRealDouble, 1, 2, 2,
+                              workm, 2*workn, 0, 0, workm, 2*workn, A->p, A->q);
         CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 2, 2,
-                          1, 2, 0, 0, 1, 2, 1, 1);
+                              1, 2, 0, 0, 1, 2, 1, 1);
 
         /*
          *  ChamLower
@@ -428,14 +429,14 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                         &options,
                         ChamUpperLower, 1, 2,
                         1., 0.,
-                        VECNORMS_STEP1(m,n), 1);
+                        W1(m,n), 1);
                 }
                 /* Compute local norm of the diagonal tile */
                 INSERT_TASK_ztrssq(
                     &options,
                     uplo, diag, tempkm, tempkn,
                     A(n, n), ldan,
-                    VECNORMS_STEP1(n, n));
+                    W1(n, n));
                 /* Compute local norm to each tile */
                 for(m = n+1; m < A->mt; m++) {
                     tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
@@ -444,7 +445,7 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                         &options,
                         tempkm, tempkn,
                         A(m, n), ldam,
-                        VECNORMS_STEP1(m, n));
+                        W1(m, n));
                 }
             }
         }
@@ -463,14 +464,14 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                         &options,
                         ChamUpperLower, 1, 2,
                         1., 0.,
-                        VECNORMS_STEP1(m,n), 1);
+                        W1(m,n), 1);
                 }
                 /* Compute local norm of the diagonal tile */
                 INSERT_TASK_ztrssq(
                     &options,
                     uplo, diag, tempkm, tempkn,
                     A(m, m), ldam,
-                    VECNORMS_STEP1(m, m));
+                    W1(m, m));
                 /* Compute local norm to each tile */
                 for(n = m+1; n < A->nt; n++) {
                     tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -478,7 +479,7 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                         &options,
                         tempkm, tempkn,
                         A(m, n), ldam,
-                        VECNORMS_STEP1(m, n));
+                        W1(m, n));
                 }
             }
         }
@@ -499,7 +500,7 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                 for(m = n; m < A->mt; m++) {
                     INSERT_TASK_dplssq(
                         &options,
-                        VECNORMS_STEP1(m, n),
+                        W1(m, n),
                         RESULT(0,0));
                 }
             }
@@ -513,7 +514,7 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                 for(n = m; n < A->nt; n++) {
                     INSERT_TASK_dplssq(
                         &options,
-                        VECNORMS_STEP1(m, n),
+                        W1(m, n),
                         RESULT(0,0));
                 }
             }
@@ -531,142 +532,142 @@ void chameleon_pzlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                     &options,
                     ChamUpperLower, 1, 1, 1,
                     RESULT(0,0), 1,
-                    VECNORMS_STEP1(m, n), 1 );
+                    W1(m, n), 1 );
             }
         }
 
-        CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
+        CHAMELEON_Desc_Flush( W1, sequence );
         CHAMELEON_Desc_Flush( RESULT, sequence );
         RUNTIME_sequence_wait(chamctxt, sequence);
-        *result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
-        CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
+        *result = *(double *)W1->get_blkaddr(W1, A->myrank / A->q, A->myrank % A->q );
+        CHAMELEON_Desc_Destroy( &(W1) );
         CHAMELEON_Desc_Destroy( &(RESULT) );
         break;
 
         /*
          *  ChamMaxNorm
          */
-        case ChamMaxNorm:
-        default:
-            /* Init workspace handle for the call to zlange but unused */
-            RUNTIME_options_ws_alloc( &options, 1, 0 );
+    case ChamMaxNorm:
+    default:
+        /* Init workspace handle for the call to zlange but unused */
+        RUNTIME_options_ws_alloc( &options, 1, 0 );
 
-            workm = chameleon_max( A->mt, A->p );
-            workn = chameleon_max( A->nt, A->q );
+        workm = chameleon_max( A->mt, A->p );
+        workn = chameleon_max( A->nt, A->q );
 
-            CHAMELEON_Desc_Create(&(VECNORMS_STEP1), NULL, ChamRealDouble, 1, 1, 1,
+        CHAMELEON_Desc_Create(&(W1), NULL, ChamRealDouble, 1, 1, 1,
                               workm, workn, 0, 0, workm, workn, A->p, A->q);
-            CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
+        CHAMELEON_Desc_Create(&(RESULT), NULL, ChamRealDouble, 1, 1, 1,
                               1, 1, 0, 0, 1, 1, 1, 1);
-            /*
-             *  ChamLower
-             */
-            if (uplo == ChamLower) {
-                /* Compute local maximum to each tile */
-                for(n = 0; n < minMNT; n++) {
-                    tempkm = n == A->mt-1 ? A->m-n*A->mb : A->mb;
-                    tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-                    ldan = BLKLDD(A, n);
+        /*
+         *  ChamLower
+         */
+        if (uplo == ChamLower) {
+            /* Compute local maximum to each tile */
+            for(n = 0; n < minMNT; n++) {
+                tempkm = n == A->mt-1 ? A->m-n*A->mb : A->mb;
+                tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+                ldan = BLKLDD(A, n);
+
+                INSERT_TASK_zlantr(
+                    &options,
+                    ChamMaxNorm, uplo, diag,
+                    tempkm, tempkn, A->nb,
+                    A(n, n), ldan,
+                    W1(n, n));
 
-                    INSERT_TASK_zlantr(
+                for(m = n+1; m < A->mt; m++) {
+                    tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+                    ldam = BLKLDD(A, m);
+                    INSERT_TASK_zlange(
                         &options,
-                        ChamMaxNorm, uplo, diag,
-                        tempkm, tempkn, A->nb,
-                        A(n, n), ldan,
-                        VECNORMS_STEP1(n, n));
-
-                    for(m = n+1; m < A->mt; m++) {
-                        tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                        ldam = BLKLDD(A, m);
-                        INSERT_TASK_zlange(
-                            &options,
-                            ChamMaxNorm, tempkm, tempkn, A->nb,
-                            A(m, n), ldam,
-                            VECNORMS_STEP1(m, n));
-                    }
+                        ChamMaxNorm, tempkm, tempkn, A->nb,
+                        A(m, n), ldam,
+                        W1(m, n));
                 }
             }
-            /*
-             *  ChamUpper
-             */
-            else {
-                /* Compute local maximum to each tile */
-                for(m = 0; m < minMNT; m++) {
-                    tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                    tempkn = m == A->nt-1 ? A->n-m*A->nb : A->nb;
-                    ldam = BLKLDD(A, m);
+        }
+        /*
+         *  ChamUpper
+         */
+        else {
+            /* Compute local maximum to each tile */
+            for(m = 0; m < minMNT; m++) {
+                tempkm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
+                tempkn = m == A->nt-1 ? A->n-m*A->nb : A->nb;
+                ldam = BLKLDD(A, m);
 
-                    INSERT_TASK_zlantr(
+                INSERT_TASK_zlantr(
+                    &options,
+                    ChamMaxNorm, uplo, diag,
+                    tempkm, tempkn, A->nb,
+                    A(m, m), ldam,
+                    W1(m, m));
+
+                for(n = m+1; n < A->nt; n++) {
+                    tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
+                    INSERT_TASK_zlange(
                         &options,
-                        ChamMaxNorm, uplo, diag,
-                        tempkm, tempkn, A->nb,
-                        A(m, m), ldam,
-                        VECNORMS_STEP1(m, m));
-
-                    for(n = m+1; n < A->nt; n++) {
-                        tempkn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-                        INSERT_TASK_zlange(
-                            &options,
-                            ChamMaxNorm, tempkm, tempkn, A->nb,
-                            A(m, n), ldam,
-                            VECNORMS_STEP1(m, n));
-                    }
+                        ChamMaxNorm, tempkm, tempkn, A->nb,
+                        A(m, n), ldam,
+                        W1(m, n));
                 }
             }
+        }
 
-            /* Initialize RESULT array */
-            INSERT_TASK_dlaset(
-                &options,
-                ChamUpperLower, 1, 1,
-                0., 0.,
-                RESULT(0,0), 1);
-
-            /*
-             *  ChamLower
-             */
-            if (uplo == ChamLower) {
-                /* Compute max norm between tiles */
-                for(n = 0; n < minMNT; n++) {
-                    for(m = n; m < A->mt; m++) {
-                        INSERT_TASK_dlange_max(
-                            &options,
-                            VECNORMS_STEP1(m, n),
-                            RESULT(0,0));
-                    }
+        /* Initialize RESULT array */
+        INSERT_TASK_dlaset(
+            &options,
+            ChamUpperLower, 1, 1,
+            0., 0.,
+            RESULT(0,0), 1);
+
+        /*
+         *  ChamLower
+         */
+        if (uplo == ChamLower) {
+            /* Compute max norm between tiles */
+            for(n = 0; n < minMNT; n++) {
+                for(m = n; m < A->mt; m++) {
+                    INSERT_TASK_dlange_max(
+                        &options,
+                        W1(m, n),
+                        RESULT(0,0));
                 }
             }
-            /*
-             *  ChamUpper
-             */
-            else {
-                /* Compute max norm between tiles */
-                for(m = 0; m < minMNT; m++) {
-                    for(n = m; n < A->nt; n++) {
-                        INSERT_TASK_dlange_max(
-                            &options,
-                            VECNORMS_STEP1(m, n),
-                            RESULT(0,0));
-                    }
+        }
+        /*
+         *  ChamUpper
+         */
+        else {
+            /* Compute max norm between tiles */
+            for(m = 0; m < minMNT; m++) {
+                for(n = m; n < A->nt; n++) {
+                    INSERT_TASK_dlange_max(
+                        &options,
+                        W1(m, n),
+                        RESULT(0,0));
                 }
             }
+        }
 
-            /* Copy max norm in tiles to dispatch on every nodes */
-            for(m = 0; m < A->p; m++) {
-                for(n = 0; n < A->q; n++) {
-                    INSERT_TASK_dlacpy(
-                        &options,
-                        ChamUpperLower, 1, 1, 1,
-                        RESULT(0,0), 1,
-                        VECNORMS_STEP1(m, n), 1 );
-                }
+        /* Copy max norm in tiles to dispatch on every node */
+        for(m = 0; m < A->p; m++) {
+            for(n = 0; n < A->q; n++) {
+                INSERT_TASK_dlacpy(
+                    &options,
+                    ChamUpperLower, 1, 1, 1,
+                    RESULT(0,0), 1,
+                    W1(m, n), 1 );
             }
+        }
 
-            CHAMELEON_Desc_Flush( VECNORMS_STEP1, sequence );
-            CHAMELEON_Desc_Flush( RESULT, sequence );
-            RUNTIME_sequence_wait(chamctxt, sequence);
-            *result = *(double *)VECNORMS_STEP1->get_blkaddr(VECNORMS_STEP1, A->myrank / A->q, A->myrank % A->q );
-            CHAMELEON_Desc_Destroy( &(VECNORMS_STEP1) );
-            CHAMELEON_Desc_Destroy( &(RESULT) );
+        CHAMELEON_Desc_Flush( W1, sequence );
+        CHAMELEON_Desc_Flush( RESULT, sequence );
+        RUNTIME_sequence_wait(chamctxt, sequence);
+        *result = *(double *)W1->get_blkaddr(W1, A->myrank / A->q, A->myrank % A->q );
+        CHAMELEON_Desc_Destroy( &(W1) );
+        CHAMELEON_Desc_Destroy( &(RESULT) );
     }
     RUNTIME_options_ws_free(&options);
     RUNTIME_options_finalize(&options, chamctxt);
diff --git a/compute/pztpgqrt.c b/compute/pztpgqrt.c
index e159c781a5c6e2cbe72a0f08847ada39e06b3638..f2a68870afb8922e0ef76953c25750ce65ce5d11 100644
--- a/compute/pztpgqrt.c
+++ b/compute/pztpgqrt.c
@@ -44,7 +44,7 @@ void chameleon_pztpgqrt( int genD, int L,
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldvk, ldvm;
+    int ldvk, ldvm, lddk;
     int ldqk, ldqm;
     int tempkm, tempkn, tempkk, tempnn, tempmm, templm;
     int ib;
@@ -92,6 +92,7 @@ void chameleon_pztpgqrt( int genD, int L,
         tempkk = k == V1->nt-1 ? V1->n-k*V1->nb : V1->nb;
         tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb;
         ldvk = BLKLDD(V1, k);
+        lddk = BLKLDD(D,  k);
         ldqk = BLKLDD(Q1, k);
 
         /* Equivalent to the tsmqr step on Q1,Q2 */
@@ -139,13 +140,13 @@ void chameleon_pztpgqrt( int genD, int L,
                 &options,
                 ChamLower, tempkm, tempkk, V1->nb,
                 V1(k, k), ldvk,
-                D(k), ldvk );
+                D(k), lddk );
 #if defined(CHAMELEON_USE_CUDA)
             INSERT_TASK_zlaset(
                 &options,
                 ChamUpper, tempkm, tempkk,
                 0., 1.,
-                D(k), ldvk );
+                D(k), lddk );
 #endif
         }
         for (n = k; n < Q1->nt; n++) {
@@ -154,7 +155,7 @@ void chameleon_pztpgqrt( int genD, int L,
                 &options,
                 ChamLeft, ChamNoTrans,
                 tempkm, tempnn, tempkk, ib, T1->nb,
-                D(k),     ldvk,
+                D(k),     lddk,
                 T1(k, k), T1->mb,
                 Q1(k, n), ldqk);
         }
diff --git a/compute/pzunglq.c b/compute/pzunglq.c
index 9b754c679b47057fb1f60f94b387f33db3dfa23b..17dc51b0a719b8bf628743b8f09d978e05a800c6 100644
--- a/compute/pzunglq.c
+++ b/compute/pzunglq.c
@@ -42,7 +42,7 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldqm;
+    int ldak, ldqm, lddk;
     int tempnn, tempmm, tempkmin, tempkn;
     int tempAkm, tempAkn;
     int ib, minMT;
@@ -94,6 +94,8 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
         tempkmin = chameleon_min( tempAkn, tempAkm );
         tempkn   = k == Q->nt-1 ? Q->n-k*Q->nb : Q->nb;
         ldak = BLKLDD(A, k);
+        lddk = BLKLDD(D, k);
+
         for (n = Q->nt-1; n > k; n--) {
             tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
             for (m = 0; m < Q->mt; m++) {
@@ -117,19 +119,18 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
             RUNTIME_data_flush( sequence, T(k, n) );
         }
 
-
         if ( genD ) {
             INSERT_TASK_zlacpy(
                 &options,
                 ChamUpper, tempkmin, tempkn, A->nb,
                 A(k, k), ldak,
-                D(k), ldak );
+                D(k),    lddk );
 #if defined(CHAMELEON_USE_CUDA)
             INSERT_TASK_zlaset(
                 &options,
                 ChamLower, tempkmin, tempkn,
                 0., 1.,
-                D(k), ldak );
+                D(k), lddk );
 #endif
         }
         for (m = k; m < Q->mt; m++) {
@@ -143,7 +144,7 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
                 &options,
                 ChamRight, ChamNoTrans,
                 tempmm, tempkn, tempkmin, ib, T->nb,
-                D(k), ldak,
+                D(k),    lddk,
                 T(k, k), T->mb,
                 Q(m, k), ldqm);
         }
diff --git a/compute/pzunglq_param.c b/compute/pzunglq_param.c
index 564d23c605bd03d0a250aa4a64e7afa947282045..7e8d35051d8514ff79d68cac5e6c4d03ef429d66 100644
--- a/compute/pzunglq_param.c
+++ b/compute/pzunglq_param.c
@@ -41,7 +41,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
 
     int k, m, n, i, p;
     int K, L;
-    int ldak, ldqm;
+    int ldak, ldqm, lddk;
     int tempkm, tempkmin, temppn, tempnn, tempmm;
     int ib;
     int *tiles;
@@ -93,6 +93,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         ldak = BLKLDD(A, k);
+        lddk = BLKLDD(D, k);
 
         /* Setting the order of the tiles*/
         libhqr_walk_stepk(qrtree, k, tiles + (k+1));
@@ -147,13 +148,13 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
                     &options,
                     ChamUpper, tempkmin, temppn, A->nb,
                     A(k, p), ldak,
-                    D(k, p), ldak );
+                    D(k, p), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamLower, tempkmin, temppn,
                     0., 1.,
-                    D(k, p), ldak );
+                    D(k, p), lddk );
 #endif
             }
             for (m = k; m < Q->mt; m++) {
@@ -167,7 +168,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
                     &options,
                     ChamRight, ChamNoTrans,
                     tempmm, temppn, tempkmin, ib, T->nb,
-                    D(k, p), ldak,
+                    D(k, p), lddk,
                     T(k, p), T->mb,
                     Q(m, p), ldqm);
             }
diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c
index 878c8763010c2557a78d47e4bd110a15e635da20..e1d94006617ca35a9372701fd52151e4d9751617 100644
--- a/compute/pzunglqrh.c
+++ b/compute/pzunglqrh.c
@@ -46,8 +46,7 @@ void chameleon_pzunglqrh( int genD, int BS,
 
     int k, m, n;
     int K, N, RD, lastRD;
-    int ldak;
-    int ldqm;
+    int ldak, lddk, ldqm;
     int tempkm, tempkmin, tempNn, tempnn, tempmm, tempNRDn;
     int ib;
 
@@ -89,6 +88,7 @@ void chameleon_pzunglqrh( int genD, int BS,
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         ldak = BLKLDD(A, k);
+        lddk = BLKLDD(D, k);
         lastRD = 0;
         for (RD = BS; RD < A->nt-k; RD *= 2)
             lastRD = RD;
@@ -154,13 +154,13 @@ void chameleon_pzunglqrh( int genD, int BS,
                     &options,
                     ChamUpper, tempkmin, tempNn, A->nb,
                     A(k, N), ldak,
-                    D(k, N), ldak );
+                    D(k, N), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamLower, tempkmin, tempNn,
                     0., 1.,
-                    D(k, N), ldak );
+                    D(k, N), lddk );
 #endif
             }
             for (m = k; m < Q->mt; m++) {
@@ -175,7 +175,7 @@ void chameleon_pzunglqrh( int genD, int BS,
                     ChamRight, ChamNoTrans,
                     tempmm, tempNn,
                     tempkmin, ib, T->nb,
-                    D(k, N), ldak,
+                    D(k, N), lddk,
                     T(k, N), T->mb,
                     Q(m, N), ldqm);
             }
diff --git a/compute/pzungqr.c b/compute/pzungqr.c
index ed0e4ed9b1352f6529300e846e9e2dbe38401339..b4aca1d4a2c2f58cfa3fd4416fc3cfee2f564a72 100644
--- a/compute/pzungqr.c
+++ b/compute/pzungqr.c
@@ -43,7 +43,7 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldqk, ldam, ldqm;
+    int ldak, ldqk, ldam, ldqm, lddk;
     int tempmm, tempnn, tempkmin, tempkm;
     int tempAkm, tempAkn;
     int ib, minMT;
@@ -95,6 +95,7 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
         tempkmin = chameleon_min( tempAkn, tempAkm );
         tempkm   = k == Q->mt-1 ? Q->m-k*Q->mb : Q->mb;
         ldak = BLKLDD(A, k);
+        lddk = BLKLDD(D, k);
         ldqk = BLKLDD(Q, k);
         for (m = Q->mt - 1; m > k; m--) {
             tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
@@ -125,13 +126,13 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
                 &options,
                 ChamLower, tempkm, tempkmin, A->nb,
                 A(k, k), ldak,
-                D(k), ldak );
+                D(k),    lddk );
 #if defined(CHAMELEON_USE_CUDA)
             INSERT_TASK_zlaset(
                 &options,
                 ChamUpper, tempkm, tempkmin,
                 0., 1.,
-                D(k), ldak );
+                D(k), lddk );
 #endif
         }
         for (n = k; n < Q->nt; n++) {
@@ -145,7 +146,7 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
                 &options,
                 ChamLeft, ChamNoTrans,
                 tempkm, tempnn, tempkmin, ib, T->nb,
-                D(k),    ldak,
+                D(k),    lddk,
                 T(k, k), T->mb,
                 Q(k, n), ldqk);
         }
diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c
index 6c1cd3697667a5377a0dc1101b272775596db8a0..beaa6a8c626b3637090bef97fbebdf0906d29910 100644
--- a/compute/pzungqr_param.c
+++ b/compute/pzungqr_param.c
@@ -41,7 +41,7 @@ void chameleon_pzungqr_param( int genD, const libhqr_tree_t *qrtree,
     size_t ws_host = 0;
 
     int k, m, n, i, p, L;
-    int ldam, ldqm, ldqp;
+    int ldam, ldqm, ldqp, lddm;
     int tempmm, tempnn, tempkmin, tempkn;
     int ib, minMT;
     int *tiles;
@@ -145,6 +145,7 @@ void chameleon_pzungqr_param( int genD, const libhqr_tree_t *qrtree,
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
             tempkmin = chameleon_min(tempmm, tempkn);
             ldam = BLKLDD(A, m);
+            lddm = BLKLDD(D, m);
             ldqm = BLKLDD(Q, m);
 
             if ( genD ) {
@@ -152,13 +153,13 @@ void chameleon_pzungqr_param( int genD, const libhqr_tree_t *qrtree,
                     &options,
                     ChamLower, tempmm, tempkmin, A->nb,
                     A(m, k), ldam,
-                    D(m, k), ldam );
+                    D(m, k), lddm );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamUpper, tempmm, tempkmin,
                     0., 1.,
-                    D(m, k), ldam );
+                    D(m, k), lddm );
 #endif
             }
 
@@ -173,7 +174,7 @@ void chameleon_pzungqr_param( int genD, const libhqr_tree_t *qrtree,
                     &options,
                     ChamLeft, ChamNoTrans,
                     tempmm, tempnn, tempkmin, ib, T->nb,
-                    D(m, k), ldam,
+                    D(m, k), lddm,
                     T(m, k), T->mb,
                     Q(m, n), ldqm);
             }
diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c
index d22e6e052b14454b3ac9971d60c2ce8d1824b337..40c182370a4890eac0a5ab12efd79e9fe014460f 100644
--- a/compute/pzungqrrh.c
+++ b/compute/pzungqrrh.c
@@ -48,7 +48,7 @@ void chameleon_pzungqrrh( int genD, int BS,
 
     int k, m, n;
     int K, M, RD, lastRD;
-    int ldaM, ldam, ldaMRD;
+    int ldaM, ldam, ldaMRD, lddM;
     int ldqM, ldqm, ldqMRD;
     int tempkn, tempMm, tempnn, tempmm, tempMRDm, tempkmin;
     int ib;
@@ -127,6 +127,7 @@ void chameleon_pzungqrrh( int genD, int BS,
             tempMm   = M == A->mt-1 ? A->m-M*A->mb : A->mb;
             tempkmin = chameleon_min(tempMm, tempkn);
             ldaM = BLKLDD(A, M);
+            lddM = BLKLDD(D, M);
             ldqM = BLKLDD(Q, M);
             for (m = chameleon_min(M+BS, A->mt)-1; m > M; m--) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
@@ -160,13 +161,13 @@ void chameleon_pzungqrrh( int genD, int BS,
                     &options,
                     ChamLower, tempMm, tempkmin, A->nb,
                     A(M, k), ldaM,
-                    D(M, k), ldaM );
+                    D(M, k), lddM );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamUpper, tempMm, tempkmin,
                     0., 1.,
-                    D(M, k), ldaM );
+                    D(M, k), lddM );
 #endif
             }
             for (n = k; n < Q->nt; n++) {
@@ -181,7 +182,7 @@ void chameleon_pzungqrrh( int genD, int BS,
                     ChamLeft, ChamNoTrans,
                     tempMm, tempnn,
                     tempkmin, ib, T->nb,
-                    D(M, k), ldaM,
+                    D(M, k), lddM,
                     T(M, k), T->mb,
                     Q(M, n), ldqM);
             }
diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c
index a7691d927f17d7156e758abfbeb80a28eb993937..74b583011aba123c2e17e809844387f34b982cfc 100644
--- a/compute/pzunmlq.c
+++ b/compute/pzunmlq.c
@@ -44,7 +44,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldbk, ldbm;
+    int ldak, ldbk, ldbm, lddk;
     int tempmm, tempnn, tempkn, tempkm, tempkmin;
     int ib, minMT, minM;
 
@@ -97,21 +97,24 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
+                tempkn   = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                 tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
                 ldak = BLKLDD(A, k);
                 ldbk = BLKLDD(B, k);
+                lddk = BLKLDD(D, k);
+
                 if ( genD ) {
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamUpper, tempkmin, tempkm, A->nb,
+                        ChamUpper, tempkmin, tempkn, A->nb,
                         A(k, k), ldak,
-                        D(k), ldak );
+                        D(k),    lddk );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
-                        ChamLower, tempkmin, tempkm,
+                        ChamLower, tempkmin, tempkn,
                         0., 1.,
-                        D(k), ldak );
+                        D(k), lddk );
 #endif
                 }
                 for (n = 0; n < B->nt; n++) {
@@ -120,7 +123,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         &options,
                         side, trans,
                         tempkm, tempnn, tempkmin, ib, T->nb,
-                        D(k), ldak,
+                        D(k),    lddk,
                         T(k, k), T->mb,
                         B(k, n), ldbk);
                 }
@@ -168,10 +171,13 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
             for (k = minMT-1; k >= 0; k--) {
                 RUNTIME_iteration_push(chamctxt, k);
 
+                tempkn   = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                 tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
                 tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
                 ldak = BLKLDD(A, k);
                 ldbk = BLKLDD(B, k);
+                lddk = BLKLDD(D, k);
+
                 for (m = B->mt-1; m > k; m--) {
                     tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
                     ldbm = BLKLDD(B, m);
@@ -198,15 +204,15 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                 if ( genD ) {
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamUpper, tempkmin, tempkm, A->nb,
+                        ChamUpper, tempkmin, tempkn, A->nb,
                         A(k, k), ldak,
-                        D(k), ldak );
+                        D(k),    lddk );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
-                        ChamLower, tempkmin, tempkm,
+                        ChamLower, tempkmin, tempkn,
                         0., 1.,
-                        D(k), ldak );
+                        D(k), lddk );
 #endif
                 }
                 for (n = 0; n < B->nt; n++) {
@@ -219,7 +225,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         &options,
                         side, trans,
                         tempkm, tempnn, tempkmin, ib, T->nb,
-                        D(k), ldak,
+                        D(k),    lddk,
                         T(k, k), T->mb,
                         B(k, n), ldbk);
                 }
@@ -240,6 +246,8 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                 tempkn   = k == B->nt - 1 ? B->n - k * B->nb : B->nb;
                 tempkmin = k == minMT - 1 ? minM - k * A->nb : A->nb;
                 ldak = BLKLDD(A, k);
+                lddk = BLKLDD(D, k);
+
                 for (n = B->nt-1; n > k; n--) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                     for (m = 0; m < B->mt; m++) {
@@ -268,13 +276,13 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         &options,
                         ChamUpper, tempkmin, tempkn, A->nb,
                         A(k, k), ldak,
-                        D(k), ldak );
+                        D(k),    lddk );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
                         ChamLower, tempkmin, tempkn,
                         0., 1.,
-                        D(k), ldak );
+                        D(k), lddk );
 #endif
                 }
                 for (m = 0; m < B->mt; m++) {
@@ -288,7 +296,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         &options,
                         side, trans,
                         tempmm, tempkn, tempkmin, ib, T->nb,
-                        D(k), ldak,
+                        D(k),    lddk,
                         T(k, k), T->mb,
                         B(m, k), ldbm);
                 }
@@ -309,18 +317,20 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                 tempkn   = k == B->nt-1 ? B->n-k*B->nb : B->nb;
                 tempkmin = k == minMT-1 ? minM-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
+                lddk = BLKLDD(D, k);
+
                 if ( genD ) {
                     INSERT_TASK_zlacpy(
                         &options,
                         ChamUpper, tempkmin, tempkn, A->nb,
                         A(k, k), ldak,
-                        D(k), ldak );
+                        D(k),    lddk );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
                         ChamLower, tempkmin, tempkn,
                         0., 1.,
-                        D(k), ldak );
+                        D(k), lddk );
 #endif
                 }
                 for (m = 0; m < B->mt; m++) {
@@ -330,7 +340,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         &options,
                         side, trans,
                         tempmm, tempkn, tempkmin, ib, T->nb,
-                        D(k), ldak,
+                        D(k),    lddk,
                         T(k, k), T->mb,
                         B(m, k), ldbm);
                 }
diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c
index ac54595cf83d6f9da759bc2c3d30aa6603220f12..d818c192cd724e8e06d39e518e5bffcefe4433f0 100644
--- a/compute/pzunmlq_param.c
+++ b/compute/pzunmlq_param.c
@@ -42,7 +42,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
     size_t ws_host = 0;
 
     int k, m, n, i, p;
-    int ldbm, ldak, ldbp;
+    int ldbm, ldak, ldbp, lddk;
     int tempnn, temppn, tempkmin, tempmm, tempkm;
     int ib, K, L;
     int *tiles;
@@ -95,6 +95,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
+                lddk = BLKLDD(D, k);
 
                 T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
@@ -109,13 +110,13 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             ChamUpper, tempkmin, temppn, A->nb,
                             A(k, p), ldak,
-                            D(k, p), ldak );
+                            D(k, p), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, temppn,
                             0., 1.,
-                            D(k, p), ldak );
+                            D(k, p), lddk );
 #endif
                     }
                     for (n = 0; n < B->nt; n++) {
@@ -124,7 +125,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             side, trans,
                             temppn, tempnn, tempkmin, ib, T->nb,
-                            D(k, p), ldak,
+                            D(k, p), lddk,
                             T(k, p), T->mb,
                             B(p, n), ldbp);
                     }
@@ -193,6 +194,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
+                lddk = BLKLDD(D, k);
 
                 /* Setting the order of the tiles*/
                 libhqr_walk_stepk(qrtree, k, tiles + (k+1));
@@ -249,13 +251,13 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             ChamUpper, tempkmin, temppn, A->nb,
                             A(k, p), ldak,
-                            D(k, p), ldak );
+                            D(k, p), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, temppn,
                             0., 1.,
-                            D(k, p), ldak );
+                            D(k, p), lddk );
 #endif
                     }
                     for (n = 0; n < B->nt; n++) {
@@ -268,7 +270,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             side, trans,
                             temppn, tempnn, tempkmin, ib, T->nb,
-                            D(k, p), ldak,
+                            D(k, p), lddk,
                             T(k, p), T->mb,
                             B(p, n), ldbp);
                     }
@@ -291,6 +293,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
+                lddk = BLKLDD(D, k);
 
                 /* Setting the order of the tiles*/
                 libhqr_walk_stepk(qrtree, k, tiles + (k+1));
@@ -345,13 +348,13 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             ChamUpper, tempkmin, temppn, A->nb,
                             A(k, p), ldak,
-                            D(k, p), ldak );
+                            D(k, p), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, temppn,
                             0., 1.,
-                            D(k, p), ldak );
+                            D(k, p), lddk );
 #endif
                     }
                     for (m = 0; m < B->mt; m++) {
@@ -365,7 +368,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             side, trans,
                             tempmm, temppn, tempkmin, ib, T->nb,
-                            D(k, p), ldak,
+                            D(k, p), lddk,
                             T(k, p), T->mb,
                             B(m, p), ldbm);
                     }
@@ -386,6 +389,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
+                lddk = BLKLDD(D, k);
 
                 T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
@@ -399,13 +403,13 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             ChamUpper, tempkmin, temppn, A->nb,
                             A(k, p), ldak,
-                            D(k, p), ldak );
+                            D(k, p), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, temppn,
                             0., 1.,
-                            D(k, p), ldak );
+                            D(k, p), lddk );
 #endif
                     }
                     for (m = 0; m < B->mt; m++) {
@@ -415,7 +419,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             side, trans,
                             tempmm, temppn, tempkmin, ib, T->nb,
-                            D(k, p), ldak,
+                            D(k, p), lddk,
                             T(k, p), TS->mb,
                             B(m, p), ldbm);
                     }
diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c
index 375cf8680e3b4a319ee18abf155e4a63a06cfa7c..ad8ebfc5cec51012ea4cc7260464627dd7ee8721 100644
--- a/compute/pzunmlqrh.c
+++ b/compute/pzunmlqrh.c
@@ -47,7 +47,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
     int k, m, n;
     int K, N, RD, lastRD;
-    int ldak, ldbN, ldbm, ldbNRD;
+    int ldak, lddk, ldbN, ldbm, ldbNRD;
     int tempNn, tempkm, tempnn, tempmm, tempNRDn, tempkmin;
     int ib;
 
@@ -95,6 +95,8 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
+                lddk = BLKLDD(D, k);
+
                 for (N = k; N < A->nt; N += BS) {
                     tempNn   = N == A->nt-1 ? A->n-N*A->nb : A->nb;
                     tempkmin = chameleon_min(tempkm,tempNn);
@@ -104,13 +106,13 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             &options,
                             ChamUpper, tempkmin, tempNn, A->nb,
                             A(k, N), ldak,
-                            D(k, N), ldak );
+                            D(k, N), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, tempNn,
                             0., 1.,
-                            D(k, N), ldak );
+                            D(k, N), lddk );
 #endif
                     }
                     for (n = 0; n < B->nt; n++) {
@@ -120,7 +122,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             side, trans,
                             tempNn, tempnn,
                             tempkmin, ib, T->nb,
-                            D(k, N), ldak,
+                            D(k, N), lddk,
                             T(k, N), T->mb,
                             B(N, n), ldbN);
                     }
@@ -196,6 +198,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
+                lddk = BLKLDD(D, k);
                 lastRD = 0;
                 for (RD = BS; RD < A->nt-k; RD *= 2)
                     lastRD = RD;
@@ -259,13 +262,13 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             &options,
                             ChamUpper, tempkmin, tempNn, A->nb,
                             A(k, N), ldak,
-                            D(k, N), ldak );
+                            D(k, N), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, tempNn,
                             0., 1.,
-                            D(k, N), ldak );
+                            D(k, N), lddk );
 #endif
                     }
                     for (n = 0; n < B->nt; n++) {
@@ -279,7 +282,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             side, trans,
                             tempNn, tempnn,
                             tempkmin, ib, T->nb,
-                            D(k, N), ldak,
+                            D(k, N), lddk,
                             T(k, N), T->mb,
                             B(N, n), ldbN);
                     }
@@ -300,6 +303,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
+                lddk = BLKLDD(D, k);
                 lastRD = 0;
                 for (RD = BS; RD < A->nt-k; RD *= 2)
                     lastRD = RD;
@@ -361,13 +365,13 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             &options,
                             ChamUpper, tempkmin, tempNn, A->nb,
                             A(k, N), ldak,
-                            D(k, N), ldak );
+                            D(k, N), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, tempNn,
                             0., 1.,
-                            D(k, N), ldak );
+                            D(k, N), lddk );
 #endif
                     }
                     for (m = 0; m < B->mt; m++) {
@@ -382,7 +386,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             side, trans,
                             tempmm, tempNn,
                             tempkmin, ib, T->nb,
-                            D(k, N), ldak,
+                            D(k, N), lddk,
                             T(k, N), T->mb,
                             B(m, N), ldbm);
                     }
@@ -401,6 +405,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
                 ldak = BLKLDD(A, k);
+                lddk = BLKLDD(D, k);
                 for (N = k; N < A->nt; N += BS) {
                     tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb;
                     tempkmin = chameleon_min(tempkm,tempNn);
@@ -409,13 +414,13 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             &options,
                             ChamUpper, tempkmin, tempNn, A->nb,
                             A(k, N), ldak,
-                            D(k, N), ldak );
+                            D(k, N), lddk );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, tempNn,
                             0., 1.,
-                            D(k, N), ldak );
+                            D(k, N), lddk );
 #endif
                     }
                     for (m = 0; m < B->mt; m++) {
@@ -426,7 +431,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             side, trans,
                             tempmm, tempNn,
                             tempkmin, ib, T->nb,
-                            D(k, N), ldak,
+                            D(k, N), lddk,
                             T(k, N), T->mb,
                             B(m, N), ldbm);
                     }
diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c
index d72a830afdc97dfaea70b9c24da8e910e26095ff..a06e0e13e41eefcd571719845b1369be3d375762 100644
--- a/compute/pzunmqr.c
+++ b/compute/pzunmqr.c
@@ -44,7 +44,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldbk, ldam, ldan, ldbm;
+    int ldak, ldbk, ldam, ldan, ldbm, lddk;
     int tempkm, tempnn, tempkmin, tempmm, tempkn;
     int ib, minMT, minM;
 
@@ -99,19 +99,20 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                 tempkm   = k == B->mt-1 ? B->m-k*B->mb : B->mb;
                 tempkmin = k == minMT-1 ? minM-k*A->nb : A->nb;
                 ldak = BLKLDD(A, k);
+                lddk = BLKLDD(D, k);
                 ldbk = BLKLDD(B, k);
                 if ( genD ) {
                     INSERT_TASK_zlacpy(
                         &options,
                         ChamLower, tempkm, tempkmin, A->nb,
                         A(k, k), ldak,
-                        D(k),    ldak );
+                        D(k),    lddk );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
                         ChamUpper, tempkm, tempkmin,
                         0., 1.,
-                        D(k), ldak );
+                        D(k), lddk );
 #endif
                 }
                 for (n = 0; n < B->nt; n++) {
@@ -120,7 +121,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                         &options,
                         side, trans,
                         tempkm, tempnn, tempkmin, ib, T->nb,
-                        D(k),    ldak,
+                        D(k),    lddk,
                         T(k, k), T->mb,
                         B(k, n), ldbk);
                 }
diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c
index f5e8017e6ed999a2168d9f981c8aeca922239fe1..584829e255eada9a11e8d3d8059850c5c6849d06 100644
--- a/compute/pzunmqr_param.c
+++ b/compute/pzunmqr_param.c
@@ -42,7 +42,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
     size_t ws_host = 0;
 
     int k, m, n, i, p;
-    int ldam, ldan, ldbm, ldbp;
+    int ldam, ldan, ldbm, ldbp, lddn, lddm;
     int tempnn, tempkmin, tempmm, tempkn;
     int ib, K, L;
     int *tiles;
@@ -102,6 +102,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                     tempkmin = chameleon_min(tempmm, tempkn);
                     ldam = BLKLDD(A, m);
+                    lddm = BLKLDD(D, m);
                     ldbm = BLKLDD(B, m);
 
                     if ( genD ) {
@@ -109,13 +110,13 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             ChamLower, tempmm, tempkmin, A->nb,
                             A(m, k), ldam,
-                            D(m, k), ldam );
+                            D(m, k), lddm );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempmm, tempkmin,
                             0., 1.,
-                            D(m, k), ldam );
+                            D(m, k), lddm );
 #endif
                     }
                     for (n = 0; n < B->nt; n++) {
@@ -124,7 +125,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             side, trans,
                             tempmm, tempnn, tempkmin, ib, T->nb,
-                            D(m, k), ldam,
+                            D(m, k), lddm,
                             T(m, k), T->mb,
                             B(m, n), ldbm);
                     }
@@ -243,6 +244,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                     tempkmin = chameleon_min(tempmm, tempkn);
                     ldam = BLKLDD(A, m);
+                    lddm = BLKLDD(D, m);
                     ldbm = BLKLDD(B, m);
 
                     if ( genD ) {
@@ -250,13 +252,13 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             ChamLower, tempmm, tempkmin, A->nb,
                             A(m, k), ldam,
-                            D(m, k), ldam );
+                            D(m, k), lddm );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempmm, tempkmin,
                             0., 1.,
-                            D(m, k), ldam );
+                            D(m, k), lddm );
 #endif
                     }
                     for (n = 0; n < B->nt; n++) {
@@ -269,7 +271,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             side, trans,
                             tempmm, tempnn, tempkmin, ib, T->nb,
-                            D(m, k), ldam,
+                            D(m, k), lddm,
                             T(m, k), T->mb,
                             B(m, n), ldbm);
                     }
@@ -342,19 +344,20 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                     tempkmin = chameleon_min(tempnn, tempkn);
                     ldan = BLKLDD(A, n);
+                    lddn = BLKLDD(D, n);
 
                     if ( genD ) {
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamLower, tempnn, tempkmin, A->nb,
                             A(n, k), ldan,
-                            D(n, k), ldan );
+                            D(n, k), lddn );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempnn, tempkmin,
                             0., 1.,
-                            D(n, k), ldan );
+                            D(n, k), lddn );
 #endif
                     }
                     for (m = 0; m < B->mt; m++) {
@@ -368,7 +371,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             side, trans,
                             tempmm, tempnn, tempkmin, ib, T->nb,
-                            D(n, k), ldan,
+                            D(n, k), lddn,
                             T(n, k), T->mb,
                             B(m, n), ldbm);
                     }
@@ -394,19 +397,20 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                     tempkmin = chameleon_min(tempnn, tempkn);
                     ldan = BLKLDD(A, n);
+                    lddn = BLKLDD(D, n);
 
                     if ( genD ) {
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamLower, tempnn, tempkmin, A->nb,
                             A(n, k), ldan,
-                            D(n, k), ldan );
+                            D(n, k), lddn );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempnn, tempkmin,
                             0., 1.,
-                            D(n, k), ldan );
+                            D(n, k), lddn );
 #endif
                     }
                     for (m = 0; m < B->mt; m++) {
@@ -416,7 +420,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                             &options,
                             side, trans,
                             tempmm, tempnn, tempkmin, ib, T->nb,
-                            D(n, k), ldan,
+                            D(n, k), lddn,
                             T(n, k), T->mb,
                             B(m, n), ldbm);
                     }
diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c
index 282f675876a7b4aa60a5a95fc02ba6804c5bb948..767630ae6349375da2695f42333c0a2e2559dee5 100644
--- a/compute/pzunmqrrh.c
+++ b/compute/pzunmqrrh.c
@@ -47,7 +47,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
     int k, m, n;
     int K, M, RD, lastRD;
-    int ldaM, ldam, ldan, ldaMRD;
+    int ldaM, ldam, ldan, ldaMRD, lddM;
     int ldbM, ldbm, ldbMRD;
     int tempMm, tempkn, tempnn, tempmm, tempMRDm, tempkmin;
     int ib;
@@ -99,19 +99,20 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                     tempMm   = M == A->mt-1 ? A->m-M*A->mb : A->mb;
                     tempkmin = chameleon_min(tempMm, tempkn);
                     ldaM = BLKLDD(A, M);
+                    lddM = BLKLDD(D, M);
                     ldbM = BLKLDD(B, M);
                     if ( genD ) {
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamLower, tempMm, tempkmin, A->nb,
                             A(M, k), ldaM,
-                            D(M, k), ldaM );
+                            D(M, k), lddM );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempMm, tempkmin,
                             0., 1.,
-                            D(M, k), ldaM );
+                            D(M, k), lddM );
 #endif
                     }
                     for (n = 0; n < B->nt; n++) {
@@ -120,7 +121,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             &options,
                             side, trans,
                             tempMm, tempnn, tempkmin, ib, T->nb,
-                            D(M, k), ldaM,
+                            D(M, k), lddM,
                             T(M, k), T->mb,
                             B(M, n), ldbM);
                     }
@@ -230,6 +231,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                     tempMm   = M == A->mt-1 ? A->m-M*A->mb : A->mb;
                     tempkmin = chameleon_min(tempMm, tempkn);
                     ldaM = BLKLDD(A, M);
+                    lddM = BLKLDD(D, M);
                     ldbM = BLKLDD(B, M);
                     for (m = chameleon_min(M+BS, A->mt)-1; m > M; m--) {
                         tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
@@ -260,13 +262,13 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             &options,
                             ChamLower, tempMm, tempkmin, A->nb,
                             A(M, k), ldaM,
-                            D(M, k), ldaM );
+                            D(M, k), lddM );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempMm, tempkmin,
                             0., 1.,
-                            D(M, k), ldaM );
+                            D(M, k), lddM );
 #endif
                     }
                     for (n = 0; n < B->nt; n++) {
@@ -278,7 +280,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zunmqr(
                             &options, side, trans,
                             tempMm, tempnn, tempkmin, ib, T->nb,
-                            D(M, k), ldaM,
+                            D(M, k), lddM,
                             T(M, k), T->mb,
                             B(M, n), ldbM);
                     }
@@ -331,6 +333,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                     tempMm   = M == A->mt-1 ? A->m-M*A->mb : A->mb;
                     tempkmin = chameleon_min(tempMm, tempkn);
                     ldaM = BLKLDD(A, M);
+                    lddM = BLKLDD(D, M);
                     for (n = chameleon_min(M+BS, A->mt)-1; n > M; n--) {
                         ldan = BLKLDD(A, n);
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
@@ -360,13 +363,13 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             &options,
                             ChamLower, tempMm, tempkmin, A->nb,
                             A(M, k), ldaM,
-                            D(M, k), ldaM );
+                            D(M, k), lddM );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempMm, tempkmin,
                             0., 1.,
-                            D(M, k), ldaM );
+                            D(M, k), lddM );
 #endif
                     }
                     for (m = 0; m < B->mt; m++) {
@@ -380,7 +383,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             &options,
                             side, trans,
                             tempmm, tempMm, tempkmin, ib, T->nb,
-                            D(M, k), ldaM,
+                            D(M, k), lddM,
                             T(M, k), T->mb,
                             B(m, M), ldbm);
                     }
@@ -402,18 +405,19 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                     tempMm   = M == A->mt-1 ? A->m-M*A->mb : A->mb;
                     tempkmin = chameleon_min(tempMm, tempkn);
                     ldaM = BLKLDD(A, M);
+                    lddM = BLKLDD(D, M);
                     if ( genD ) {
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamLower, tempMm, tempkmin, A->nb,
                             A(M, k), ldaM,
-                            D(M, k), ldaM );
+                            D(M, k), lddM );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempMm, tempkmin,
                             0., 1.,
-                            D(M, k), ldaM );
+                            D(M, k), lddM );
 #endif
                     }
                     for (m = 0; m < B->mt; m++) {
@@ -423,7 +427,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             &options,
                             side, trans,
                             tempmm, tempMm, tempkmin, ib, T->nb,
-                            D(M, k), ldaM,
+                            D(M, k), lddM,
                             T(M, k), T->mb,
                             B(m, M), ldbm);
                     }
diff --git a/compute/zgelqf.c b/compute/zgelqf.c
index 5a0e2cd7815a5f78e890c3cab40fae540e51a1b9..f64b4ee0ca8a4d0d0cad5f32fb527a48420d83f2 100644
--- a/compute/zgelqf.c
+++ b/compute/zgelqf.c
@@ -280,7 +280,7 @@ int CHAMELEON_zgelqf_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T,
      */
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int m = chameleon_min(A->mt, A->nt) * A->mb;
+        int m = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
         Dptr = &D;
     }
diff --git a/compute/zgelqf_param.c b/compute/zgelqf_param.c
index c686323ed04565cade1b1d6797ba00f91291c0b1..031f2f921c1e52bda0e15b664a37df4f57771180 100644
--- a/compute/zgelqf_param.c
+++ b/compute/zgelqf_param.c
@@ -284,7 +284,7 @@ int CHAMELEON_zgelqf_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t
      */
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int m = chameleon_min(A->mt, A->nt) * A->mb;
+        int m = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
         Dptr = &D;
     }
diff --git a/compute/zgelqs.c b/compute/zgelqs.c
index 46fd149c9c763b9ed2f805980738e055bb56b51e..d2795a97e71d2e471059186491f4ff6493505f10 100644
--- a/compute/zgelqs.c
+++ b/compute/zgelqs.c
@@ -320,7 +320,7 @@ int CHAMELEON_zgelqs_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B,
 
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int m = chameleon_min(A->mt, A->nt) * A->mb;
+        int m = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
         Dptr = &D;
     }
diff --git a/compute/zgelqs_param.c b/compute/zgelqs_param.c
index 70627ae7434aacad4d09f61d333155219b46e86c..b24d70740a12989054a4ccb1f61f16a44518b975 100644
--- a/compute/zgelqs_param.c
+++ b/compute/zgelqs_param.c
@@ -332,7 +332,7 @@ int CHAMELEON_zgelqs_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t
 
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int m = chameleon_min(A->mt, A->nt) * A->mb;
+        int m = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
         Dptr = &D;
     }
diff --git a/compute/zgels.c b/compute/zgels.c
index e49f27f9c8f987f8715037e1b47cb976923588a1..bf620d79d333bd0ea5a66c68f74ea5be6f6df0d4 100644
--- a/compute/zgels.c
+++ b/compute/zgels.c
@@ -373,10 +373,9 @@ int CHAMELEON_zgels_Tile_Async( cham_trans_t trans, CHAM_desc_t *A,
      }
      */
     if (A->m >= A->n) {
-
 #if defined(CHAMELEON_COPY_DIAG)
         {
-            int n = chameleon_min(A->mt, A->nt) * A->nb;
+            int n = chameleon_min(A->m, A->n);
             chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, );
             Dptr = &D;
         }
@@ -401,7 +400,7 @@ int CHAMELEON_zgels_Tile_Async( cham_trans_t trans, CHAM_desc_t *A,
          free(subB); */
 #if defined(CHAMELEON_COPY_DIAG)
         {
-            int m = chameleon_min(A->mt, A->nt) * A->mb;
+            int m = chameleon_min(A->m, A->n);
             chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
             Dptr = &D;
         }
diff --git a/compute/zgels_param.c b/compute/zgels_param.c
index c7e0f790354a1d54e8daaeb8f2b31efab1e39bb9..9ee807f0c47f93fa6667a6fd94d1884acfaad53b 100644
--- a/compute/zgels_param.c
+++ b/compute/zgels_param.c
@@ -387,7 +387,7 @@ int CHAMELEON_zgels_param_Tile_Async( const libhqr_tree_t *qrtree, cham_trans_t
 
 #if defined(CHAMELEON_COPY_DIAG)
         {
-            int n = chameleon_min(A->mt, A->nt) * A->nb;
+            int n = chameleon_min(A->m, A->n);
             chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, );
             Dptr = &D;
         }
@@ -403,7 +403,7 @@ int CHAMELEON_zgels_param_Tile_Async( const libhqr_tree_t *qrtree, cham_trans_t
     else {
 #if defined(CHAMELEON_COPY_DIAG)
         {
-            int m = chameleon_min(A->mt, A->nt) * A->mb;
+            int m = chameleon_min(A->m, A->n);
             chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
             Dptr = &D;
         }
diff --git a/compute/zgeqrf.c b/compute/zgeqrf.c
index b64b6d105e912d4688ef18471e6e60856944c310..ed7f122f66f88e1471059c890cf6878b2057e877 100644
--- a/compute/zgeqrf.c
+++ b/compute/zgeqrf.c
@@ -279,7 +279,7 @@ int CHAMELEON_zgeqrf_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T,
      */
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int n = chameleon_min(A->mt, A->nt) * A->nb;
+        int n = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, );
         Dptr = &D;
     }
diff --git a/compute/zgeqrf_param.c b/compute/zgeqrf_param.c
index 4565554b3a8643c5858ef6f6ba57ce298751d655..5915c3c9fbea12eef035d256e04c7921efa72675 100644
--- a/compute/zgeqrf_param.c
+++ b/compute/zgeqrf_param.c
@@ -295,7 +295,7 @@ int CHAMELEON_zgeqrf_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t
      */
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int n = chameleon_min(A->mt, A->nt) * A->nb;
+        int n = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, );
         Dptr = &D;
     }
diff --git a/compute/zgeqrs.c b/compute/zgeqrs.c
index bd1a839e24a03c0a89385580d6f8af719e78d818..bfbf1355356484e7f23589d6c47a9651ec67d0ad 100644
--- a/compute/zgeqrs.c
+++ b/compute/zgeqrs.c
@@ -309,7 +309,7 @@ int CHAMELEON_zgeqrs_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *B,
      */
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int n = chameleon_min(A->mt, A->nt) * A->nb;
+        int n = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, );
         Dptr = &D;
     }
diff --git a/compute/zgeqrs_param.c b/compute/zgeqrs_param.c
index 131992abdc141e09b76f25a6371725060b904042..a26d4d55086828f22e73c5e52825994f576b583f 100644
--- a/compute/zgeqrs_param.c
+++ b/compute/zgeqrs_param.c
@@ -313,7 +313,7 @@ int CHAMELEON_zgeqrs_param_Tile_Async( const libhqr_tree_t *qrtree,
      */
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int n = chameleon_min(A->mt, A->nt) * A->nb;
+        int n = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, );
         Dptr = &D;
     }
diff --git a/compute/zunglq.c b/compute/zunglq.c
index 0693e929f1eb362882bfd0b2f3299b435c9dc0d1..6b851c40fe07fc56cdf74539d0a225e71a202ff1 100644
--- a/compute/zunglq.c
+++ b/compute/zunglq.c
@@ -302,7 +302,7 @@ int CHAMELEON_zunglq_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q,
 
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int m = chameleon_min(A->mt, A->nt) * A->mb;
+        int m = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
         Dptr = &D;
     }
diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c
index b89d0d5c8dd345c1030c4f50bb8b276598b00c57..5b2856c241681e86448cdb8093a692546065b4b3 100644
--- a/compute/zunglq_param.c
+++ b/compute/zunglq_param.c
@@ -305,7 +305,7 @@ int CHAMELEON_zunglq_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t
      */
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int m = chameleon_min(A->mt, A->nt) * A->mb;
+        int m = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
         Dptr = &D;
     }
diff --git a/compute/zungqr.c b/compute/zungqr.c
index 372aebe46298c48c5fd1390a0c9fee437082fae5..afbb19dbc81e571ab421ca88800f3ae434b7f5a0 100644
--- a/compute/zungqr.c
+++ b/compute/zungqr.c
@@ -299,7 +299,7 @@ int CHAMELEON_zungqr_Tile_Async( CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *Q,
      */
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int n = chameleon_min(A->mt, A->nt) * A->nb;
+        int n = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, );
         Dptr = &D;
     }
diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c
index de8caa0638e130f6fd11749c313c040af026b3e3..fa2bcc46db1bca49c72defe44101f332a386b9a4 100644
--- a/compute/zungqr_param.c
+++ b/compute/zungqr_param.c
@@ -303,7 +303,7 @@ int CHAMELEON_zungqr_param_Tile_Async( const libhqr_tree_t *qrtree, CHAM_desc_t
      */
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int n = chameleon_min(A->mt, A->nt) * A->nb;
+        int n = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, );
         Dptr = &D;
     }
diff --git a/compute/zunmlq.c b/compute/zunmlq.c
index 1448429838d34d5a7b1a3075280a4045b5331779..50763f9f572030ed801f3b05f78ae4307900b638 100644
--- a/compute/zunmlq.c
+++ b/compute/zunmlq.c
@@ -362,7 +362,7 @@ int CHAMELEON_zunmlq_Tile_Async( cham_side_t side, cham_trans_t trans,
      */
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int m = chameleon_min(A->mt, A->nt) * A->mb;
+        int m = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
         Dptr = &D;
     }
diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c
index affdaff790e0c7137ef67e3c24e57cdbef9230ce..623a54ee9f5f8e49a24514ec0857c1ec987aedb1 100644
--- a/compute/zunmlq_param.c
+++ b/compute/zunmlq_param.c
@@ -367,7 +367,7 @@ int CHAMELEON_zunmlq_param_Tile_Async( const libhqr_tree_t *qrtree, cham_side_t
 
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int m = chameleon_min(A->mt, A->nt) * A->mb;
+        int m = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, m, A->n, 0, 0, m, A->n, );
         Dptr = &D;
     }
diff --git a/compute/zunmqr.c b/compute/zunmqr.c
index 18d5a0ff336b8924281fb613a6e770740e952a58..5f20272584e13f37031aad737186b5747371969f 100644
--- a/compute/zunmqr.c
+++ b/compute/zunmqr.c
@@ -365,7 +365,7 @@ int CHAMELEON_zunmqr_Tile_Async( cham_side_t side, cham_trans_t trans,
 
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int n = chameleon_min(A->mt, A->nt) * A->nb;
+        int n = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, );
         Dptr = &D;
     }
diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c
index 8751255da2eba667ad427c90ccb8e9a3821c1231..78d59cc037adf0fcb826240bfa2ef53faf8fc82f 100644
--- a/compute/zunmqr_param.c
+++ b/compute/zunmqr_param.c
@@ -374,7 +374,7 @@ int CHAMELEON_zunmqr_param_Tile_Async( const libhqr_tree_t *qrtree,
 
 #if defined(CHAMELEON_COPY_DIAG)
     {
-        int n = chameleon_min(A->mt, A->nt) * A->nb;
+        int n = chameleon_min(A->m, A->n);
         chameleon_zdesc_alloc(D, A->mb, A->nb, A->m, n, 0, 0, A->m, n, );
         Dptr = &D;
     }
diff --git a/control/workspace.c b/control/workspace.c
index 28ffa861552114188a5814670d681842f258265e..e29a9358c70fa6a89973f9d14d0fbd92f029f11c 100644
--- a/control/workspace.c
+++ b/control/workspace.c
@@ -67,7 +67,9 @@ int chameleon_alloc_ibnb_tile(int M, int N, cham_tasktype_t func, int type, CHAM
          (func == CHAMELEON_FUNC_DGESVD) ||
          (func == CHAMELEON_FUNC_CGESVD) ||
          (func == CHAMELEON_FUNC_ZGESVD)))
+    {
         NT *= 2;
+    }
 
     lm = IB * MT;
     ln = NB * NT;
diff --git a/testing/testing_zlange.c b/testing/testing_zlange.c
index b0aaf5cd50025ca1e037d0626561409c771a8104..5aff7317778d37ad70541698c1f742b24c355b4e 100644
--- a/testing/testing_zlange.c
+++ b/testing/testing_zlange.c
@@ -54,6 +54,8 @@ int testing_zlange(int argc, char **argv)
     double            *work = (double*) malloc(max(M,N)*sizeof(double));
     double normcham, normlapack, result;
 
+    RUNTIME_comm_set_tag_sizes( 31, 16 );
+
     eps = LAPACKE_dlamch_work('e');
 
     printf("\n");
@@ -238,5 +240,5 @@ int testing_zlange(int argc, char **argv)
 
     free(A);
     free(work);
-    return 0 /*hres*/;
+    return hres;
 }