diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c
index c72ed710e0d5d3f9cc20339c3d1e9d75904db1c8..32346a6e8805d671620a8722ba2a0c7852a97815 100644
--- a/compute/pzgelqf.c
+++ b/compute/pzgelqf.c
@@ -104,7 +104,7 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
             int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
             INSERT_TASK_zlacpy(
                 &options,
-                ChamUpper, tempDkm, tempDkn, A->nb,
+                ChamUpper, tempDkm, tempDkn,
                 A(k, k),
                 D(k) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c
index a8c45399f82897c073438226d45157cd2c8d0eab..b69b84b50d7c2a0ce4359e83e9bdb475a8bf2cc1 100644
--- a/compute/pzgelqf_param.c
+++ b/compute/pzgelqf_param.c
@@ -115,7 +115,7 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
 
                 INSERT_TASK_zlacpy(
                     &options,
-                    ChamUpper, tempDkm, tempDpn, A->nb,
+                    ChamUpper, tempDkm, tempDpn,
                     A(k, p), D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c
index 5d7fae952cc43166f09384730c17d05d27ae4b49..d192c978c2345cf5434add595606d3734c18a46e 100644
--- a/compute/pzgelqfrh.c
+++ b/compute/pzgelqfrh.c
@@ -107,7 +107,7 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
 
                 INSERT_TASK_zlacpy(
                     &options,
-                    ChamUpper, tempDkm, tempDNn, A->nb,
+                    ChamUpper, tempDkm, tempDNn,
                     A(k, N),
                     D(k, N) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzgemm.c b/compute/pzgemm.c
index b2a71d51163d12ba390896c906a9efc7399dfc1f..66a79bcde0a26f21c9e18f5827695a7b36e4fbb6 100644
--- a/compute/pzgemm.c
+++ b/compute/pzgemm.c
@@ -72,7 +72,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
             if ( transA == ChamNoTrans ) {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempmm, tempkk, C->mb,
+                    ChamUpperLower, tempmm, tempkk,
                     A(  m,  k ),
                     WA( m, (k % C->q) + lq ) );
 
@@ -81,7 +81,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                 for ( q=1; q < C->q; q++ ) {
                     INSERT_TASK_zlacpy(
                         options,
-                        ChamUpperLower, tempmm, tempkk, C->mb,
+                        ChamUpperLower, tempmm, tempkk,
                         WA( m, ((k+q-1) % C->q) + lq ),
                         WA( m, ((k+q)   % C->q) + lq ) );
                 }
@@ -89,7 +89,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
             else {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempkk, tempmm, C->mb,
+                    ChamUpperLower, tempkk, tempmm,
                     A(  k,  m ),
                     WA( m, (k % C->q) + lq ) );
 
@@ -98,7 +98,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                 for ( q=1; q < C->q; q++ ) {
                     INSERT_TASK_zlacpy(
                         options,
-                        ChamUpperLower, tempkk, tempmm, C->mb,
+                        ChamUpperLower, tempkk, tempmm,
                         WA( m, ((k+q-1) % C->q) + lq ),
                         WA( m, ((k+q)   % C->q) + lq ) );
                 }
@@ -112,7 +112,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
             if ( transB == ChamNoTrans ) {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempkk, tempnn, C->mb,
+                    ChamUpperLower, tempkk, tempnn,
                     B(   k,              n ),
                     WB( (k % C->p) + lp, n ) );
 
@@ -121,7 +121,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                 for ( p=1; p < C->p; p++ ) {
                     INSERT_TASK_zlacpy(
                         options,
-                        ChamUpperLower, tempkk, tempnn, C->mb,
+                        ChamUpperLower, tempkk, tempnn,
                         WB( ((k+p-1) % C->p) + lp, n ),
                         WB( ((k+p)   % C->p) + lp, n ) );
                 }
@@ -129,7 +129,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
             else {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempnn, tempkk, C->mb,
+                    ChamUpperLower, tempnn, tempkk,
                     B(   n,              k ),
                     WB( (k % C->p) + lp, n ) );
 
@@ -138,7 +138,7 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                 for ( p=1; p < C->p; p++ ) {
                     INSERT_TASK_zlacpy(
                         options,
-                        ChamUpperLower, tempnn, tempkk, C->mb,
+                        ChamUpperLower, tempnn, tempkk,
                         WB( ((k+p-1) % C->p) + lp, n ),
                         WB( ((k+p)   % C->p) + lp, n ) );
                 }
diff --git a/compute/pzgenm2.c b/compute/pzgenm2.c
index ec1c9cfc7a1033838aad9bdb755ff6eb4992bc12..c7b209946b0e18d713d80fa67bf0249d15e4d169 100644
--- a/compute/pzgenm2.c
+++ b/compute/pzgenm2.c
@@ -144,7 +144,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
             if ( (m != 0) || (n != 0) ) {
                 INSERT_TASK_dlacpy(
                     &options,
-                    ChamUpperLower, 1, 1, 1,
+                    ChamUpperLower, 1, 1,
                     NRMX(0, 0),
                     NRMX(m, n) );
             }
@@ -200,7 +200,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
 #else
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamUpperLower, 1, tempnn, tempnn,
+                        ChamUpperLower, 1, tempnn,
                         DROW( 0, n ),
                         X(    0, n ) );
 #endif
@@ -210,7 +210,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
                 for (m = 1; m < A->p; m++) {
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamUpperLower, 1, tempnn, tempnn,
+                        ChamUpperLower, 1, tempnn,
                         X( 0, n ),
                         X( m, n ) );
                 }
@@ -263,7 +263,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
             for (k = 1; k < A->q; k++) {
                 INSERT_TASK_zlacpy(
                     &options,
-                    ChamUpperLower, tempmm, 1, tempmm,
+                    ChamUpperLower, tempmm, 1,
                     SX( m, 0 ),
                     SX( m, k ) );
             }
@@ -298,7 +298,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
             for (k = 1; k < A->p; k++) {
                 INSERT_TASK_zlacpy(
                     &options,
-                    ChamUpperLower, 1, tempnn, tempnn,
+                    ChamUpperLower, 1, tempnn,
                     X( 0, n ),
                     X( k, n ) );
             }
@@ -340,7 +340,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
             for(n = 1; n < A->q; n++) {
                 INSERT_TASK_dlacpy(
                     &options,
-                    ChamUpperLower, 1, 1, 1,
+                    ChamUpperLower, 1, 1,
                     NRMX( myp, 0 ),
                     NRMX( myp, n ) );
             }
@@ -381,7 +381,7 @@ chameleon_pzgenm2( double tol, const CHAM_desc_t *A, double *result,
             for(m = 1; m < A->p; m++) {
                 INSERT_TASK_dlacpy(
                     &options,
-                    ChamUpperLower, 1, 1, 1,
+                    ChamUpperLower, 1, 1,
                     NRMSX( 0, myq ),
                     NRMSX( m, myq ) );
             }
diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c
index 9baf7ae0028a7fb00bd1dd2a7b995fc3bc4074d2..a7637392de7071440c77ae8e17d30a41ea879605 100644
--- a/compute/pzgeqrf.c
+++ b/compute/pzgeqrf.c
@@ -98,7 +98,7 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
             int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
             INSERT_TASK_zlacpy(
                 &options,
-                ChamLower, tempDkm, tempDkn, A->nb,
+                ChamLower, tempDkm, tempDkn,
                 A(k, k),
                 D(k) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c
index accf33506175e76baa0dad7319b4919b00afbcc1..a4e08e061087e47896d9586013b44878c0283c9c 100644
--- a/compute/pzgeqrf_param.c
+++ b/compute/pzgeqrf_param.c
@@ -77,7 +77,7 @@ int chameleon_pzgeqrf_param_step( int genD, cham_uplo_t uplo, int k, int ib,
 
             INSERT_TASK_zlacpy(
                 options,
-                ChamLower, tempDpm, tempDkn, A->nb,
+                ChamLower, tempDpm, tempDkn,
                 A(p, k), D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
             INSERT_TASK_zlaset(
diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c
index 8cd96c65cd87c4b47201a6d48be55561a1f56f47..21c1b43ed4ccaf04c9b7a484b64768bfe2260c44 100644
--- a/compute/pzgeqrfrh.c
+++ b/compute/pzgeqrfrh.c
@@ -104,7 +104,7 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
 
                 INSERT_TASK_zlacpy(
                     &options,
-                    ChamLower, tempDMm, tempDkn, A->nb,
+                    ChamLower, tempDMm, tempDkn,
                     A(M, k),
                     D(M, k) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzgetrf_incpiv.c b/compute/pzgetrf_incpiv.c
index 69505a5c4f1528d657d234b5e4f396229029607d..1b61fa1e62042f47ea6264feaeaa2639a967e3d0 100644
--- a/compute/pzgetrf_incpiv.c
+++ b/compute/pzgetrf_incpiv.c
@@ -97,7 +97,7 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i
 #if defined(CHAMELEON_COPY_DIAG)
             INSERT_TASK_zlacpy(
                 &options,
-                ChamUpperLower, tempkm, tempkn, A->nb,
+                ChamUpperLower, tempkm, tempkn,
                 A(k, k),
                 D(k));
 #endif
diff --git a/compute/pzhemm.c b/compute/pzhemm.c
index 175f2ce2007c388f6b371b4c8aed5e77ac592e78..75f1ab66be514b1eaf22f0f47efd8321cbcebc31 100644
--- a/compute/pzhemm.c
+++ b/compute/pzhemm.c
@@ -88,7 +88,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
             INSERT_TASK_zlacpy(
                 options,
-                ChamUpperLower, tempam, tempak, C->mb,
+                ChamUpperLower, tempam, tempak,
                 A( Am, Ak ),
                 WA( m, (k % C->q) + lq ) );
 
@@ -97,7 +97,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
             for ( q=1; q < C->q; q++ ) {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempam, tempak, C->mb,
+                    ChamUpperLower, tempam, tempak,
                     WA( m, ((k+q-1) % C->q) + lq ),
                     WA( m, ((k+q)   % C->q) + lq ) );
             }
@@ -110,7 +110,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
             INSERT_TASK_zlacpy(
                 options,
-                ChamUpperLower, tempkk, tempnn, C->mb,
+                ChamUpperLower, tempkk, tempnn,
                 B(   k,              n ),
                 WB( (k % C->p) + lp, n ) );
 
@@ -119,7 +119,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
             for ( p=1; p < C->p; p++ ) {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempkk, tempnn, C->mb,
+                    ChamUpperLower, tempkk, tempnn,
                     WB( ((k+p-1) % C->p) + lp, n ),
                     WB( ((k+p)   % C->p) + lp, n ) );
             }
@@ -205,7 +205,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
             INSERT_TASK_zlacpy(
                 options,
-                ChamUpperLower, tempmm, tempkk, C->mb,
+                ChamUpperLower, tempmm, tempkk,
                 B(  m,  k ),
                 WA( m, (k % C->q) + lq ) );
 
@@ -214,7 +214,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
             for ( q=1; q < C->q; q++ ) {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempmm, tempkk, C->mb,
+                    ChamUpperLower, tempmm, tempkk,
                     WA( m, ((k+q-1) % C->q) + lq ),
                     WA( m, ((k+q)   % C->q) + lq ) );
             }
@@ -245,7 +245,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
             INSERT_TASK_zlacpy(
                 options,
-                ChamUpperLower, tempak, tempan, C->mb,
+                ChamUpperLower, tempak, tempan,
                 A(  Ak,              An ),
                 WB( (k % C->p) + lp, n  ) );
 
@@ -254,7 +254,7 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
             for ( p=1; p < C->p; p++ ) {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempak, tempan, C->mb,
+                    ChamUpperLower, tempak, tempan,
                     WB( ((k+p-1) % C->p) + lp, n ),
                     WB( ((k+p)   % C->p) + lp, n ) );
             }
diff --git a/compute/pzhetrd_he2hb.c b/compute/pzhetrd_he2hb.c
index 8522f39d6e085ad0c9c513491978712948157a39..600e1ec7be7d036b474c97ea8f052237de4d5d24 100644
--- a/compute/pzhetrd_he2hb.c
+++ b/compute/pzhetrd_he2hb.c
@@ -120,7 +120,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
 #if defined(CHAMELEON_COPY_DIAG)
            INSERT_TASK_zlacpy(
                &options,
-               ChamLower, tempkm, tempkn, A->nb,
+               ChamLower, tempkm, tempkn,
                A(k+1, k),
                E(k+1, k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -271,7 +271,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
 #if defined(CHAMELEON_COPY_DIAG)
            INSERT_TASK_zlacpy(
                &options,
-               ChamUpper, tempkm, tempkn, A->nb,
+               ChamUpper, tempkm, tempkn,
                A(k, k+1),
                E(k, k+1) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -411,7 +411,7 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
     for (k = 1; k < A->nt; k++){
         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
         INSERT_TASK_zlacpy( &options,
-                            uplo, tempkn, tempkn, A->mb,
+                            uplo, tempkn, tempkn,
                             D(k), A(k, k));
     }
 
diff --git a/compute/pzlacpy.c b/compute/pzlacpy.c
index 1a2307992c54f65841a088c8dd40f41742c61666..280ff84731420ee417fdbf55e926a5f9ad17b8b9 100644
--- a/compute/pzlacpy.c
+++ b/compute/pzlacpy.c
@@ -53,7 +53,7 @@ void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamUpper,
-                    X, Y, A->mb,
+                    X, Y,
                     A(m, m),
                     B(m, m));
             }
@@ -62,7 +62,7 @@ void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamUpperLower,
-                    X, Y, A->mb,
+                    X, Y,
                     A(m, n),
                     B(m, n));
             }
@@ -79,7 +79,7 @@ void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamLower,
-                    X, Y, A->mb,
+                    X, Y,
                     A(m, m),
                     B(m, m));
             }
@@ -88,7 +88,7 @@ void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamUpperLower,
-                    X, Y, A->mb,
+                    X, Y,
                     A(m, n),
                     B(m, n));
             }
@@ -106,7 +106,7 @@ void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamUpperLower,
-                    X, Y, A->mb,
+                    X, Y,
                     A(m, n),
                     B(m, n));
             }
diff --git a/compute/pzlange.c b/compute/pzlange.c
index 4acf29aa1c5e1edcf15b58f29eda0302dcbd4410..833b0f6953a1e2962ca98b83e4f0e06a0e690538 100644
--- a/compute/pzlange.c
+++ b/compute/pzlange.c
@@ -507,7 +507,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
             if ( (m != 0) || (n != 0) ) {
                 INSERT_TASK_dlacpy(
                     &options,
-                    ChamUpperLower, 1, 1, 1,
+                    ChamUpperLower, 1, 1,
                     W( &Welt, 0, 0 ), W( &Welt, m, n ) );
             }
         }
diff --git a/compute/pzlansy.c b/compute/pzlansy.c
index 5bb6b62425dbafbbb1b47917094bb4f55bc8ff4e..1472f0a69f30c44707c88cb5f730e007d7a97537 100644
--- a/compute/pzlansy.c
+++ b/compute/pzlansy.c
@@ -413,7 +413,7 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
             if ( (m != 0) || (n != 0) ) {
                 INSERT_TASK_dlacpy(
                     &options,
-                    ChamUpperLower, 1, 1, 1,
+                    ChamUpperLower, 1, 1,
                     W( &Welt, 0, 0 ), W( &Welt, m, n ));
             }
         }
diff --git a/compute/pzsymm.c b/compute/pzsymm.c
index 27adf0cdfb541371f367aad4131703c747326bb6..2f2fa1997472bc2fc56002b5bc7cf1df12d48676 100644
--- a/compute/pzsymm.c
+++ b/compute/pzsymm.c
@@ -88,7 +88,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
             INSERT_TASK_zlacpy(
                 options,
-                ChamUpperLower, tempam, tempak, C->mb,
+                ChamUpperLower, tempam, tempak,
                 A( Am, Ak ),
                 WA( m, (k % C->q) + lq ) );
 
@@ -97,7 +97,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
             for ( q=1; q < C->q; q++ ) {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempam, tempak, C->mb,
+                    ChamUpperLower, tempam, tempak,
                     WA( m, ((k+q-1) % C->q) + lq ),
                     WA( m, ((k+q)   % C->q) + lq ) );
             }
@@ -110,7 +110,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
             INSERT_TASK_zlacpy(
                 options,
-                ChamUpperLower, tempkk, tempnn, C->mb,
+                ChamUpperLower, tempkk, tempnn,
                 B(   k,              n ),
                 WB( (k % C->p) + lp, n ) );
 
@@ -119,7 +119,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
             for ( p=1; p < C->p; p++ ) {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempkk, tempnn, C->mb,
+                    ChamUpperLower, tempkk, tempnn,
                     WB( ((k+p-1) % C->p) + lp, n ),
                     WB( ((k+p)   % C->p) + lp, n ) );
             }
@@ -205,7 +205,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
             INSERT_TASK_zlacpy(
                 options,
-                ChamUpperLower, tempmm, tempkk, C->mb,
+                ChamUpperLower, tempmm, tempkk,
                 B(  m,  k ),
                 WA( m, (k % C->q) + lq ) );
 
@@ -214,7 +214,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
             for ( q=1; q < C->q; q++ ) {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempmm, tempkk, C->mb,
+                    ChamUpperLower, tempmm, tempkk,
                     WA( m, ((k+q-1) % C->q) + lq ),
                     WA( m, ((k+q)   % C->q) + lq ) );
             }
@@ -245,7 +245,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
             INSERT_TASK_zlacpy(
                 options,
-                ChamUpperLower, tempak, tempan, C->mb,
+                ChamUpperLower, tempak, tempan,
                 A(  Ak,              An ),
                 WB( (k % C->p) + lp, n  ) );
 
@@ -254,7 +254,7 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
             for ( p=1; p < C->p; p++ ) {
                 INSERT_TASK_zlacpy(
                     options,
-                    ChamUpperLower, tempak, tempan, C->mb,
+                    ChamUpperLower, tempak, tempan,
                     WB( ((k+p-1) % C->p) + lp, n ),
                     WB( ((k+p)   % C->p) + lp, n ) );
             }
diff --git a/compute/pzunglq.c b/compute/pzunglq.c
index f65f73e60ac098915495872d52f5b65a1c5fe769..42f623cb28a63928221b5e7d7118b28d749e0833 100644
--- a/compute/pzunglq.c
+++ b/compute/pzunglq.c
@@ -123,7 +123,7 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
             int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
             INSERT_TASK_zlacpy(
                 &options,
-                ChamUpper, tempkmin, tempDkn, A->nb,
+                ChamUpper, tempkmin, tempDkn,
                 A(k, k),
                 D(k) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzunglq_param.c b/compute/pzunglq_param.c
index 1a15e384ed920a25b6748a37dc67bd85aed2e24f..1f1963cee614ddaade991adcd4f536a16be19b6a 100644
--- a/compute/pzunglq_param.c
+++ b/compute/pzunglq_param.c
@@ -137,7 +137,7 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
                 int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
                 INSERT_TASK_zlacpy(
                     &options,
-                    ChamUpper, tempkmin, tempDpn, A->nb,
+                    ChamUpper, tempkmin, tempDpn,
                     A(k, p),
                     D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c
index 397b5f1e88a7479d1db78fbad77c184f6b029397..f904846cd8bc8aa2888e1dd24e5758d8ad5fd972 100644
--- a/compute/pzunglqrh.c
+++ b/compute/pzunglqrh.c
@@ -150,7 +150,7 @@ void chameleon_pzunglqrh( int genD, int BS,
 
                 INSERT_TASK_zlacpy(
                     &options,
-                    ChamUpper, tempkmin, tempDNn, A->nb,
+                    ChamUpper, tempkmin, tempDNn,
                     A(k, N),
                     D(k, N) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzungqr.c b/compute/pzungqr.c
index c7ee3f5ddf49bb71c3aaf7a4f9e0c9e4367fce85..cb4b8e967c0d77c42380699090a234bea0f6afea 100644
--- a/compute/pzungqr.c
+++ b/compute/pzungqr.c
@@ -123,7 +123,7 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
 
             INSERT_TASK_zlacpy(
                 &options,
-                ChamLower, tempDkm, tempkmin, A->nb,
+                ChamLower, tempDkm, tempkmin,
                 A(k, k),
                 D(k) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c
index 16fbefb8dea33c5ca32d10f0e35c9edbc4837db6..2f5c24983acbde9e159f9038849f8521f003bdbd 100644
--- a/compute/pzungqr_param.c
+++ b/compute/pzungqr_param.c
@@ -114,7 +114,7 @@ void chameleon_pzungqr_param_step( int genD, cham_uplo_t uplo, int k, int ib,
             int tempDmm = m == D->mt-1 ? D->m - m * D->mb : D->mb;
             INSERT_TASK_zlacpy(
                 options,
-                ChamLower, tempDmm, tempkmin, A->nb,
+                ChamLower, tempDmm, tempkmin,
                 A(m, k),
                 D(m, k) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c
index d500fe2db7c750f8ff4ead41da200a1c03dac2f8..e48b37ba13adbcf80a3d8fac09b512cc22adf4c1 100644
--- a/compute/pzungqrrh.c
+++ b/compute/pzungqrrh.c
@@ -150,7 +150,7 @@ void chameleon_pzungqrrh( int genD, int BS,
                 int tempDMm = M == D->mt-1 ? D->m-M*D->mb : D->mb;
                 INSERT_TASK_zlacpy(
                     &options,
-                    ChamLower, tempDMm, tempkmin, A->nb,
+                    ChamLower, tempDMm, tempkmin,
                     A(M, k),
                     D(M, k) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c
index 9edd667d7e74f1d06cd8007a17de9752b68ee7cf..43e695fc6748ae876a55d17006291bf0c34adecc 100644
--- a/compute/pzunmlq.c
+++ b/compute/pzunmlq.c
@@ -107,7 +107,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                     int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamUpper, tempkmin, tempDkn, A->nb,
+                        ChamUpper, tempkmin, tempDkn,
                         A(k, k),
                         D(k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -202,7 +202,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                     int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamUpper, tempkmin, tempDkn, A->nb,
+                        ChamUpper, tempkmin, tempDkn,
                         A(k, k),
                         D(k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -271,7 +271,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                     int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamUpper, tempkmin, tempDkn, A->nb,
+                        ChamUpper, tempkmin, tempDkn,
                         A(k, k),
                         D(k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -318,7 +318,7 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
 
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamUpper, tempkmin, tempDkn, A->nb,
+                        ChamUpper, tempkmin, tempDkn,
                         A(k, k),
                         D(k) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c
index b01b7e7a3438f1a84cbd45f7333ac54b9219640d..3acb7af525aa50937a98e1e4fc42dd11c73ffc7c 100644
--- a/compute/pzunmlq_param.c
+++ b/compute/pzunmlq_param.c
@@ -106,7 +106,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamUpper, tempkmin, tempDpn, A->nb,
+                            ChamUpper, tempkmin, tempDpn,
                             A(k, p),
                             D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -237,7 +237,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamUpper, tempkmin, tempDpn, A->nb,
+                            ChamUpper, tempkmin, tempDpn,
                             A(k, p),
                             D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -331,7 +331,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamUpper, tempkmin, tempDpn, A->nb,
+                            ChamUpper, tempkmin, tempDpn,
                             A(k, p),
                             D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -383,7 +383,7 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamUpper, tempkmin, tempDpn, A->nb,
+                            ChamUpper, tempkmin, tempDpn,
                             A(k, p),
                             D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c
index 8d9ad2ee5cf66314372b026d52313c9de29efda8..af977675d0905c4b2f5723a23cffb7e7ad3643aa 100644
--- a/compute/pzunmlqrh.c
+++ b/compute/pzunmlqrh.c
@@ -108,7 +108,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamUpper, tempkmin, tempDpn, A->nb,
+                            ChamUpper, tempkmin, tempDpn,
                             A(k, p),
                             D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -263,7 +263,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamUpper, tempkmin, tempDpn, A->nb,
+                            ChamUpper, tempkmin, tempDpn,
                             A(k, p),
                             D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -367,7 +367,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamUpper, tempkmin, tempDpn, A->nb,
+                            ChamUpper, tempkmin, tempDpn,
                             A(k, p),
                             D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -416,7 +416,7 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamUpper, tempkmin, tempDpn, A->nb,
+                            ChamUpper, tempkmin, tempDpn,
                             A(k, p),
                             D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c
index d9f5e1850e53e589d7aedaa2ca2552dc692df24e..17839a5412c1cdf3addca474310bee846359d280 100644
--- a/compute/pzunmqr.c
+++ b/compute/pzunmqr.c
@@ -108,7 +108,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
 
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamLower, tempDkm, tempkmin, A->nb,
+                        ChamLower, tempDkm, tempkmin,
                         A(k, k),
                         D(k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -203,7 +203,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
 
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamLower, tempDkm, tempkmin, A->nb,
+                        ChamLower, tempDkm, tempkmin,
                         A(k, k),
                         D(k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -273,7 +273,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
 
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamLower, tempDkm, tempkmin, A->nb,
+                        ChamLower, tempDkm, tempkmin,
                         A(k, k),
                         D(k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -320,7 +320,7 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
 
                     INSERT_TASK_zlacpy(
                         &options,
-                        ChamLower, tempDkm, tempkmin, A->nb,
+                        ChamLower, tempDkm, tempkmin,
                         A(k, k),
                         D(k) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c
index ee2d0da7eaacefd456c8b764c6cb0601415c555b..e2507120b9f4c3c29821adc00c03e6dce8cf150c 100644
--- a/compute/pzunmqr_param.c
+++ b/compute/pzunmqr_param.c
@@ -106,7 +106,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDpm, tempkmin, A->nb,
+                            ChamLower, tempDpm, tempkmin,
                             A(p, k),
                             D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -237,7 +237,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDpm, tempkmin, A->nb,
+                            ChamLower, tempDpm, tempkmin,
                             A(p, k),
                             D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -330,7 +330,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDpm, tempkmin, A->nb,
+                            ChamLower, tempDpm, tempkmin,
                             A(p, k),
                             D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -382,7 +382,7 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDpm, tempkmin, A->nb,
+                            ChamLower, tempDpm, tempkmin,
                             A(p, k),
                             D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c
index 98a816cd1866d56303a7ea09224c073a98d120aa..6ec5691cea7efc15a566dcbbf09bc3bf9dfe1867 100644
--- a/compute/pzunmqrrh.c
+++ b/compute/pzunmqrrh.c
@@ -107,7 +107,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDpm, tempkmin, A->nb,
+                            ChamLower, tempDpm, tempkmin,
                             A(p, k),
                             D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -260,7 +260,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDpm, tempkmin, A->nb,
+                            ChamLower, tempDpm, tempkmin,
                             A(p, k),
                             D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -365,7 +365,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDpm, tempkmin, A->nb,
+                            ChamLower, tempDpm, tempkmin,
                             A(p, k),
                             D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
@@ -415,7 +415,7 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                         INSERT_TASK_zlacpy(
                             &options,
-                            ChamLower, tempDpm, tempkmin, A->nb,
+                            ChamLower, tempDpm, tempkmin,
                             A(p, k),
                             D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
diff --git a/compute/zplghe.c b/compute/zplghe.c
index efbd069113f1f9415a6135cd2442ad71c2b7f116..2a097f637cd1de991873dea7dd51bda98d503301 100644
--- a/compute/zplghe.c
+++ b/compute/zplghe.c
@@ -31,7 +31,8 @@
  *
  * @ingroup CHAMELEON_Complex64_t
  *
- *  CHAMELEON_zplghe - Generate a random hermitian (positive definite if 'bump' is large enough) half-matrix by tiles.
+ * @brief Generate a random hermitian (positive definite if 'bump' is large
+ * enough) half-matrix by tiles.
  *
  *******************************************************************************
  *
@@ -136,11 +137,12 @@ int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N,
  *
  * @ingroup CHAMELEON_Complex64_t_Tile
  *
- *  CHAMELEON_zplghe_Tile - Generate a random hermitian (positive definite if 'bump' is large enough) half-matrix by tiles.
- *  Tile equivalent of CHAMELEON_zplghe().
- *  Operates on matrices stored by tiles.
- *  All matrices are passed through descriptors.
- *  All dimensions are taken from the descriptors.
+ * @brief Generate a random hermitian (positive definite if 'bump' is large
+ * enough) half-matrix by tiles.
+ *
+ * Tile equivalent of CHAMELEON_zplghe().  Operates on matrices stored by
+ * tiles.  All matrices are passed through descriptors.  All dimensions are
+ * taken from the descriptors.
  *
  *******************************************************************************
  *
diff --git a/coreblas/compute/core_ztile.c b/coreblas/compute/core_ztile.c
index dcec74350500739b6fdbed7c38cd09f9588447e7..ef9d191ac03ec2a4d9b859a138f178b385e05716 100644
--- a/coreblas/compute/core_ztile.c
+++ b/coreblas/compute/core_ztile.c
@@ -368,7 +368,7 @@ TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t
 }
 
 void
-TCORE_zlacpyx( cham_uplo_t uplo, int M, int N, int displA, int displB, const CHAM_tile_t *A, int LDA, CHAM_tile_t *B, int LDB )
+TCORE_zlacpyx( cham_uplo_t uplo, int M, int N, int displA, const CHAM_tile_t *A, int LDA, int displB, CHAM_tile_t *B, int LDB )
 {
     assert( A->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
     assert( B->format & (CHAMELEON_TILE_FULLRANK | CHAMELEON_TILE_DESC) );
diff --git a/coreblas/compute/core_ztile_empty.c b/coreblas/compute/core_ztile_empty.c
index 30347d3320ba80c657ffa39795021357a43d9b7f..3af4ac44729e4ad08bbbc6d7f0e1a170a1e296d1 100644
--- a/coreblas/compute/core_ztile_empty.c
+++ b/coreblas/compute/core_ztile_empty.c
@@ -264,7 +264,7 @@ TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t
 }
 
 void
-TCORE_zlacpyx( cham_uplo_t uplo, int M, int N, int displA, int displB, const CHAM_tile_t *A, int LDA, CHAM_tile_t *B, int LDB )
+TCORE_zlacpyx( cham_uplo_t uplo, int M, int N, int displA, const CHAM_tile_t *A, int LDA, int displB, CHAM_tile_t *B, int LDB )
 {
     return;
 }
diff --git a/coreblas/include/coreblas/coreblas_ztile.h b/coreblas/include/coreblas/coreblas_ztile.h
index 74443b88724db7a787cb93be514bdd7108372cb3..8779531ad3bdb8786d8e5078c152e94420020dfc 100644
--- a/coreblas/include/coreblas/coreblas_ztile.h
+++ b/coreblas/include/coreblas/coreblas_ztile.h
@@ -43,7 +43,7 @@ int  TCORE_zherfb( cham_uplo_t uplo, int N, int K, int IB, int NB, const CHAM_ti
 int  TCORE_zhessq( cham_store_t storev, cham_uplo_t uplo, int N, const CHAM_tile_t *A, CHAM_tile_t *sclssq );
 #endif
 void TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B );
-void TCORE_zlacpyx( cham_uplo_t uplo, int M, int N, int displA, int displB, const CHAM_tile_t *A, int LDA, CHAM_tile_t *B, int LDB );
+void TCORE_zlacpyx( cham_uplo_t uplo, int M, int N, int displA, const CHAM_tile_t *A, int LDA, int displB, CHAM_tile_t *B, int LDB );
 void TCORE_zlange( cham_normtype_t norm, int M, int N, const CHAM_tile_t *A, double *work, double *normA );
 #if defined(PRECISION_z) || defined(PRECISION_c)
 void TCORE_zlanhe( cham_normtype_t norm, cham_uplo_t uplo, int N, const CHAM_tile_t *A, double *work, double *normA );
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index 85f0e30ee377818e24ac0e20362e9720b5bfda42..9a26496f199568818f63751ef98a3c58d46b725f 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -126,13 +126,13 @@ void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
                          const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
 void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int m, int n, int mb,
+                         cham_uplo_t uplo, int m, int n,
                          const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
-                          cham_uplo_t uplo, int m, int n, int mb,
-                          int displA, const CHAM_desc_t *A, int Am, int An,
-                          int displB, const CHAM_desc_t *B, int Bm, int Bn );
+                          cham_uplo_t uplo, int m, int n,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb );
 void INSERT_TASK_zlange( const RUNTIME_option_t *options,
                          cham_normtype_t norm, int M, int N, int NB,
                          const CHAM_desc_t *A, int Am, int An,
diff --git a/runtime/openmp/codelets/codelet_zlacpy.c b/runtime/openmp/codelets/codelet_zlacpy.c
index 25de93f94bbba0cfeb4d66f21b27a85ab572161b..8c5696d6ac417d56031cafd369a48bcc6f33c3ea 100644
--- a/runtime/openmp/codelets/codelet_zlacpy.c
+++ b/runtime/openmp/codelets/codelet_zlacpy.c
@@ -20,10 +20,10 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
-                          cham_uplo_t uplo, int m, int n, int nb,
-                          int displA, const CHAM_desc_t *A, int Am, int An,
-                          int displB, const CHAM_desc_t *B, int Bm, int Bn )
+void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
     CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
@@ -31,24 +31,29 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
     assert( tileA->format & CHAMELEON_TILE_FULLRANK );
     assert( tileB->format & CHAMELEON_TILE_FULLRANK );
 
-#pragma omp task firstprivate( uplo, m, n, displA, tileA, displB, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+#pragma omp task firstprivate( uplo, m, n, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
     {
-        CHAMELEON_Complex64_t *A = tileA->mat;
-        CHAMELEON_Complex64_t *B = tileB->mat;
-
-        CORE_zlacpy( uplo, m, n, A + displA, tileA->ld, B + displB, tileB->ld );
+        TCORE_zlacpy( uplo, m, n, tileA, tileB );
     }
 
     (void)options;
-    (void)nb;
 }
 
-void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int m, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An,
-                         const CHAM_desc_t *B, int Bm, int Bn )
+void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo, int m, int n,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
-    INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                         0, A, Am, An,
-                         0, B, Bm, Bn );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+
+#pragma omp task firstprivate( uplo, m, n, displA, tileA, lda, displB, tileB, ldb ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+    {
+        TCORE_zlacpyx( uplo, m, n, displA, tileA, lda, displB, tileB, ldb );
+    }
+
+    (void)options;
 }
diff --git a/runtime/parsec/codelets/codelet_zlacpy.c b/runtime/parsec/codelets/codelet_zlacpy.c
index 5c047798d8227eb7618d6490d08ab00e2d4abee2..65b9bc0542dae69579db42ef476705cebfa4bca7 100644
--- a/runtime/parsec/codelets/codelet_zlacpy.c
+++ b/runtime/parsec/codelets/codelet_zlacpy.c
@@ -22,6 +22,48 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
+static inline int
+CORE_zlacpy_parsec( parsec_execution_stream_t *context,
+                    parsec_task_t             *this_task )
+{
+    cham_uplo_t uplo;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+    CHAMELEON_Complex64_t *B;
+    int LDB;
+
+    parsec_dtd_unpack_args(
+        this_task, &uplo, &M, &N, &A, &LDA, &B, &LDB );
+
+    CORE_zlacpy( uplo, M, N, A, LDA, B, LDB );
+
+    (void)context;
+    return PARSEC_HOOK_RETURN_DONE;
+}
+
+void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
+{
+    parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+
+    parsec_dtd_taskpool_insert_task(
+        PARSEC_dtd_taskpool, CORE_zlacpy_parsec, options->priority, "lacpy",
+        sizeof(cham_uplo_t), &uplo,        VALUE,
+        sizeof(int),         &m,           VALUE,
+        sizeof(int),         &n,           VALUE,
+        PASSED_BY_REF,       RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
+        sizeof(int),         &(tileA->ld), VALUE,
+        PASSED_BY_REF,       RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | OUTPUT | AFFINITY,
+        sizeof(int),         &(tileB->ld), VALUE,
+        PARSEC_DTD_ARG_END );
+}
+
 static inline int
 CORE_zlacpyx_parsec( parsec_execution_stream_t *context,
                     parsec_task_t             *this_task )
@@ -39,42 +81,29 @@ CORE_zlacpyx_parsec( parsec_execution_stream_t *context,
     parsec_dtd_unpack_args(
         this_task, &uplo, &M, &N, &displA, &A, &LDA, &displB, &B, &LDB );
 
-    CORE_zlacpy( uplo, M, N, A + (displA), LDA, B + (displB), LDB );
+    CORE_zlacpy( uplo, M, N, A + displA, LDA, B + displB, LDB );
 
     (void)context;
     return PARSEC_HOOK_RETURN_DONE;
 }
 
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
-                          cham_uplo_t uplo, int m, int n, int nb,
-                          int displA, const CHAM_desc_t *A, int Am, int An,
-                          int displB, const CHAM_desc_t *B, int Bm, int Bn )
+                          cham_uplo_t uplo, int m, int n,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
-    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
-    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zlacpyx_parsec, options->priority, "lacpy",
-        sizeof(cham_uplo_t),    &uplo,                      VALUE,
-        sizeof(int),           &m,                         VALUE,
-        sizeof(int),           &n,                         VALUE,
-        sizeof(int),           &displA,                    VALUE,
-        PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int), &(tileA->ld), VALUE,
-        sizeof(int),           &displB,                    VALUE,
-        PASSED_BY_REF,         RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | OUTPUT | AFFINITY,
-        sizeof(int), &(tileB->ld), VALUE,
+        sizeof(cham_uplo_t), &uplo,   VALUE,
+        sizeof(int),         &m,      VALUE,
+        sizeof(int),         &n,      VALUE,
+        sizeof(int),         &displA, VALUE,
+        PASSED_BY_REF,       RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
+        sizeof(int),         &lda,    VALUE,
+        sizeof(int),         &displB, VALUE,
+        PASSED_BY_REF,       RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | OUTPUT | AFFINITY,
+        sizeof(int),         &ldb,    VALUE,
         PARSEC_DTD_ARG_END );
-    (void)nb;
-}
-
-void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int m, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An,
-                         const CHAM_desc_t *B, int Bm, int Bn )
-{
-    INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                         0, A, Am, An,
-                         0, B, Bm, Bn );
 }
diff --git a/runtime/quark/codelets/codelet_zlacpy.c b/runtime/quark/codelets/codelet_zlacpy.c
index 11992d320efc8269d9e2a5ea62f9f409849b7487..90a9ae5fbca18270d2e75f4e95c408c14096ba93 100644
--- a/runtime/quark/codelets/codelet_zlacpy.c
+++ b/runtime/quark/codelets/codelet_zlacpy.c
@@ -29,50 +29,66 @@
 static inline void CORE_zlacpy_quark(Quark *quark)
 {
     cham_uplo_t uplo;
-    int M;
-    int N;
-    int displA;
-    CHAM_tile_t *tileA;
-    CHAMELEON_Complex64_t *A;
-    int displB;
-    CHAM_tile_t *tileB;
-    CHAMELEON_Complex64_t *B;
+    int M, N;
+    int LDA, LDB;
+    CHAM_tile_t *tileA, *tileB;
 
-    quark_unpack_args_7(quark, uplo, M, N, displA, tileA, displB, tileB);
+    quark_unpack_args_5(quark, uplo, M, N, tileA, tileB);
 
     assert( tileA->format & CHAMELEON_TILE_FULLRANK );
     assert( tileB->format & CHAMELEON_TILE_FULLRANK );
 
-    A = tileA->mat;
-    B = tileB->mat;
-    CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld );
+    TCORE_zlacpy( uplo, M, N, tileA, tileB );
 }
 
-void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
-                          cham_uplo_t uplo, int m, int n, int nb,
-                          int displA, const CHAM_desc_t *A, int Am, int An,
-                          int displB, const CHAM_desc_t *B, int Bm, int Bn )
+void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LACPY;
     QUARK_Insert_Task(opt->quark, CORE_zlacpy_quark, (Quark_Task_Flags*)opt,
-        sizeof(int),              &uplo,   VALUE,
-        sizeof(int),                     &m,      VALUE,
-        sizeof(int),                     &n,      VALUE,
-        sizeof(int),                     &displA, VALUE,
+        sizeof(int),   &uplo,   VALUE,
+        sizeof(int),   &m,      VALUE,
+        sizeof(int),   &n,      VALUE,
         sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INPUT,
-        sizeof(int),                     &displB, VALUE,
         sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             OUTPUT,
         0);
-    (void)nb;
 }
 
-void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int m, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An,
-                         const CHAM_desc_t *B, int Bm, int Bn )
+static inline void CORE_zlacpyx_quark(Quark *quark)
 {
-    INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                         0, A, Am, An,
-                         0, B, Bm, Bn );
+    cham_uplo_t uplo;
+    int M, N;
+    int displA, displB;
+    int LDA, LDB;
+    CHAM_tile_t *tileA, *tileB;
+
+    quark_unpack_args_9(quark, uplo, M, N, displA, tileA, LDA, displB, tileB, LDB);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+
+    TCORE_zlacpyx( uplo, M, N, displA, tileA, LDA, displB, tileB, LDB );
+}
+
+void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo, int m, int n,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+{
+    quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    DAG_CORE_LACPY;
+    QUARK_Insert_Task(opt->quark, CORE_zlacpyx_quark, (Quark_Task_Flags*)opt,
+        sizeof(int),   &uplo,   VALUE,
+        sizeof(int),   &m,      VALUE,
+        sizeof(int),   &n,      VALUE,
+        sizeof(int),   &displA, VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INPUT,
+        sizeof(int),   &lda,    VALUE,
+        sizeof(int),   &displB, VALUE,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             OUTPUT,
+        sizeof(int),   &ldb,    VALUE,
+        0); /* 9 packed arguments: must match quark_unpack_args_9 in CORE_zlacpyx_quark */
 }
diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c
index 7302ec42c94f7aa69adfa5692fa2ded758b797c9..af49ba0c96514250b362b41dc839cb674a9c5262 100644
--- a/runtime/starpu/codelets/codelet_zlacpy.c
+++ b/runtime/starpu/codelets/codelet_zlacpy.c
@@ -52,9 +52,7 @@ cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
 
     assert( clargs->displA == 0 );
     assert( clargs->displB == 0 );
-    CHAMELEON_Complex64_t *A = tileA->mat;
-    CHAMELEON_Complex64_t *B = tileB->mat;
-    // CORE_zlacpy( clargs->uplo, clargs->m, clargs->n, A + clargs->displA, tileA->ld, B + clargs->displB, tileB->ld );
+
     TCORE_zlacpy( clargs->uplo, clargs->m, clargs->n, tileA, tileB );
 }
 
@@ -68,8 +66,8 @@ cl_zlacpyx_cpu_func(void *descr[], void *cl_arg)
     tileA = cti_interface_get(descr[0]);
     tileB = cti_interface_get(descr[1]);
 
-    TCORE_zlacpyx( clargs->uplo, clargs->m, clargs->n, clargs->displA, clargs->displB,
-                   tileA, clargs->lda, tileB, clargs->ldb );
+    TCORE_zlacpyx( clargs->uplo, clargs->m, clargs->n, clargs->displA,
+                   tileA, clargs->lda, clargs->displB, tileB, clargs->ldb );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -80,9 +78,9 @@ CODELETS_CPU( zlacpy,  cl_zlacpy_cpu_func  )
 CODELETS_CPU( zlacpyx, cl_zlacpyx_cpu_func )
 
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
-                          cham_uplo_t uplo, int m, int n, int nb,
-                          int displA, const CHAM_desc_t *A, int Am, int An,
-                          int displB, const CHAM_desc_t *B, int Bm, int Bn )
+                          cham_uplo_t uplo, int m, int n,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     struct cl_zlacpy_args_s *clargs = NULL;
     void (*callback)(void*);
@@ -105,8 +103,8 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
         clargs->displB = displB;
         clargs->tileA  = A->get_blktile( A, Am, An );
         clargs->tileB  = B->get_blktile( B, Bm, Bn );
-        clargs->lda    = clargs->tileA->ld;
-        clargs->ldb    = clargs->tileB->ld;
+        clargs->lda    = lda;
+        clargs->ldb    = ldb;
     }
 
     /* Callback fro profiling information */
@@ -129,12 +127,10 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
 #endif
 
         0 );
-
-    (void)nb;
 }
 
 void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int m, int n, int nb,
+                         cham_uplo_t uplo, int m, int n,
                          const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn )
 {
@@ -183,6 +179,4 @@ void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
 #endif
 
         0 );
-
-    (void)nb;
 }