diff --git a/compute/pzbuild.c b/compute/pzbuild.c
index c8f4f536fe22fabef4722dbbbd00a85bf9a0a772..cd25892b84dc68dcced8794a779029263a2c6667 100644
--- a/compute/pzbuild.c
+++ b/compute/pzbuild.c
@@ -60,7 +60,6 @@ void chameleon_pzbuild( cham_uplo_t uplo, CHAM_desc_t *A, void *user_data, void*
   RUNTIME_option_t options;
 
   int m, n;
-  int ldam;
 
   chamctxt = chameleon_context_self();
   if (sequence->status != CHAMELEON_SUCCESS)
@@ -68,7 +67,6 @@ void chameleon_pzbuild( cham_uplo_t uplo, CHAM_desc_t *A, void *user_data, void*
   RUNTIME_options_init(&options, chamctxt, sequence, request);
 
   for (m = 0; m < A->mt; m++) {
-    ldam = BLKLDD(A, m);
     for (n = 0; n < A->nt; n++) {
 
       if ( ( uplo == ChamUpper && m <= n ) ||
@@ -76,7 +74,7 @@ void chameleon_pzbuild( cham_uplo_t uplo, CHAM_desc_t *A, void *user_data, void*
            ( uplo == ChamUpperLower ) )
         INSERT_TASK_zbuild(
               &options,
-              A(m, n), ldam,
+              A(m, n),
               user_data, user_build_callback );
     }
   }
diff --git a/compute/pzgelqf.c b/compute/pzgelqf.c
index a85cad0fd91ad8ca14eac15d155e886426a2d73d..c0e9b10052a7da83be3176a4c8b43812f4c9f849 100644
--- a/compute/pzgelqf.c
+++ b/compute/pzgelqf.c
@@ -41,7 +41,6 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldam, lddk;
     int tempkm, tempkn, tempmm, tempnn;
     int ib, minMNT;
 
@@ -92,39 +91,36 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-        ldak = BLKLDD(A, k);
-        lddk = BLKLDD(D, k);
         INSERT_TASK_zgelqt(
             &options,
             tempkm, tempkn, ib, T->nb,
-            A(k, k), ldak,
-            T(k, k), T->mb);
+            A(k, k),
+            T(k, k));
         if ( genD ) {
             int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
             int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
             INSERT_TASK_zlacpy(
                 &options,
                 ChamUpper, tempDkm, tempDkn, A->nb,
-                A(k, k), ldak,
-                D(k), lddk );
+                A(k, k),
+                D(k) );
 #if defined(CHAMELEON_USE_CUDA)
             INSERT_TASK_zlaset(
                 &options,
                 ChamLower, tempDkm, tempDkn,
                 0., 1.,
-                D(k), lddk );
+                D(k) );
 #endif
         }
         for (m = k+1; m < A->mt; m++) {
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
             INSERT_TASK_zunmlq(
                 &options,
                 ChamRight, ChamConjTrans,
                 tempmm, tempkn, tempkn, ib, T->nb,
-                D(k), lddk,
-                T(k, k), T->mb,
-                A(m, k), ldam);
+                D(k),
+                T(k, k),
+                A(m, k));
         }
         RUNTIME_data_flush( sequence, D(k)    );
         RUNTIME_data_flush( sequence, T(k, k) );
@@ -139,12 +135,11 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
             INSERT_TASK_ztplqt(
                 &options,
                 tempkm, tempnn, 0, ib, T->nb,
-                A(k, k), ldak,
-                A(k, n), ldak,
-                T(k, n), T->mb);
+                A(k, k),
+                A(k, n),
+                T(k, n));
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
 
                 RUNTIME_data_migrate( sequence, A(m, k),
                                       A->get_rankof( A, m, n ) );
@@ -153,10 +148,10 @@ void chameleon_pzgelqf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
                     &options,
                     ChamRight, ChamConjTrans,
                     tempmm, tempnn, A->mb, 0, ib, T->nb,
-                    A(k, n), ldak,
-                    T(k, n), T->mb,
-                    A(m, k), ldam,
-                    A(m, n), ldam);
+                    A(k, n),
+                    T(k, n),
+                    A(m, k),
+                    A(m, n));
             }
             RUNTIME_data_flush( sequence, A(k, n) );
             RUNTIME_data_flush( sequence, T(k, n) );
diff --git a/compute/pzgelqf_param.c b/compute/pzgelqf_param.c
index 511125da43777535f433b7e6b5ed649a02b50d01..5ba6cbe30d25eeebcb68fc94ab8465a404c7aff6 100644
--- a/compute/pzgelqf_param.c
+++ b/compute/pzgelqf_param.c
@@ -41,7 +41,6 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
 
     int k, m, n, i, p;
     int K, L, nbgeqrt;
-    int ldak, ldam, lddk;
     int tempkmin, tempkm, tempnn, tempmm, temppn;
     int ib, node, nbtiles, *tiles;
 
@@ -89,8 +88,6 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
         RUNTIME_iteration_push(chamctxt, k);
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-        ldak = BLKLDD(A, k);
-        lddk = BLKLDD(D, k);
 
         /* The number of geqrt to apply */
         nbgeqrt = qrtree->getnbgeqrf(qrtree, k);
@@ -104,8 +101,8 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
             INSERT_TASK_zgelqt(
                 &options,
                 tempkm, temppn, ib, T->nb,
-                A( k, p), ldak,
-                T(k, p), T->mb);
+                A( k, p),
+                T(k, p));
 
             if ( genD ) {
                 int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
@@ -114,27 +111,26 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamUpper, tempDkm, tempDpn, A->nb,
-                    A(k, p), ldak,
-                    D(k, p), lddk );
+                    A(k, p),
+                    D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamLower, tempDkm, tempDpn,
                     0., 1.,
-                    D(k, p), lddk );
+                    D(k, p) );
 #endif
             }
 
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
                 INSERT_TASK_zunmlq(
                     &options,
                     ChamRight, ChamConjTrans,
                     tempmm, temppn, tempkmin, ib, T->nb,
-                    D(k, p), lddk,
-                    T(k, p), T->mb,
-                    A(m, p), ldam);
+                    D(k, p),
+                    T(k, p),
+                    A(m, p));
             }
             RUNTIME_data_flush( sequence, D(k, p) );
             RUNTIME_data_flush( sequence, T(k, p) );
@@ -167,13 +163,12 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
             INSERT_TASK_ztplqt(
                 &options,
                 tempkm, tempnn, chameleon_min(L, tempkm), ib, T->nb,
-                A(k, p), ldak,
-                A(k, n), ldak,
-                T(k, n), T->mb);
+                A(k, p),
+                A(k, n),
+                T(k, n));
 
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
 
                 node = A->get_rankof( A, m, n );
                 RUNTIME_data_migrate( sequence, A(m, p), node );
@@ -183,10 +178,10 @@ void chameleon_pzgelqf_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
                     &options,
                     ChamRight, ChamConjTrans,
                     tempmm, tempnn, tempkm, L, ib, T->nb,
-                    A(k, n), ldak,
-                    T(k, n), T->mb,
-                    A(m, p), ldam,
-                    A(m, n), ldam);
+                    A(k, n),
+                    T(k, n),
+                    A(m, p),
+                    A(m, n));
             }
             RUNTIME_data_flush( sequence, A(k, n) );
             RUNTIME_data_flush( sequence, T(k, n) );
diff --git a/compute/pzgelqfrh.c b/compute/pzgelqfrh.c
index 8eb69f56aee4cff2ad5c431607bb3683cad77b58..2b531416f21d3057026a75d3aeabc4f6d10e8672 100644
--- a/compute/pzgelqfrh.c
+++ b/compute/pzgelqfrh.c
@@ -44,7 +44,6 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
 
     int k, m, n;
     int K, N, RD;
-    int ldak, ldam, lddk;
     int tempkmin, tempkm, tempNn, tempnn, tempmm, tempNRDn;
     int ib, node;
 
@@ -91,8 +90,6 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
         RUNTIME_iteration_push(chamctxt, k);
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-        ldak = BLKLDD(A, k);
-        lddk = BLKLDD(D, k);
 
         for (N = k; N < A->nt; N += BS) {
             tempNn = N == A->nt-1 ? A->n-N*A->nb : A->nb;
@@ -100,8 +97,8 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
             INSERT_TASK_zgelqt(
                 &options,
                 tempkm, tempNn, ib, T->nb,
-                A(k, N), ldak,
-                T(k, N), T->mb);
+                A(k, N),
+                T(k, N));
             if ( genD ) {
                 int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
                 int tempDNn = N == D->nt-1 ? D->n-N*D->nb : D->nb;
@@ -109,26 +106,25 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamUpper, tempDkm, tempDNn, A->nb,
-                    A(k, N), ldak,
-                    D(k, N), lddk );
+                    A(k, N),
+                    D(k, N) );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamLower, tempDkm, tempDNn,
                     0., 1.,
-                    D(k, N), lddk );
+                    D(k, N) );
 #endif
             }
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
                 INSERT_TASK_zunmlq(
                     &options,
                     ChamRight, ChamConjTrans,
                     tempmm, tempNn, tempkmin, ib, T->nb,
-                    D(k, N), lddk,
-                    T(k, N), T->mb,
-                    A(m, N), ldam);
+                    D(k, N),
+                    T(k, N),
+                    A(m, N));
             }
             RUNTIME_data_flush( sequence, D(k, N) );
             RUNTIME_data_flush( sequence, T(k, N) );
@@ -143,13 +139,12 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                 INSERT_TASK_ztplqt(
                     &options,
                     tempkm, tempnn, 0, ib, T->nb,
-                    A(k, N), ldak,
-                    A(k, n), ldak,
-                    T(k, n), T->mb);
+                    A(k, N),
+                    A(k, n),
+                    T(k, n));
 
                 for (m = k+1; m < A->mt; m++) {
                     tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                    ldam = BLKLDD(A, m);
 
                     RUNTIME_data_migrate( sequence, A(m, N),
                                           A->get_rankof( A, m, n ) );
@@ -158,10 +153,10 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                         &options,
                         ChamRight, ChamConjTrans,
                         tempmm, tempnn, tempkm, 0, ib, T->nb,
-                        A(k, n), ldak,
-                        T(k, n), T->mb,
-                        A(m, N), ldam,
-                        A(m, n), ldam);
+                        A(k, n),
+                        T(k, n),
+                        A(m, N),
+                        A(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(k, n) );
                 RUNTIME_data_flush( sequence, T(k, n) );
@@ -179,13 +174,12 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                 INSERT_TASK_ztplqt(
                     &options,
                     tempkm, tempNRDn, chameleon_min(tempNRDn, tempkm), ib, T->nb,
-                    A (k, N   ), ldak,
-                    A (k, N+RD), ldak,
-                    T2(k, N+RD), T->mb);
+                    A (k, N   ),
+                    A (k, N+RD),
+                    T2(k, N+RD));
 
                 for (m = k+1; m < A->mt; m++) {
                     tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                    ldam   = BLKLDD(A, m );
 
                     node = A->get_rankof( A, m, N+RD );
                     RUNTIME_data_migrate( sequence, A(m, N),    node );
@@ -195,10 +189,10 @@ void chameleon_pzgelqfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                         &options,
                         ChamRight, ChamConjTrans,
                         tempmm, tempNRDn, tempkm, tempNRDn, ib, T->nb,
-                        A (k, N+RD), ldak,
-                        T2(k, N+RD), T->mb,
-                        A (m, N   ), ldam,
-                        A (m, N+RD), ldam);
+                        A (k, N+RD),
+                        T2(k, N+RD),
+                        A (m, N   ),
+                        A (m, N+RD));
                 }
                 RUNTIME_data_flush( sequence, A (k, N+RD) );
                 RUNTIME_data_flush( sequence, T2(k, N+RD) );
diff --git a/compute/pzgemm.c b/compute/pzgemm.c
index d6d9c7eec81e74a2da0bcd0eea344ccf2dacc98b..52b885d4099d3893994069f5c73cb86658a441ea 100644
--- a/compute/pzgemm.c
+++ b/compute/pzgemm.c
@@ -41,7 +41,6 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
 {
     RUNTIME_sequence_t *sequence = options->sequence;
     int m, n, k, p, q, KT, K, lp, lq;
-    int ldam, ldak, ldbn, ldbk, ldcm;
     int tempmm, tempnn, tempkk;
     int lookahead, myp, myq;
 
@@ -74,20 +73,17 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
         lq = (k % lookahead) * C->q;
         tempkk = k == KT - 1 ? K - k * A->nb : A->nb;
         zbeta = k == 0 ? beta : zone;
-        ldak = BLKLDD(A, k);
-        ldbk = BLKLDD(B, k);
 
         /* Transfert ownership of the k column of A */
         for (m = 0; m < C->mt; m ++ ) {
             tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb;
-            ldam = BLKLDD(A, m);
 
             if ( transA == ChamNoTrans ) {
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempmm, tempkk, C->mb,
-                    A(  m,  k ),             ldam,
-                    WA( m, (k % C->q) + lq ), WA.mb );
+                    A(  m,  k ),
+                    WA( m, (k % C->q) + lq ) );
 
                 RUNTIME_data_flush( sequence, A( m, k ) );
 
@@ -95,16 +91,16 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                     INSERT_TASK_zlacpy(
                         options,
                         ChamUpperLower, tempmm, tempkk, C->mb,
-                        WA( m, ((k+q-1) % C->q) + lq ), WA.mb,
-                        WA( m, ((k+q)   % C->q) + lq ), WA.mb );
+                        WA( m, ((k+q-1) % C->q) + lq ),
+                        WA( m, ((k+q)   % C->q) + lq ) );
                 }
             }
             else {
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempkk, tempmm, C->mb,
-                    A(  k,  m ),              ldak,
-                    WA( m, (k % C->q) + lq ), WA.mb );
+                    A(  k,  m ),
+                    WA( m, (k % C->q) + lq ) );
 
                 RUNTIME_data_flush( sequence, A( k, m ) );
 
@@ -112,8 +108,8 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                     INSERT_TASK_zlacpy(
                         options,
                         ChamUpperLower, tempkk, tempmm, C->mb,
-                        WA( m, ((k+q-1) % C->q) + lq ), WA.mb,
-                        WA( m, ((k+q)   % C->q) + lq ), WA.mb );
+                        WA( m, ((k+q-1) % C->q) + lq ),
+                        WA( m, ((k+q)   % C->q) + lq ) );
                 }
             }
         }
@@ -121,14 +117,13 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
         /* Transfert ownership of the k row of B */
         for (n = 0; n < C->nt; n++) {
             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-            ldbn = BLKLDD(B, n);
 
             if ( transB == ChamNoTrans ) {
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempkk, tempnn, C->mb,
-                    B(   k,              n ), ldbk,
-                    WB( (k % C->p) + lp, n ), WB.mb );
+                    B(   k,              n ),
+                    WB( (k % C->p) + lp, n ) );
 
                 RUNTIME_data_flush( sequence, B( k, n ) );
 
@@ -136,16 +131,16 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                     INSERT_TASK_zlacpy(
                         options,
                         ChamUpperLower, tempkk, tempnn, C->mb,
-                        WB( ((k+p-1) % C->p) + lp, n ), WB.mb,
-                        WB( ((k+p)   % C->p) + lp, n ), WB.mb );
+                        WB( ((k+p-1) % C->p) + lp, n ),
+                        WB( ((k+p)   % C->p) + lp, n ) );
                 }
             }
             else {
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempnn, tempkk, C->mb,
-                    B(   n,              k ), ldbn,
-                    WB( (k % C->p) + lp, n ), WB.mb );
+                    B(   n,              k ),
+                    WB( (k % C->p) + lp, n ) );
 
                 RUNTIME_data_flush( sequence, B( n, k ) );
 
@@ -153,15 +148,14 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                     INSERT_TASK_zlacpy(
                         options,
                         ChamUpperLower, tempnn, tempkk, C->mb,
-                        WB( ((k+p-1) % C->p) + lp, n ), WB.mb,
-                        WB( ((k+p)   % C->p) + lp, n ), WB.mb );
+                        WB( ((k+p-1) % C->p) + lp, n ),
+                        WB( ((k+p)   % C->p) + lp, n ) );
                 }
             }
         }
 
         for (m = myp; m < C->mt; m+=C->p) {
             tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-            ldcm = BLKLDD(C, m);
 
             for (n = myq; n < C->nt; n+=C->q) {
                 tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
@@ -170,9 +164,9 @@ chameleon_pzgemm_summa( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tran
                     options,
                     transA, transB,
                     tempmm, tempnn, tempkk, A->mb,
-                    alpha, WA( m,        myq + lq ), WA.mb,  /* lda * Z */
-                           WB( myp + lp, n        ), WB.mb,  /* ldb * Y */
-                    zbeta, C(  m,        n        ), ldcm ); /* ldc * Y */
+                    alpha, WA( m,        myq + lq ),  /* lda * Z */
+                           WB( myp + lp, n        ),  /* ldb * Y */
+                    zbeta, C(  m,        n        ) ); /* ldc * Y */
             }
         }
     }
@@ -198,7 +192,6 @@ chameleon_pzgemm_generic( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tr
     RUNTIME_sequence_t *sequence = options->sequence;
 
     int m, n, k;
-    int ldam, ldak, ldbn, ldbk, ldcm;
     int tempmm, tempnn, tempkn, tempkm;
 
     CHAMELEON_Complex64_t zbeta;
@@ -206,33 +199,29 @@ chameleon_pzgemm_generic( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tr
 
     for (m = 0; m < C->mt; m++) {
         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-        ldcm = BLKLDD(C, m);
         for (n = 0; n < C->nt; n++) {
             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
             /*
              *  A: ChamNoTrans / B: ChamNoTrans
              */
             if (transA == ChamNoTrans) {
-                ldam = BLKLDD(A, m);
                 if (transB == ChamNoTrans) {
                     for (k = 0; k < A->nt; k++) {
                         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-                        ldbk = BLKLDD(B, k);
                         zbeta = k == 0 ? beta : zone;
                         INSERT_TASK_zgemm(
                             options,
                             transA, transB,
                             tempmm, tempnn, tempkn, A->mb,
-                            alpha, A(m, k), ldam,  /* lda * Z */
-                            B(k, n), ldbk,  /* ldb * Y */
-                            zbeta, C(m, n), ldcm); /* ldc * Y */
+                            alpha, A(m, k),  /* lda * Z */
+                            B(k, n),  /* ldb * Y */
+                            zbeta, C(m, n)); /* ldc * Y */
                     }
                 }
                 /*
                  *  A: ChamNoTrans / B: Cham[Conj]Trans
                  */
                 else {
-                    ldbn = BLKLDD(B, n);
                     for (k = 0; k < A->nt; k++) {
                         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                         zbeta = k == 0 ? beta : zone;
@@ -240,9 +229,9 @@ chameleon_pzgemm_generic( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tr
                             options,
                             transA, transB,
                             tempmm, tempnn, tempkn, A->mb,
-                            alpha, A(m, k), ldam,  /* lda * Z */
-                            B(n, k), ldbn,  /* ldb * Z */
-                            zbeta, C(m, n), ldcm); /* ldc * Y */
+                            alpha, A(m, k),  /* lda * Z */
+                            B(n, k),  /* ldb * Z */
+                            zbeta, C(m, n)); /* ldc * Y */
                     }
                 }
             }
@@ -253,34 +242,30 @@ chameleon_pzgemm_generic( CHAM_context_t *chamctxt, cham_trans_t transA, cham_tr
                 if (transB == ChamNoTrans) {
                     for (k = 0; k < A->mt; k++) {
                         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                        ldak = BLKLDD(A, k);
-                        ldbk = BLKLDD(B, k);
                         zbeta = k == 0 ? beta : zone;
                         INSERT_TASK_zgemm(
                             options,
                             transA, transB,
                             tempmm, tempnn, tempkm, A->mb,
-                            alpha, A(k, m), ldak,  /* lda * X */
-                            B(k, n), ldbk,  /* ldb * Y */
-                            zbeta, C(m, n), ldcm); /* ldc * Y */
+                            alpha, A(k, m),  /* lda * X */
+                            B(k, n),  /* ldb * Y */
+                            zbeta, C(m, n)); /* ldc * Y */
                     }
                 }
                 /*
                  *  A: Cham[Conj]Trans / B: Cham[Conj]Trans
                  */
                 else {
-                    ldbn = BLKLDD(B, n);
                     for (k = 0; k < A->mt; k++) {
                         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                        ldak = BLKLDD(A, k);
                         zbeta = k == 0 ? beta : zone;
                         INSERT_TASK_zgemm(
                             options,
                             transA, transB,
                             tempmm, tempnn, tempkm, A->mb,
-                            alpha, A(k, m), ldak,  /* lda * X */
-                            B(n, k), ldbn,  /* ldb * Z */
-                            zbeta, C(m, n), ldcm); /* ldc * Y */
+                            alpha, A(k, m),  /* lda * X */
+                            B(n, k),  /* ldb * Z */
+                            zbeta, C(m, n)); /* ldc * Y */
                     }
                 }
             }
diff --git a/compute/pzgeqrf.c b/compute/pzgeqrf.c
index 3f92fa3926b2bd9959cd8b03b827589c0e769154..29b03c6d8209d67dc86d295475ca4d023a7cbcd4 100644
--- a/compute/pzgeqrf.c
+++ b/compute/pzgeqrf.c
@@ -41,7 +41,6 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldam, lddk;
     int tempkm, tempkn, tempnn, tempmm;
     int ib;
     int minMNT = chameleon_min(A->mt, A->nt);
@@ -87,27 +86,25 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-        ldak = BLKLDD(A, k);
-        lddk = BLKLDD(D, k);
         INSERT_TASK_zgeqrt(
             &options,
             tempkm, tempkn, ib, T->nb,
-            A(k, k), ldak,
-            T(k, k), T->mb);
+            A(k, k),
+            T(k, k));
         if ( genD ) {
             int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
             int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
             INSERT_TASK_zlacpy(
                 &options,
                 ChamLower, tempDkm, tempDkn, A->nb,
-                A(k, k), ldak,
-                D(k), lddk );
+                A(k, k),
+                D(k) );
 #if defined(CHAMELEON_USE_CUDA)
             INSERT_TASK_zlaset(
                 &options,
                 ChamUpper, tempDkm, tempDkn,
                 0., 1.,
-                D(k), lddk );
+                D(k) );
 #endif
         }
         for (n = k+1; n < A->nt; n++) {
@@ -116,16 +113,15 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
                 &options,
                 ChamLeft, ChamConjTrans,
                 tempkm, tempnn, tempkm, ib, T->nb,
-                D(k),    lddk,
-                T(k, k), T->mb,
-                A(k, n), ldak);
+                D(k),
+                T(k, k),
+                A(k, n));
         }
         RUNTIME_data_flush( sequence, D(k)    );
         RUNTIME_data_flush( sequence, T(k, k) );
 
         for (m = k+1; m < A->mt; m++) {
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
 
             RUNTIME_data_migrate( sequence, A(k, k),
                                   A->get_rankof( A, m, k ) );
@@ -134,9 +130,9 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
             INSERT_TASK_ztpqrt(
                 &options,
                 tempmm, tempkn, 0, ib, T->nb,
-                A(k, k), ldak,
-                A(m, k), ldam,
-                T(m, k), T->mb);
+                A(k, k),
+                A(m, k),
+                T(m, k));
 
             for (n = k+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -149,10 +145,10 @@ void chameleon_pzgeqrf( int genD, CHAM_desc_t *A, CHAM_desc_t *T, CHAM_desc_t *D
                     &options,
                     ChamLeft, ChamConjTrans,
                     tempmm, tempnn, A->nb, 0, ib, T->nb,
-                    A(m, k), ldam,
-                    T(m, k), T->mb,
-                    A(k, n), ldak,
-                    A(m, n), ldam);
+                    A(m, k),
+                    T(m, k),
+                    A(k, n),
+                    A(m, n));
             }
             RUNTIME_data_flush( sequence, A(m, k) );
             RUNTIME_data_flush( sequence, T(m, k) );
diff --git a/compute/pzgeqrf_param.c b/compute/pzgeqrf_param.c
index ae417f76eb7c1676a549496cca73e4d6fa20e02c..d6742f7c8057ea165d1f47c2840d4be890531e80 100644
--- a/compute/pzgeqrf_param.c
+++ b/compute/pzgeqrf_param.c
@@ -46,7 +46,6 @@ void chameleon_pzgeqrf_param( int genD, int K,
 
     int k, m, n, i, p;
     int L, nbgeqrt;
-    int ldap, ldam, lddm;
     int tempkmin, tempkn, tempnn, tempmm;
     int ib, node, nbtiles, *tiles;
 
@@ -100,14 +99,12 @@ void chameleon_pzgeqrf_param( int genD, int K,
             m = qrtree->getm(qrtree, k, i);
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
             tempkmin = chameleon_min(tempmm, tempkn);
-            ldam = BLKLDD(A, m);
-            lddm = BLKLDD(D, m);
 
             INSERT_TASK_zgeqrt(
                 &options,
                 tempmm, tempkn, ib, T->nb,
-                A(m, k), ldam,
-                T(m, k), T->mb);
+                A(m, k),
+                T(m, k));
 
             if ( genD ) {
                 int tempDmm = m == D->mt-1 ? D->m-m*D->mb : D->mb;
@@ -116,14 +113,14 @@ void chameleon_pzgeqrf_param( int genD, int K,
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamLower, tempDmm, tempDkn, A->nb,
-                    A(m, k), ldam,
-                    D(m, k), lddm );
+                    A(m, k),
+                    D(m, k) );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamUpper, tempDmm, tempDkn,
                     0., 1.,
-                    D(m, k), lddm );
+                    D(m, k) );
 #endif
             }
 
@@ -133,9 +130,9 @@ void chameleon_pzgeqrf_param( int genD, int K,
                     &options,
                     ChamLeft, ChamConjTrans,
                     tempmm, tempnn, tempkmin, ib, T->nb,
-                    D(m, k), lddm,
-                    T(m, k), T->mb,
-                    A(m, n), ldam);
+                    D(m, k),
+                    T(m, k),
+                    A(m, n));
             }
             RUNTIME_data_flush( sequence, D(m, k) );
             RUNTIME_data_flush( sequence, T(m, k) );
@@ -149,8 +146,6 @@ void chameleon_pzgeqrf_param( int genD, int K,
             p = qrtree->currpiv(qrtree, k, m);
 
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldap = BLKLDD(A, p);
-            ldam = BLKLDD(A, m);
 
             if ( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
                 /* TS kernel */
@@ -170,9 +165,9 @@ void chameleon_pzgeqrf_param( int genD, int K,
             INSERT_TASK_ztpqrt(
                 &options,
                 tempmm, tempkn, chameleon_min(L, tempkn), ib, T->nb,
-                A(p, k), ldap,
-                A(m, k), ldam,
-                T(m, k), T->mb);
+                A(p, k),
+                A(m, k),
+                T(m, k));
 
             for (n = k+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -185,10 +180,10 @@ void chameleon_pzgeqrf_param( int genD, int K,
                     &options,
                     ChamLeft, ChamConjTrans,
                     tempmm, tempnn, A->nb, L, ib, T->nb,
-                    A(m, k), ldam,
-                    T(m, k), T->mb,
-                    A(p, n), ldap,
-                    A(m, n), ldam);
+                    A(m, k),
+                    T(m, k),
+                    A(p, n),
+                    A(m, n));
             }
             RUNTIME_data_flush( sequence, A(m, k) );
             RUNTIME_data_flush( sequence, T(m, k) );
diff --git a/compute/pzgeqrfrh.c b/compute/pzgeqrfrh.c
index 1182e5bc19ba569cecf38d949d45ab873ce53375..6f51507f819ff4aabaa220748bb3b71163ba0bdf 100644
--- a/compute/pzgeqrfrh.c
+++ b/compute/pzgeqrfrh.c
@@ -41,10 +41,8 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
     RUNTIME_option_t options;
     size_t ws_worker = 0;
     size_t ws_host = 0;
-
     int k, m, n;
     int K, M, RD;
-    int ldaM, ldam, ldaMRD, lddM;
     int tempkmin, tempkn, tempMm, tempnn, tempmm, tempMRDm;
     int ib, node;
 
@@ -92,14 +90,12 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
         for (M = k; M < A->mt; M += BS) {
             tempMm = M == A->mt-1 ? A->m-M*A->mb : A->mb;
             tempkmin = chameleon_min(tempMm, tempkn);
-            ldaM = BLKLDD(A, M);
-            lddM = BLKLDD(D, M);
 
             INSERT_TASK_zgeqrt(
                 &options,
                 tempMm, tempkn, ib, T->nb,
-                A(M, k), ldaM,
-                T(M, k), T->mb);
+                A(M, k),
+                T(M, k));
             if ( genD ) {
                 int tempDMm = M == D->mt-1 ? D->m-M*D->mb : D->mb;
                 int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
@@ -107,14 +103,14 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamLower, tempDMm, tempDkn, A->nb,
-                    A(M, k), ldaM,
-                    D(M, k), lddM );
+                    A(M, k),
+                    D(M, k) );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamUpper, tempDMm, tempDkn,
                     0., 1.,
-                    D(M, k), lddM );
+                    D(M, k) );
 #endif
             }
             for (n = k+1; n < A->nt; n++) {
@@ -123,16 +119,15 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                     &options,
                     ChamLeft, ChamConjTrans,
                     tempMm, tempnn, tempkmin, ib, T->nb,
-                    D(M, k), lddM,
-                    T(M, k), T->mb,
-                    A(M, n), ldaM);
+                    D(M, k),
+                    T(M, k),
+                    A(M, n));
             }
             RUNTIME_data_flush( sequence, D(M, k) );
             RUNTIME_data_flush( sequence, T(M, k) );
 
             for (m = M+1; m < chameleon_min(M+BS, A->mt); m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
 
                 RUNTIME_data_migrate( sequence, A(M, k),
                                       A->get_rankof( A, m, k ) );
@@ -141,9 +136,9 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                 INSERT_TASK_ztpqrt(
                     &options,
                     tempmm, tempkn, 0, ib, T->nb,
-                    A(M, k), ldaM,
-                    A(m, k), ldam,
-                    T(m, k), T->mb);
+                    A(M, k),
+                    A(m, k),
+                    T(m, k));
 
                 for (n = k+1; n < A->nt; n++) {
                     tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -155,10 +150,10 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                         &options,
                         ChamLeft, ChamConjTrans,
                         tempmm, tempnn, A->nb, 0, ib, T->nb,
-                        A(m, k), ldam,
-                        T(m, k), T->mb,
-                        A(M, n), ldaM,
-                        A(m, n), ldam);
+                        A(m, k),
+                        T(m, k),
+                        A(M, n),
+                        A(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(m, k) );
                 RUNTIME_data_flush( sequence, T(m, k) );
@@ -167,8 +162,6 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
         for (RD = BS; RD < A->mt-k; RD *= 2) {
             for (M = k; M+RD < A->mt; M += 2*RD) {
                 tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
-                ldaM   = BLKLDD(A, M   );
-                ldaMRD = BLKLDD(A, M+RD);
 
                 node = A->get_rankof( A, M+RD, k );
                 RUNTIME_data_migrate( sequence, A(M, k),    node );
@@ -178,9 +171,9 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                 INSERT_TASK_ztpqrt(
                     &options,
                     tempMRDm, tempkn, chameleon_min( tempMRDm, tempkn ), ib, T->nb,
-                    A (M   , k), ldaM,
-                    A (M+RD, k), ldaMRD,
-                    T2(M+RD, k), T->mb);
+                    A (M   , k),
+                    A (M+RD, k),
+                    T2(M+RD, k));
 
                 for (n = k+1; n < A->nt; n++) {
                     tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -193,10 +186,10 @@ void chameleon_pzgeqrfrh( int genD, int BS, CHAM_desc_t *A, CHAM_desc_t *T, CHAM
                         &options,
                         ChamLeft, ChamConjTrans,
                         tempMRDm, tempnn, A->nb, tempMRDm, ib, T->nb,
-                        A (M+RD, k), ldaMRD,
-                        T2(M+RD, k), T->mb,
-                        A (M,    n), ldaM,
-                        A (M+RD, n), ldaMRD);
+                        A (M+RD, k),
+                        T2(M+RD, k),
+                        A (M,    n),
+                        A (M+RD, n));
                 }
                 RUNTIME_data_flush( sequence, A (M+RD, k) );
                 RUNTIME_data_flush( sequence, T2(M+RD, k) );
diff --git a/compute/pzgetrf_incpiv.c b/compute/pzgetrf_incpiv.c
index 642970b628cede045e6f6e4ea3d9456d898718d1..1e6b5513727f2b2f85fde9a6f8cfb50534c5198a 100644
--- a/compute/pzgetrf_incpiv.c
+++ b/compute/pzgetrf_incpiv.c
@@ -49,7 +49,6 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldam, lddk;
     int tempkm, tempkn, tempmm, tempnn;
     int ib;
     int minMNT = chameleon_min(A->mt, A->nt);
@@ -84,13 +83,11 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-        ldak = BLKLDD(A, k);
-        lddk = BLKLDD(D, k);
         INSERT_TASK_zgetrf_incpiv(
             &options,
             tempkm, tempkn, ib, L->nb,
-            A(k, k), ldak,
-            L(k, k), L->mb,
+            A(k, k),
+            L(k, k),
             IPIV(k, k),
             k == A->mt-1, A->nb*k);
 
@@ -99,8 +96,8 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i
             INSERT_TASK_zlacpy(
                 &options,
                 ChamUpperLower, tempkm, tempkn, A->nb,
-                A(k, k), ldak,
-                D(k),    lddk);
+                A(k, k),
+                D(k));
 #endif
         }
 
@@ -110,19 +107,18 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i
                 &options,
                 tempkm, tempnn, tempkm, ib, L->nb,
                 IPIV(k, k),
-                L(k, k), L->mb,
-                D(k),    lddk,
-                A(k, n), ldak);
+                L(k, k),
+                D(k),
+                A(k, n));
         }
         for (m = k+1; m < A->mt; m++) {
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
             INSERT_TASK_ztstrf(
                 &options,
                 tempmm, tempkn, ib, L->nb,
-                A(k, k), ldak,
-                A(m, k), ldam,
-                L(m, k), L->mb,
+                A(k, k),
+                A(m, k),
+                L(m, k),
                 IPIV(m, k),
                 m == A->mt-1, A->nb*k);
 
@@ -131,10 +127,10 @@ void chameleon_pzgetrf_incpiv( CHAM_desc_t *A, CHAM_desc_t *L, CHAM_desc_t *D, i
                 INSERT_TASK_zssssm(
                     &options,
                     A->nb, tempnn, tempmm, tempnn, A->nb, ib, L->nb,
-                    A(k, n), ldak,
-                    A(m, n), ldam,
-                    L(m, k), L->mb,
-                    A(m, k), ldam,
+                    A(k, n),
+                    A(m, n),
+                    L(m, k),
+                    A(m, k),
                     IPIV(m, k));
             }
         }
diff --git a/compute/pzgetrf_nopiv.c b/compute/pzgetrf_nopiv.c
index d67dec3b604860bd39703515806dd26073501a72..eb9d6ea29a77cd27c5f03b1ff9bb5b1d43ae07ad 100644
--- a/compute/pzgetrf_nopiv.c
+++ b/compute/pzgetrf_nopiv.c
@@ -35,7 +35,6 @@ void chameleon_pzgetrf_nopiv(CHAM_desc_t *A,
     RUNTIME_option_t options;
 
     int k, m, n, ib;
-    int ldak, ldam;
     int tempkm, tempkn, tempmm, tempnn;
 
     CHAMELEON_Complex64_t zone  = (CHAMELEON_Complex64_t) 1.0;
@@ -54,24 +53,22 @@ void chameleon_pzgetrf_nopiv(CHAM_desc_t *A,
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-        ldak = BLKLDD(A, k);
 
         options.priority = 2*A->nt - 2*k;
         INSERT_TASK_zgetrf_nopiv(
             &options,
             tempkm, tempkn, ib, A->mb,
-            A(k, k), ldak, A->mb*k);
+            A(k, k), A->mb*k);
 
         for (m = k+1; m < A->mt; m++) {
             options.priority = 2*A->nt - 2*k - m;
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
             INSERT_TASK_ztrsm(
                 &options,
                 ChamRight, ChamUpper, ChamNoTrans, ChamNonUnit,
                 tempmm, tempkn, A->mb,
-                zone, A(k, k), ldak,
-                      A(m, k), ldam);
+                zone, A(k, k),
+                      A(m, k));
         }
         for (n = k+1; n < A->nt; n++) {
             tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -80,20 +77,19 @@ void chameleon_pzgetrf_nopiv(CHAM_desc_t *A,
                 &options,
                 ChamLeft, ChamLower, ChamNoTrans, ChamUnit,
                 tempkm, tempnn, A->mb,
-                zone, A(k, k), ldak,
-                      A(k, n), ldak);
+                zone, A(k, k),
+                      A(k, n));
 
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
                 options.priority = 2*A->nt - 2*k  - n - m;
-                ldam = BLKLDD(A, m);
                 INSERT_TASK_zgemm(
                     &options,
                     ChamNoTrans, ChamNoTrans,
                     tempmm, tempnn, A->mb, A->mb,
-                    mzone, A(m, k), ldam,
-                           A(k, n), ldak,
-                    zone,  A(m, n), ldam);
+                    mzone, A(m, k),
+                           A(k, n),
+                    zone,  A(m, n));
             }
         }
 
diff --git a/compute/pzgram.c b/compute/pzgram.c
index c8fc0b34e4e38043bfc8468c747fa0f2b1fd5a0f..3218b1a78757b8ef7802837ff5fe224bff6403ff 100644
--- a/compute/pzgram.c
+++ b/compute/pzgram.c
@@ -46,21 +46,20 @@ chameleon_pzgram_internal( cham_uplo_t uplo,
 
         for(m = mmin; m < mmax; m++) {
             int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
-            int ldam = BLKLDD( A, m );
 
             if ( n == m ) {
                 INSERT_TASK_dsyssq(
                     options, ChamColumnwise, uplo, tempmm,
-                    A(m, n), ldam, W( Wcol, m, n) );
+                    A(m, n), W( Wcol, m, n) );
             }
             else {
                 INSERT_TASK_dgessq(
                     options, ChamColumnwise, tempmm, tempnn,
-                    A(m, n), ldam, W( Wcol, m, n) );
+                    A(m, n), W( Wcol, m, n) );
                 if ( uplo != ChamUpperLower ) {
                     INSERT_TASK_dgessq(
                         options, ChamRowwise, tempmm, tempnn,
-                        A(m, n), ldam, W( Wcol, n, m) );
+                        A(m, n), W( Wcol, n, m) );
                 }
             }
         }
@@ -121,16 +120,15 @@ chameleon_pzgram_internal( cham_uplo_t uplo,
 
         for(m = mmin; m < mmax; m++) {
             int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
-            int ldam = BLKLDD( A, m );
 
             INSERT_TASK_zgram(
                 options,
                 ( m == n ) ? uplo : ChamUpperLower,
                 A->m, A->n, tempmm, tempnn,
-                W( Wcol, 0, m ), 2,
-                W( Wcol, 0, n ), 2,
+                W( Wcol, 0, m ),
+                W( Wcol, 0, n ),
                 W( Welt, 0, 0 ),
-                A( m, n ), ldam );
+                A( m, n ) );
         }
     }
 }
@@ -144,9 +142,8 @@ void chameleon_pzgram( cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *seq
     RUNTIME_option_t options;
     CHAM_desc_t Wcol;
     CHAM_desc_t Welt;
-
     int workmt, worknt;
-    int m, n, tempmm, tempnn, ldw;
+    int m, n, tempmm, tempnn;
 
     chamctxt = chameleon_context_self();
     if ( sequence->status != CHAMELEON_SUCCESS ) {
@@ -170,27 +167,25 @@ void chameleon_pzgram( cham_uplo_t uplo, CHAM_desc_t *A, RUNTIME_sequence_t *seq
     /* Initialize Wcol */
     for(m = 0; m < Wcol.mt; m++) {
         tempmm = m == Wcol.mt-1 ? Wcol.m-m*Wcol.mb : Wcol.mb;
-        ldw = Wcol.get_blkldd(&Wcol, m);
         for(n = 0; n < Wcol.nt; n++) {
             tempnn = n == Wcol.nt-1 ? Wcol.n-n*Wcol.nb : Wcol.nb;
             INSERT_TASK_dlaset(
                 &options,
                 ChamUpperLower, tempmm, tempnn,
                 -1., -1.,
-                W( &Wcol, m, n ), ldw );
+                W( &Wcol, m, n ) );
         }
     }
     /* Initialize Welt */
     for(m = 0; m < Welt.mt; m++) {
         tempmm = m == Welt.mt-1 ? Welt.m-m*Welt.mb : Welt.mb;
-        ldw = Welt.get_blkldd(&Welt, m);
         for(n = 0; n < Welt.nt; n++) {
             tempnn = n == Welt.nt-1 ? Welt.n-n*Welt.nb : Welt.nb;
             INSERT_TASK_dlaset(
                 &options,
                 ChamUpperLower, tempmm, tempnn,
                 -1., -1.,
-                W( &Welt, m, n ), ldw );
+                W( &Welt, m, n ) );
         }
     }
 
diff --git a/compute/pzhemm.c b/compute/pzhemm.c
index b41220165efa574408a907cdd54e6e55dd705652..4339b77ff1458dc1d565ae1a4719fa73b7f2161a 100644
--- a/compute/pzhemm.c
+++ b/compute/pzhemm.c
@@ -43,7 +43,6 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
     RUNTIME_sequence_t *sequence = options->sequence;
     cham_trans_t transA;
     int m, n, k, p, q, KT, K, lp, lq;
-    int ldcm;
     int tempmm, tempnn, tempkk;
     int lookahead, myp, myq;
 
@@ -64,7 +63,7 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
         /* Transfert ownership of the k column of A or B */
         for (m = 0; m < C->mt; m ++ ) {
-            int Am, Ak, ldam;
+            int Am, Ak;
             int tempam, tempak;
 
             tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb;
@@ -85,13 +84,12 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                 tempam = tempmm;
                 tempak = tempkk;
             }
-            ldam = BLKLDD( A, Am );
 
             INSERT_TASK_zlacpy(
                 options,
                 ChamUpperLower, tempam, tempak, C->mb,
-                A( Am, Ak ),              ldam,
-                WA( m, (k % C->q) + lq ), WA->mb );
+                A( Am, Ak ),
+                WA( m, (k % C->q) + lq ) );
 
             RUNTIME_data_flush( sequence, A( Am, Ak ) );
 
@@ -99,23 +97,21 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempam, tempak, C->mb,
-                    WA( m, ((k+q-1) % C->q) + lq ), WA->mb,
-                    WA( m, ((k+q)   % C->q) + lq ), WA->mb );
+                    WA( m, ((k+q-1) % C->q) + lq ),
+                    WA( m, ((k+q)   % C->q) + lq ) );
             }
         }
 
         /* Transfert ownership of the k row of B, or A */
         for (n = 0; n < C->nt; n++) {
-            int ldbk;
 
             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-            ldbk = BLKLDD( B, k );
 
             INSERT_TASK_zlacpy(
                 options,
                 ChamUpperLower, tempkk, tempnn, C->mb,
-                B(   k,              n ), ldbk,
-                WB( (k % C->p) + lp, n ), WB->mb );
+                B(   k,              n ),
+                WB( (k % C->p) + lp, n ) );
 
             RUNTIME_data_flush( sequence, B( k, n ) );
 
@@ -123,15 +119,14 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempkk, tempnn, C->mb,
-                    WB( ((k+p-1) % C->p) + lp, n ), WB->mb,
-                    WB( ((k+p)   % C->p) + lp, n ), WB->mb );
+                    WB( ((k+p-1) % C->p) + lp, n ),
+                    WB( ((k+p)   % C->p) + lp, n ) );
             }
         }
 
         /* Perform the update of this iteration */
         for (m = myp; m < C->mt; m+=C->p) {
             tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-            ldcm = BLKLDD(C, m);
 
             if ( k == m ) {
                 for (n = myq; n < C->nt; n+=C->q) {
@@ -140,9 +135,9 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                     INSERT_TASK_zhemm(
                         options, ChamLeft, uplo,
                         tempmm, tempnn, A->mb,
-                        alpha, WA( m,        myq + lq ), WA->mb,
-                               WB( myp + lp, n        ), WB->mb,
-                        zbeta, C(  m,        n        ), ldcm );
+                        alpha, WA( m,        myq + lq ),
+                               WB( myp + lp, n        ),
+                        zbeta, C(  m,        n        ) );
                 }
             }
             else {
@@ -161,9 +156,9 @@ chameleon_pzhemm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                     INSERT_TASK_zgemm(
                         options, transA, ChamNoTrans,
                         tempmm, tempnn, tempkk, A->mb,
-                        alpha, WA( m,        myq + lq ), WA->mb,
-                               WB( myp + lp, n        ), WB->mb,
-                        zbeta, C(  m,        n        ), ldcm );
+                        alpha, WA( m,        myq + lq ),
+                               WB( myp + lp, n        ),
+                        zbeta, C(  m,        n        ) );
                 }
             }
         }
@@ -184,7 +179,6 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
     RUNTIME_sequence_t *sequence = options->sequence;
     cham_trans_t transA;
     int m, n, k, p, q, KT, K, lp, lq;
-    int ldcm;
     int tempmm, tempnn, tempkk;
     int lookahead, myp, myq;
 
@@ -205,16 +199,14 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
         /* Transfert ownership of the k column of A or B */
         for (m = 0; m < C->mt; m++ ) {
-            int ldbm;
 
             tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb;
-            ldbm = BLKLDD( B, m );
 
             INSERT_TASK_zlacpy(
                 options,
                 ChamUpperLower, tempmm, tempkk, C->mb,
-                B(  m,  k ),              ldbm,
-                WA( m, (k % C->q) + lq ), WA->mb );
+                B(  m,  k ),
+                WA( m, (k % C->q) + lq ) );
 
             RUNTIME_data_flush( sequence, B( m, k ) );
 
@@ -222,14 +214,14 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempmm, tempkk, C->mb,
-                    WA( m, ((k+q-1) % C->q) + lq ), WA->mb,
-                    WA( m, ((k+q)   % C->q) + lq ), WA->mb );
+                    WA( m, ((k+q-1) % C->q) + lq ),
+                    WA( m, ((k+q)   % C->q) + lq ) );
             }
         }
 
         /* Transfert ownership of the k row of B, or A */
         for (n = 0; n < C->nt; n++) {
-            int Ak, An, ldak;
+            int Ak, An;
             int tempak, tempan;
 
             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
@@ -249,13 +241,12 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                 tempak = tempkk;
                 tempan = tempnn;
             }
-            ldak = BLKLDD( A, Ak );
 
             INSERT_TASK_zlacpy(
                 options,
                 ChamUpperLower, tempak, tempan, C->mb,
-                A(  Ak,              An ), ldak,
-                WB( (k % C->p) + lp, n  ), WB->mb );
+                A(  Ak,              An ),
+                WB( (k % C->p) + lp, n  ) );
 
             RUNTIME_data_flush( sequence, A( Ak, An ) );
 
@@ -263,8 +254,8 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempak, tempan, C->mb,
-                    WB( ((k+p-1) % C->p) + lp, n ), WB->mb,
-                    WB( ((k+p)   % C->p) + lp, n ), WB->mb );
+                    WB( ((k+p-1) % C->p) + lp, n ),
+                    WB( ((k+p)   % C->p) + lp, n ) );
             }
         }
 
@@ -275,15 +266,14 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
             if ( k == n ) {
                 for (m = myp; m < C->mt; m+=C->p) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
 
                     /* A has been stored in WA or WB for the summa ring */
                     INSERT_TASK_zhemm(
                         options, ChamRight, uplo,
                         tempmm, tempnn, A->mb,
-                        alpha, WB( myp + lp, n        ), WB->mb,
-                               WA( m,        myq + lq ), WA->mb,
-                        zbeta, C(  m,        n        ), ldcm );
+                        alpha, WB( myp + lp, n        ),
+                               WA( m,        myq + lq ),
+                        zbeta, C(  m,        n        ) );
                 }
             }
             else {
@@ -298,14 +288,13 @@ chameleon_pzhemm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
                 for (m = myp; m < C->mt; m+=C->p) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
 
                     INSERT_TASK_zgemm(
                         options, ChamNoTrans, transA,
                         tempmm, tempnn, tempkk, A->mb,
-                        alpha, WA( m,        myq + lq ), WA->mb,
-                               WB( myp + lp, n        ), WB->mb,
-                        zbeta, C(  m,        n        ), ldcm );
+                        alpha, WA( m,        myq + lq ),
+                               WB( myp + lp, n        ),
+                        zbeta, C(  m,        n        ) );
                 }
             }
         }
@@ -366,7 +355,6 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                           RUNTIME_option_t *options )
 {
     int k, m, n;
-    int ldam, ldan, ldak, ldbk, ldbm, ldcm;
     int tempmm, tempnn, tempkn, tempkm;
 
     CHAMELEON_Complex64_t zbeta;
@@ -374,28 +362,24 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
 
     for(m = 0; m < C->mt; m++) {
         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-        ldcm = BLKLDD(C, m);
         for(n = 0; n < C->nt; n++) {
             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
             /*
              *  ChamLeft / ChamLower
              */
             if (side == ChamLeft) {
-                ldam = BLKLDD(A, m);
                 if (uplo == ChamLower) {
                     for (k = 0; k < C->mt; k++) {
                         tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb;
-                        ldak = BLKLDD(A, k);
-                        ldbk = BLKLDD(B, k);
                         zbeta = k == 0 ? beta : zone;
                         if (k < m) {
                             INSERT_TASK_zgemm(
                                 options,
                                 ChamNoTrans, ChamNoTrans,
                                 tempmm, tempnn, tempkm, A->mb,
-                                alpha, A(m, k), ldam,  /* lda * K */
-                                       B(k, n), ldbk,  /* ldb * Y */
-                                zbeta, C(m, n), ldcm); /* ldc * Y */
+                                alpha, A(m, k),  /* lda * K */
+                                       B(k, n),  /* ldb * Y */
+                                zbeta, C(m, n)); /* ldc * Y */
                         }
                         else {
                             if (k == m) {
@@ -403,18 +387,18 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                                     options,
                                     side, uplo,
                                     tempmm, tempnn, A->mb,
-                                    alpha, A(k, k), ldak,  /* ldak * X */
-                                           B(k, n), ldbk,  /* ldb  * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                    alpha, A(k, k),  /* ldak * X */
+                                           B(k, n),  /* ldb  * Y */
+                                    zbeta, C(m, n)); /* ldc  * Y */
                             }
                             else {
                                 INSERT_TASK_zgemm(
                                     options,
                                     ChamConjTrans, ChamNoTrans,
                                     tempmm, tempnn, tempkm, A->mb,
-                                    alpha, A(k, m), ldak,  /* ldak * X */
-                                           B(k, n), ldbk,  /* ldb  * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                    alpha, A(k, m),  /* ldak * X */
+                                           B(k, n),  /* ldb  * Y */
+                                    zbeta, C(m, n)); /* ldc  * Y */
                             }
                         }
                     }
@@ -425,17 +409,15 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                 else {
                     for (k = 0; k < C->mt; k++) {
                         tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb;
-                        ldak = BLKLDD(A, k);
-                        ldbk = BLKLDD(B, k);
                         zbeta = k == 0 ? beta : zone;
                         if (k < m) {
                             INSERT_TASK_zgemm(
                                 options,
                                 ChamConjTrans, ChamNoTrans,
                                 tempmm, tempnn, tempkm, A->mb,
-                                alpha, A(k, m), ldak,  /* ldak * X */
-                                       B(k, n), ldbk,  /* ldb  * Y */
-                                zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                alpha, A(k, m),  /* ldak * X */
+                                       B(k, n),  /* ldb  * Y */
+                                zbeta, C(m, n)); /* ldc  * Y */
                         }
                         else {
                             if (k == m) {
@@ -443,18 +425,18 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                                     options,
                                     side, uplo,
                                     tempmm, tempnn, A->mb,
-                                    alpha, A(k, k), ldak,  /* ldak * K */
-                                           B(k, n), ldbk,  /* ldb  * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                    alpha, A(k, k),  /* ldak * K */
+                                           B(k, n),  /* ldb  * Y */
+                                    zbeta, C(m, n)); /* ldc  * Y */
                             }
                             else {
                                 INSERT_TASK_zgemm(
                                     options,
                                     ChamNoTrans, ChamNoTrans,
                                     tempmm, tempnn, tempkm, A->mb,
-                                    alpha, A(m, k), ldam,  /* lda * K */
-                                           B(k, n), ldbk,  /* ldb * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc * Y */
+                                    alpha, A(m, k),  /* lda * K */
+                                           B(k, n),  /* ldb * Y */
+                                    zbeta, C(m, n)); /* ldc * Y */
                             }
                         }
                     }
@@ -464,21 +446,18 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
              *  ChamRight / ChamLower
              */
             else {
-                ldan = BLKLDD(A, n);
-                ldbm = BLKLDD(B, m);
                 if (uplo == ChamLower) {
                     for (k = 0; k < C->nt; k++) {
                         tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
-                        ldak = BLKLDD(A, k);
                         zbeta = k == 0 ? beta : zone;
                         if (k < n) {
                             INSERT_TASK_zgemm(
                                 options,
                                 ChamNoTrans, ChamConjTrans,
                                 tempmm, tempnn, tempkn, A->mb,
-                                alpha, B(m, k), ldbm,  /* ldb * K */
-                                       A(n, k), ldan,  /* lda * K */
-                                zbeta, C(m, n), ldcm); /* ldc * Y */
+                                alpha, B(m, k),  /* ldb * K */
+                                       A(n, k),  /* lda * K */
+                                zbeta, C(m, n)); /* ldc * Y */
                         }
                         else {
                             if (k == n) {
@@ -486,18 +465,18 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                                     options,
                                     side, uplo,
                                     tempmm, tempnn, A->mb,
-                                    alpha, A(k, k), ldak,  /* ldak * Y */
-                                           B(m, k), ldbm,  /* ldb  * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                    alpha, A(k, k),  /* ldak * Y */
+                                           B(m, k),  /* ldb  * Y */
+                                    zbeta, C(m, n)); /* ldc  * Y */
                             }
                             else {
                                 INSERT_TASK_zgemm(
                                     options,
                                     ChamNoTrans, ChamNoTrans,
                                     tempmm, tempnn, tempkn, A->mb,
-                                    alpha, B(m, k), ldbm,  /* ldb  * K */
-                                           A(k, n), ldak,  /* ldak * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                    alpha, B(m, k),  /* ldb  * K */
+                                           A(k, n),  /* ldak * Y */
+                                    zbeta, C(m, n)); /* ldc  * Y */
                             }
                         }
                     }
@@ -508,16 +487,15 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                 else {
                     for (k = 0; k < C->nt; k++) {
                         tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
-                        ldak = BLKLDD(A, k);
                         zbeta = k == 0 ? beta : zone;
                         if (k < n) {
                             INSERT_TASK_zgemm(
                                 options,
                                 ChamNoTrans, ChamNoTrans,
                                 tempmm, tempnn, tempkn, A->mb,
-                                alpha, B(m, k), ldbm,  /* ldb  * K */
-                                       A(k, n), ldak,  /* ldak * Y */
-                                zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                alpha, B(m, k),  /* ldb  * K */
+                                       A(k, n),  /* ldak * Y */
+                                zbeta, C(m, n)); /* ldc  * Y */
                         }
                         else {
                             if (k == n) {
@@ -525,18 +503,18 @@ chameleon_pzhemm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                                     options,
                                     side, uplo,
                                     tempmm, tempnn, A->mb,
-                                    alpha, A(k, k), ldak,  /* ldak * Y */
-                                           B(m, k), ldbm,  /* ldb  * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                    alpha, A(k, k),  /* ldak * Y */
+                                           B(m, k),  /* ldb  * Y */
+                                    zbeta, C(m, n)); /* ldc  * Y */
                             }
                             else {
                                 INSERT_TASK_zgemm(
                                     options,
                                     ChamNoTrans, ChamConjTrans,
                                     tempmm, tempnn, tempkn, A->mb,
-                                    alpha, B(m, k), ldbm,  /* ldb * K */
-                                           A(n, k), ldan,  /* lda * K */
-                                    zbeta, C(m, n), ldcm); /* ldc * Y */
+                                    alpha, B(m, k),  /* ldb * K */
+                                           A(n, k),  /* lda * K */
+                                    zbeta, C(m, n)); /* ldc * Y */
                             }
                         }
                     }
diff --git a/compute/pzher2k.c b/compute/pzher2k.c
index 8f51860bb996a404e4307e4436a2f06e98d59b53..8e2cb085cd394c15d6220c97675f9e77a6f3f3e3 100644
--- a/compute/pzher2k.c
+++ b/compute/pzher2k.c
@@ -38,8 +38,6 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
     RUNTIME_option_t options;
 
     int m, n, k, mmin, mmax;
-    int ldak, ldam, ldan, ldcm, ldcn;
-    int ldbk, ldbm, ldbn;
     int tempnn, tempmm, tempkn, tempkm;
 
     CHAMELEON_Complex64_t zone   = (CHAMELEON_Complex64_t)1.0;
@@ -54,9 +52,6 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
 
     for (n = 0; n < C->nt; n++) {
         tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-        ldan = BLKLDD(A, n);
-        ldbn = BLKLDD(B, n);
-        ldcn = BLKLDD(C, n);
 
         if (uplo == ChamLower) {
             mmin = n+1;
@@ -78,15 +73,12 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
                     &options,
                     uplo, trans,
                     tempnn, tempkn, A->mb,
-                    alpha, A(n, k), ldan, /* ldan * K */
-                           B(n, k), ldbn,
-                    dbeta, C(n, n), ldcn); /* ldc  * N */
+                    alpha, A(n, k), /* ldan * K */
+                           B(n, k),
+                    dbeta, C(n, n)); /* ldc  * N */
             }
             for (m = mmin; m < mmax; m++) {
                 tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                ldam = BLKLDD(A, m);
-                ldbm = BLKLDD(B, m);
-                ldcm = BLKLDD(C, m);
                 for (k = 0; k < A->nt; k++) {
                     tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                     zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
@@ -94,17 +86,17 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
                         &options,
                         ChamNoTrans, ChamConjTrans,
                         tempmm, tempnn, tempkn, A->mb,
-                        alpha, A(m, k), ldam,
-                               B(n, k), ldbn,
-                        zbeta, C(m, n), ldcm);
+                        alpha, A(m, k),
+                               B(n, k),
+                        zbeta, C(m, n));
 
                     INSERT_TASK_zgemm(
                         &options,
                         ChamNoTrans, ChamConjTrans,
                         tempmm, tempnn, tempkn, A->mb,
-                        conj(alpha), B(m, k), ldbm,
-                                     A(n, k), ldan,
-                        zone,        C(m, n), ldcm);
+                        conj(alpha), B(m, k),
+                                     A(n, k),
+                        zone,        C(m, n));
                 }
             }
         }
@@ -114,40 +106,35 @@ void chameleon_pzher2k( cham_uplo_t uplo, cham_trans_t trans,
         else {
             for (k = 0; k < A->mt; k++) {
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                ldak = BLKLDD(A, k);
-                ldbk = BLKLDD(B, k);
                 dbeta = k == 0 ? beta : 1.0;
                 INSERT_TASK_zher2k(
                     &options,
                     uplo, trans,
                     tempnn, tempkm, A->mb,
-                    alpha, A(k, n), ldak,  /* lda * N */
-                           B(k, n), ldbk,
-                    dbeta, C(n, n), ldcn); /* ldc * N */
+                    alpha, A(k, n),  /* lda * N */
+                           B(k, n),
+                    dbeta, C(n, n)); /* ldc * N */
             }
             for (m = mmin; m < mmax; m++) {
                 tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                ldcm = BLKLDD(C, m);
                 for (k = 0; k < A->mt; k++) {
                     tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                    ldak = BLKLDD(A, k);
-                    ldbk = BLKLDD(B, k);
                     zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
                     INSERT_TASK_zgemm(
                         &options,
                         ChamConjTrans, ChamNoTrans,
                         tempmm, tempnn, tempkm, A->mb,
-                        alpha, A(k, m), ldak,
-                               B(k, n), ldbk,
-                        zbeta, C(m, n), ldcm);
+                        alpha, A(k, m),
+                               B(k, n),
+                        zbeta, C(m, n));
 
                     INSERT_TASK_zgemm(
                         &options,
                         ChamConjTrans, ChamNoTrans,
                         tempmm, tempnn, tempkm, A->mb,
-                        conj(alpha), B(k, m), ldbk,
-                                     A(k, n), ldak,
-                        zone,        C(m, n), ldcm );
+                        conj(alpha), B(k, m),
+                                     A(k, n),
+                        zone,        C(m, n) );
                 }
             }
         }
diff --git a/compute/pzherk.c b/compute/pzherk.c
index 29635ccdd50a8a21775ed39f62a3efa981ee11fa..bd3fa544100ec378763c66ca6ce852c5edf13f8c 100644
--- a/compute/pzherk.c
+++ b/compute/pzherk.c
@@ -37,7 +37,6 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
     RUNTIME_option_t options;
 
     int m, n, k;
-    int ldak, ldam, ldan, ldcm, ldcn;
     int tempnn, tempmm, tempkn, tempkm;
 
     CHAMELEON_Complex64_t zone   = (CHAMELEON_Complex64_t)1.0;
@@ -53,8 +52,6 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
 
     for (n = 0; n < C->nt; n++) {
         tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-        ldan = BLKLDD(A, n);
-        ldcn = BLKLDD(C, n);
         /*
          *  ChamNoTrans
          */
@@ -66,8 +63,8 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
                     &options,
                     uplo, trans,
                     tempnn, tempkn, A->mb,
-                    alpha, A(n, k), ldan, /* ldan * K */
-                    dbeta, C(n, n), ldcn); /* ldc  * N */
+                    alpha, A(n, k), /* ldan * K */
+                    dbeta, C(n, n)); /* ldc  * N */
             }
             /*
              *  ChamNoTrans / ChamLower
@@ -75,8 +72,6 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
             if (uplo == ChamLower) {
                 for (m = n+1; m < C->mt; m++) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldam = BLKLDD(A, m);
-                    ldcm = BLKLDD(C, m);
                     for (k = 0; k < A->nt; k++) {
                         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                         zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
@@ -84,9 +79,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
                             &options,
                             trans, ChamConjTrans,
                             tempmm, tempnn, tempkn, A->mb,
-                            zalpha, A(m, k), ldam,  /* ldam * K */
-                                    A(n, k), ldan,  /* ldan * K */
-                            zbeta,  C(m, n), ldcm); /* ldc  * N */
+                            zalpha, A(m, k),  /* ldam * K */
+                                    A(n, k),  /* ldan * K */
+                            zbeta,  C(m, n)); /* ldc  * N */
                     }
                 }
             }
@@ -96,7 +91,6 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
             else {
                 for (m = n+1; m < C->mt; m++) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldam = BLKLDD(A, m);
                     for (k = 0; k < A->nt; k++) {
                         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                         zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
@@ -104,9 +98,9 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
                             &options,
                             trans, ChamConjTrans,
                             tempnn, tempmm, tempkn, A->mb,
-                            zalpha, A(n, k), ldan,  /* ldan * K */
-                                    A(m, k), ldam,  /* ldam * M */
-                            zbeta,  C(n, m), ldcn); /* ldc  * M */
+                            zalpha, A(n, k),  /* ldan * K */
+                                    A(m, k),  /* ldam * M */
+                            zbeta,  C(n, m)); /* ldc  * M */
                     }
                 }
             }
@@ -117,14 +111,13 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
         else {
             for (k = 0; k < A->mt; k++) {
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                ldak = BLKLDD(A, k);
                 dbeta = k == 0 ? beta : 1.0;
                 INSERT_TASK_zherk(
                     &options,
                     uplo, trans,
                     tempnn, tempkm, A->mb,
-                    alpha, A(k, n), ldak,  /* lda * N */
-                    dbeta, C(n, n), ldcn); /* ldc * N */
+                    alpha, A(k, n),  /* lda * N */
+                    dbeta, C(n, n)); /* ldc * N */
             }
             /*
              *  Cham[Conj]Trans / ChamLower
@@ -132,18 +125,16 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
             if (uplo == ChamLower) {
                 for (m = n+1; m < C->mt; m++) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
                     for (k = 0; k < A->mt; k++) {
                         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                        ldak = BLKLDD(A, k);
                         zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
                         INSERT_TASK_zgemm(
                             &options,
                             trans, ChamNoTrans,
                             tempmm, tempnn, tempkm, A->mb,
-                            zalpha, A(k, m), ldak,  /* lda * M */
-                                    A(k, n), ldak,  /* lda * N */
-                            zbeta,  C(m, n), ldcm); /* ldc * N */
+                            zalpha, A(k, m),  /* lda * M */
+                                    A(k, n),  /* lda * N */
+                            zbeta,  C(m, n)); /* ldc * N */
                     }
                 }
             }
@@ -155,15 +146,14 @@ void chameleon_pzherk(cham_uplo_t uplo, cham_trans_t trans,
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                     for (k = 0; k < A->mt; k++) {
                         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                        ldak = BLKLDD(A, k);
                         zbeta = k == 0 ? (CHAMELEON_Complex64_t)beta : zone;
                         INSERT_TASK_zgemm(
                             &options,
                             trans, ChamNoTrans,
                             tempnn, tempmm, tempkm, A->mb,
-                            zalpha, A(k, n), ldak,  /* lda * K */
-                                    A(k, m), ldak,  /* lda * M */
-                            zbeta,  C(n, m), ldcn); /* ldc * M */
+                            zalpha, A(k, n),  /* lda * K */
+                                    A(k, m),  /* lda * M */
+                            zbeta,  C(n, m)); /* ldc * M */
                     }
                 }
             }
diff --git a/compute/pzhetrd_he2hb.c b/compute/pzhetrd_he2hb.c
index a5b1aeb3ccac4d2f23520ddc296cded783f61061..8ebd19da5c7efc00082eb7e14f48e21b53e439c3 100644
--- a/compute/pzhetrd_he2hb.c
+++ b/compute/pzhetrd_he2hb.c
@@ -48,8 +48,6 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
     size_t ws_host = 0;
 
     int k, m, n, i, j;
-    int ldak, ldak1, ldam, ldan, ldaj, ldai;
-    int lddk, lddk1, lddm, lddn, ldek, ldek1;
     int tempkm, tempkn, tempmm, tempnn, tempjj;
     int ib;
 
@@ -98,14 +96,10 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
     /* Let's extract the diagonal in a temporary copy that contains A and A' */
     for (k = 1; k < A->nt; k++){
         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-        ldak = BLKLDD(A, k);
-        lddk = BLKLDD((&D), k);
-
-        INSERT_TASK_zhe2ge(&options,
-                          uplo,
-                          tempkn, tempkn, ldak,
-                          A(k, k), ldak,
-                          D(k),    lddk);
+
+        INSERT_TASK_zhe2ge( &options,
+                            uplo, tempkn, tempkn, A->mb,
+                            A(k, k), D(k) );
     }
 
     if (uplo == ChamLower) {
@@ -114,28 +108,25 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
 
            tempkm = k+1 == A->mt-1 ? A->m-(k+1)*A->mb : A->mb;
            tempkn = k   == A->nt-1 ? A->n- k   *A->nb : A->nb;
-           ldak1 = BLKLDD(A, k+1);
-           lddk1 = BLKLDD((&D), k+1);
-           ldek1 = BLKLDD(E, k+1);
 
            INSERT_TASK_zgeqrt(
                &options,
                tempkm, tempkn, ib, A->nb,
-               A(k+1, k), ldak1,
-               T(k+1, k), T->mb);
+               A(k+1, k),
+               T(k+1, k));
 
 #if defined(CHAMELEON_COPY_DIAG)
            INSERT_TASK_zlacpy(
                &options,
                ChamLower, tempkm, tempkn, A->nb,
-               A(k+1, k), ldak1,
-               E(k+1, k), ldek1 );
+               A(k+1, k),
+               E(k+1, k) );
 #if defined(CHAMELEON_USE_CUDA)
            INSERT_TASK_zlaset(
                &options,
                ChamUpper, tempkm, tempkn,
                0., 1.,
-               E(k+1, k), ldek1 );
+               E(k+1, k) );
 #endif
 #endif
 
@@ -144,62 +135,57 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
                &options,
                ChamLower,
                tempkm, tempkm, ib, A->nb,
-               E(k+1, k), ldak1,
-               T(k+1, k), T->mb,
-               D(k+1),    lddk1);
+               E(k+1, k),
+               T(k+1, k),
+               D(k+1));
 
            /* RIGHT on the remaining tiles until the bottom */
            for (m = k+2; m < A->mt ; m++) {
                tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-               ldam = BLKLDD(A, m);
                INSERT_TASK_zunmqr(
                    &options,
                    ChamRight, ChamNoTrans,
                    tempmm, A->nb, tempkm, ib, A->nb,
-                   E(k+1, k),   ldek1,
-                   T(k+1, k),   T->mb,
-                   A(m,   k+1), ldam);
+                   E(k+1, k),
+                   T(k+1, k),
+                   A(m,   k+1));
            }
 
            for (m = k+2; m < A->mt; m++) {
                tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-               ldam = BLKLDD(A, m);
-               lddm = BLKLDD((&D), m);
 
                options.priority = 1;
                INSERT_TASK_ztsqrt(
                    &options,
                    tempmm, A->nb, ib, A->nb,
-                   A(k+1, k), ldak1,
-                   A(m  , k), ldam,
-                   T(m  , k), T->mb);
+                   A(k+1, k),
+                   A(m  , k),
+                   T(m  , k));
                options.priority = 0;
 
                /* LEFT */
                for (i = k+2; i < m; i++) {
-                   ldai = BLKLDD(A, i);
                    INSERT_TASK_ztsmqr_hetra1(
                        &options,
                        ChamLeft, ChamConjTrans,
                        A->mb, A->nb, tempmm, A->nb, A->nb, ib, A->nb,
-                       A(i, k+1), ldai,
-                       A(m,   i), ldam,
-                       A(m,   k), ldam,
-                       T(m,   k), T->mb);
+                       A(i, k+1),
+                       A(m,   i),
+                       A(m,   k),
+                       T(m,   k));
                }
 
                /* RIGHT */
                for (j = m+1; j < A->mt ; j++) {
                    tempjj = j == A->mt-1 ? A->m-j*A->mb : A->mb;
-                   ldaj = BLKLDD(A, j);
                    INSERT_TASK_ztsmqr(
                        &options,
                        ChamRight, ChamNoTrans,
                        tempjj, A->nb, tempjj, tempmm, A->nb, ib, A->nb,
-                       A(j, k+1), ldaj,
-                       A(j,   m), ldaj,
-                       A(m,   k), ldam,
-                       T(m,   k), T->mb);
+                       A(j, k+1),
+                       A(j,   m),
+                       A(m,   k),
+                       T(m,   k));
                }
 
                /* LEFT->RIGHT */
@@ -218,8 +204,8 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
                    &options,
                    ChamUpperLower, ChamConjTrans,
                    tempmm, A->nb, A->nb,
-                   A(m, k+1), ldam,
-                   AT(m),  ldak1);
+                   A(m, k+1),
+                   AT(m));
 
                /*  Left application on |A1| */
                /*                      |A2| */
@@ -227,10 +213,10 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
                    &options,
                    ChamLeft, ChamConjTrans,
                    A->mb, A->nb, tempmm, A->nb, A->nb, ib, A->nb,
-                   D(k+1),    lddk1,
-                   A(m, k+1), ldam,
-                   A(m,   k), ldam,
-                   T(m,   k), T->mb);
+                   D(k+1),
+                   A(m, k+1),
+                   A(m,   k),
+                   T(m,   k));
 
                /*  Left application on | A2'| */
                /*                      | A3 | */
@@ -238,30 +224,30 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
                    &options,
                    ChamLeft, ChamConjTrans,
                    A->mb, tempmm, tempmm, tempmm, A->nb, ib, A->nb,
-                   AT(m),    ldak1,
-                   D(m),     lddm,
-                   A(m,  k), ldam,
-                   T(m,  k), T->mb);
+                   AT(m),
+                   D(m),
+                   A(m,  k),
+                   T(m,  k));
 
                /*  Right application on | A1 A2' | */
                INSERT_TASK_ztsmqr(
                    &options,
                    ChamRight, ChamNoTrans,
                    A->mb, A->nb, A->mb, tempmm, A->nb, ib, A->nb,
-                   D(k+1), lddk1,
-                   AT(m) , ldak1,
-                   A(m,   k), ldam,
-                   T(m,   k), T->mb);
+                   D(k+1),
+                   AT(m) ,
+                   A(m,   k),
+                   T(m,   k));
 
                /*  Right application on | A2 A3 | */
                INSERT_TASK_ztsmqr(
                    &options,
                    ChamRight, ChamNoTrans,
                    tempmm, A->nb, tempmm, tempmm, A->nb, ib, A->nb,
-                   A(m, k+1), ldam,
-                   D(m),      lddm,
-                   A(m,   k), ldam,
-                   T(m,   k), T->mb);
+                   A(m, k+1),
+                   D(m),
+                   A(m,   k),
+                   T(m,   k));
                options.priority = 0;
            }
 
@@ -274,28 +260,24 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
 
            tempkn = k+1 == A->nt-1 ? A->n-(k+1)*A->nb : A->nb;
            tempkm = k   == A->mt-1 ? A->m- k   *A->mb : A->mb;
-           ldak  = BLKLDD(A, k);
-           ldek  = BLKLDD(E, k);
-           ldak1 = BLKLDD(A, k+1);
-           lddk1 = BLKLDD((&D), k+1);
            INSERT_TASK_zgelqt(
                &options,
                tempkm, tempkn, ib, A->nb,
-               A(k, k+1), ldak,
-               T(k, k+1), T->mb);
+               A(k, k+1),
+               T(k, k+1));
 
 #if defined(CHAMELEON_COPY_DIAG)
            INSERT_TASK_zlacpy(
                &options,
                ChamUpper, tempkm, tempkn, A->nb,
-               A(k, k+1), ldak,
-               E(k, k+1), ldek );
+               A(k, k+1),
+               E(k, k+1) );
 #if defined(CHAMELEON_USE_CUDA)
            INSERT_TASK_zlaset(
                &options,
                ChamLower, tempkm, tempkn,
                0., 1.,
-               E(k, k+1), ldek );
+               E(k, k+1) );
 #endif
 #endif
 
@@ -304,9 +286,9 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
                &options,
                ChamUpper,
                tempkn, tempkn, ib, A->nb,
-               E(k, k+1), ldek,
-               T(k, k+1), T->mb,
-               D(k+1),    lddk1);
+               E(k, k+1),
+               T(k, k+1),
+               D(k+1));
 
            /* LEFT on the remaining tiles until the left side */
            for (n = k+2; n < A->nt ; n++) {
@@ -315,35 +297,32 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
                    &options,
                    ChamLeft, ChamNoTrans,
                    A->mb, tempnn, tempkn, ib, A->nb,
-                   E(k,   k+1), ldek,
-                   T(k,   k+1), T->mb,
-                   A(k+1, n  ), ldak1);
+                   E(k,   k+1),
+                   T(k,   k+1),
+                   A(k+1, n  ));
            }
 
            for (n = k+2; n < A->nt; n++) {
                tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-               ldan = BLKLDD(A, n);
-               lddn = BLKLDD((&D), n);
                options.priority = 1;
                INSERT_TASK_ztslqt(
                    &options,
                    A->mb, tempnn, ib, A->nb,
-                   A(k, k+1), ldak,
-                   A(k, n  ), ldak,
-                   T(k, n  ), T->mb);
+                   A(k, k+1),
+                   A(k, n  ),
+                   T(k, n  ));
                options.priority = 0;
 
                /* RIGHT */
                for (i = k+2; i < n; i++) {
-                   ldai = BLKLDD(A, i);
                    INSERT_TASK_ztsmlq_hetra1(
                        &options,
                        ChamRight, ChamConjTrans,
                        A->mb, A->nb, A->nb, tempnn, A->nb, ib, A->nb,
-                       A(k+1, i), ldak1,
-                       A(i,   n), ldai,
-                       A(k,   n), ldak,
-                       T(k,   n), T->mb);
+                       A(k+1, i),
+                       A(i,   n),
+                       A(k,   n),
+                       T(k,   n));
                }
 
                /* LEFT */
@@ -353,10 +332,10 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
                        &options,
                        ChamLeft, ChamNoTrans,
                        A->nb, tempjj, tempnn, tempjj, A->nb, ib, A->nb,
-                       A(k+1, j), ldak1,
-                       A(n,   j), ldan,
-                       A(k,   n), ldak,
-                       T(k,   n), T->mb);
+                       A(k+1, j),
+                       A(n,   j),
+                       A(k,   n),
+                       T(k,   n));
                }
 
                /* RIGHT->LEFT */
@@ -375,28 +354,28 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
                    &options,
                    ChamUpperLower, ChamConjTrans,
                    A->mb, tempnn, A->nb,
-                   A(k+1, n), ldak1,
-                   AT(n),     A->mb);
+                   A(k+1, n),
+                   AT(n) );
 
                /*  Right application on | A1 A2 | */
                INSERT_TASK_ztsmlq(
                    &options,
                    ChamRight, ChamConjTrans,
                    A->mb, A->nb, A->mb, tempnn, A->nb, ib, A->nb,
-                   D(k+1),    lddk1,
-                   A(k+1, n), ldak1,
-                   A(k,   n), ldak,
-                   T(k,   n), T->mb);
+                   D(k+1),
+                   A(k+1, n),
+                   A(k,   n),
+                   T(k,   n));
 
                /*  Right application on | A2' A3 | */
                INSERT_TASK_ztsmlq(
                    &options,
                    ChamRight, ChamConjTrans,
                    tempnn, A->nb, tempnn, tempnn, A->nb, ib, A->nb,
-                   AT(n),    A->mb,
-                   D(n),     lddn,
-                   A(k,  n), ldak,
-                   T(k,  n), T->mb);
+                   AT(n),
+                   D(n),
+                   A(k,  n),
+                   T(k,  n));
 
                /*  Left application on |A1 | */
                /*                      |A2'| */
@@ -404,10 +383,10 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
                    &options,
                    ChamLeft, ChamNoTrans,
                    A->mb, A->nb, tempnn, A->nb, A->nb, ib, A->nb,
-                   D(k+1),  lddk1,
-                   AT(n),   A->mb,
-                   A(k, n), ldak,
-                   T(k, n), T->mb);
+                   D(k+1),
+                   AT(n),
+                   A(k, n),
+                   T(k, n));
 
                /*  Left application on | A2 | */
                /*                      | A3 | */
@@ -415,10 +394,10 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
                    &options,
                    ChamLeft, ChamNoTrans,
                    A->mb, tempnn, tempnn, tempnn, A->nb, ib, A->nb,
-                   A(k+1, n), ldak1,
-                   D(n),      lddn,
-                   A(k,   n), ldak,
-                   T(k,   n), T->mb);
+                   A(k+1, n),
+                   D(n),
+                   A(k,   n),
+                   T(k,   n));
            }
            options.priority = 0;
 
@@ -429,13 +408,9 @@ void chameleon_pzhetrd_he2hb(cham_uplo_t uplo,
     /* Copy-back into A */
     for (k = 1; k < A->nt; k++){
         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-        ldak = BLKLDD(A, k);
-        lddk = BLKLDD((&D), k);
-        INSERT_TASK_zlacpy(&options,
-                          uplo,
-                          tempkn, tempkn, ldak,
-                          D(k), lddk,
-                          A(k, k), ldak);
+        INSERT_TASK_zlacpy( &options,
+                            uplo, tempkn, tempkn, A->mb,
+                            D(k), A(k, k));
     }
 
 
diff --git a/compute/pzlacpy.c b/compute/pzlacpy.c
index d526cbe4d145be47b5f5f2dc2c0abc3f4c8b0eb4..397b122b5298fc9999945be1fadd15bffd173027 100644
--- a/compute/pzlacpy.c
+++ b/compute/pzlacpy.c
@@ -25,21 +25,14 @@
 
 #define A(m,n) A,  m,  n
 #define B(m,n) B,  m,  n
-/**
- *
- */
-/**
- *
- */
 void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
-                          RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
+                       RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
 {
     CHAM_context_t *chamctxt;
     RUNTIME_option_t options;
 
     int X, Y;
     int m, n;
-    int ldam, ldbm;
 
     chamctxt = chameleon_context_self();
     if (sequence->status != CHAMELEON_SUCCESS) {
@@ -54,16 +47,14 @@ void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
     case ChamUpper:
         for (m = 0; m < A->mt; m++) {
             X = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
-            ldbm = BLKLDD(B, m);
             if (m < A->nt) {
                 Y = m == A->nt-1 ? A->n-m*A->nb : A->nb;
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamUpper,
                     X, Y, A->mb,
-                    A(m, m), ldam,
-                    B(m, m), ldbm);
+                    A(m, m),
+                    B(m, m));
             }
             for (n = m+1; n < A->nt; n++) {
                 Y = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -71,8 +62,8 @@ void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
                     &options,
                     ChamUpperLower,
                     X, Y, A->mb,
-                    A(m, n), ldam,
-                    B(m, n), ldbm);
+                    A(m, n),
+                    B(m, n));
             }
         }
         break;
@@ -82,16 +73,14 @@ void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
     case ChamLower:
         for (m = 0; m < A->mt; m++) {
             X = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
-            ldbm = BLKLDD(B, m);
             if (m < A->nt) {
                 Y = m == A->nt-1 ? A->n-m*A->nb : A->nb;
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamLower,
                     X, Y, A->mb,
-                    A(m, m), ldam,
-                    B(m, m), ldbm);
+                    A(m, m),
+                    B(m, m));
             }
             for (n = 0; n < chameleon_min(m, A->nt); n++) {
                 Y = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -99,8 +88,8 @@ void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
                     &options,
                     ChamUpperLower,
                     X, Y, A->mb,
-                    A(m, n), ldam,
-                    B(m, n), ldbm);
+                    A(m, n),
+                    B(m, n));
             }
         }
         break;
@@ -111,16 +100,14 @@ void chameleon_pzlacpy(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
     default:
         for (m = 0; m < A->mt; m++) {
             X = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
-            ldbm = BLKLDD(B, m);
             for (n = 0; n < A->nt; n++) {
                 Y = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamUpperLower,
                     X, Y, A->mb,
-                    A(m, n), ldam,
-                    B(m, n), ldbm);
+                    A(m, n),
+                    B(m, n));
             }
         }
     }
diff --git a/compute/pzlag2c.c b/compute/pzlag2c.c
index 7704fffdd254e5571cf0064f02b84d0b8855b459..b574b2a3370715eadc40c6b4972cfb6e3b7e245b 100644
--- a/compute/pzlag2c.c
+++ b/compute/pzlag2c.c
@@ -41,7 +41,6 @@ void chameleon_pclag2z(CHAM_desc_t *SA, CHAM_desc_t *B,
 
     int X, Y;
     int m, n;
-    int ldam, ldbm;
 
     chamctxt = chameleon_context_self();
     if (sequence->status != CHAMELEON_SUCCESS) {
@@ -51,15 +50,13 @@ void chameleon_pclag2z(CHAM_desc_t *SA, CHAM_desc_t *B,
 
     for(m = 0; m < SA->mt; m++) {
         X = m == SA->mt-1 ? SA->m-m*SA->mb : SA->mb;
-        ldam = BLKLDD(SA, m);
-        ldbm = BLKLDD(B, m);
         for(n = 0; n < SA->nt; n++) {
             Y = n == SA->nt-1 ? SA->n-n*SA->nb : SA->nb;
             INSERT_TASK_clag2z(
                 &options,
                 X, Y, SA->mb,
-                SA(m, n), ldam,
-                B(m, n), ldbm);
+                SA(m, n),
+                B(m, n));
         }
     }
     RUNTIME_options_finalize(&options, chamctxt);
diff --git a/compute/pzlange.c b/compute/pzlange.c
index f909fa25fa34ff17bc6d0c72b811916a187d284e..a0b6a6d90faa23ddb98cf5354300f34573b513cd 100644
--- a/compute/pzlange.c
+++ b/compute/pzlange.c
@@ -56,19 +56,18 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
 
         for(m = mmin; m < mmax; m++) {
             int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
-            int ldam = BLKLDD( A, m );
 
             if ( (n == m) && (uplo != ChamUpperLower) ) {
                 INSERT_TASK_ztrasm(
                     options,
                     ChamColumnwise, uplo, diag, tempmm, tempnn,
-                    A(m, n), ldam, W( Wcol, m, n ) );
+                    A(m, n), W( Wcol, m, n ) );
             }
             else {
                 INSERT_TASK_dzasum(
                     options,
                     ChamColumnwise, ChamUpperLower, tempmm, tempnn,
-                    A(m, n), ldam, W( Wcol, m, n ) );
+                    A(m, n), W( Wcol, m, n ) );
             }
 
             if ( m >= P ) {
@@ -93,7 +92,7 @@ chameleon_pzlange_one( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
         INSERT_TASK_dlange(
             options,
             ChamMaxNorm, 1, tempnn, A->nb,
-            W( Wcol, 0, n ), 1,
+            W( Wcol, 0, n ),
             W( Welt, 0, n ) );
     }
 
@@ -144,7 +143,6 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
         int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
 
         int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
-        int ldam = BLKLDD( A, m );
 
         for(n = nmin; n < nmax; n++) {
             int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
@@ -153,13 +151,13 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
                 INSERT_TASK_ztrasm(
                     options,
                     ChamRowwise, uplo, diag, tempmm, tempnn,
-                    A(m, n), ldam, W( Wcol, m, n) );
+                    A(m, n), W( Wcol, m, n) );
             }
             else {
                 INSERT_TASK_dzasum(
                     options,
                     ChamRowwise, ChamUpperLower, tempmm, tempnn,
-                    A(m, n), ldam, W( Wcol, m, n) );
+                    A(m, n), W( Wcol, m, n) );
             }
 
             if ( n >= Q ) {
@@ -184,7 +182,7 @@ chameleon_pzlange_inf( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
         INSERT_TASK_dlange(
             options,
             ChamMaxNorm, tempmm, 1, A->nb,
-            W( Wcol, m, 0), 1, W( Welt, m, 0));
+            W( Wcol, m, 0), W( Welt, m, 0));
     }
 
     /**
@@ -231,7 +229,6 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
         int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
 
         int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
-        int ldam = BLKLDD( A, m );
 
         for(n = nmin; n < nmax; n++) {
             int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
@@ -240,13 +237,13 @@ chameleon_pzlange_max( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
                 INSERT_TASK_zlantr(
                     options,
                     ChamMaxNorm, uplo, diag, tempmm, tempnn, A->nb,
-                    A(m, n), ldam, W( Welt, m, n));
+                    A(m, n), W( Welt, m, n));
             }
             else {
                 INSERT_TASK_zlange(
                     options,
                     ChamMaxNorm, tempmm, tempnn, A->nb,
-                    A(m, n), ldam, W( Welt, m, n ));
+                    A(m, n), W( Welt, m, n ));
             }
 
             if ( n >= Q ) {
@@ -315,7 +312,6 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
         int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
 
         int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
-        int ldam = BLKLDD( A, m );
 
         for(n = nmin; n < nmax; n++) {
             int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
@@ -324,14 +320,14 @@ chameleon_pzlange_frb( cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A, CHAM_
                 INSERT_TASK_ztrssq(
                     options,
                     uplo, diag, tempmm, tempnn,
-                    A(m, n), ldam, W( Welt, m, n) );
+                    A(m, n), W( Welt, m, n) );
             }
             else {
                 INSERT_TASK_zgessq(
                     options,
                     ChamEltwise,
                     tempmm, tempnn,
-                    A(m, n), ldam, W( Welt, m, n) );
+                    A(m, n), W( Welt, m, n) );
             }
 
             if ( n >= Q ) {
@@ -469,7 +465,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
                     &options,
                     ChamUpperLower, Wcol.mb, Wcol.nb,
                     alpha, beta,
-                    W( &Wcol, m, n ), Wcol.mb );
+                    W( &Wcol, m, n ) );
             }
         }
     }
@@ -479,7 +475,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
                 &options,
                 ChamUpperLower, Welt.mb, Welt.nb,
                 alpha, beta,
-                W( &Welt, m, n ), Welt.mb );
+                W( &Welt, m, n ) );
         }
     }
 
@@ -512,7 +508,7 @@ void chameleon_pzlange_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_dia
                 INSERT_TASK_dlacpy(
                     &options,
                     ChamUpperLower, 1, 1, 1,
-                    W( &Welt, 0, 0 ), 1, W( &Welt, m, n ), 1);
+                    W( &Welt, 0, 0 ), W( &Welt, m, n ) );
             }
         }
     }
diff --git a/compute/pzlansy.c b/compute/pzlansy.c
index e799f57285430e09eccd348abdca54d0c31d6cdc..7698ed3899f08e7b9337491226caffc90854345f 100644
--- a/compute/pzlansy.c
+++ b/compute/pzlansy.c
@@ -52,7 +52,6 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
         int nmax = ( uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
 
         int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
-        int ldam = BLKLDD( A, m );
 
         for(n = nmin; n < nmax; n++) {
             int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
@@ -61,18 +60,18 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
                 INSERT_TASK_dzasum(
                     options,
                     ChamRowwise, uplo, tempmm, tempnn,
-                    A(m, n), ldam, W( Wcol, m, n) );
+                    A(m, n), W( Wcol, m, n) );
             }
             else {
                 INSERT_TASK_dzasum(
                     options,
                     ChamRowwise, ChamUpperLower, tempmm, tempnn,
-                    A(m, n), ldam, W( Wcol, m, n) );
+                    A(m, n), W( Wcol, m, n) );
 
                 INSERT_TASK_dzasum(
                     options,
                     ChamColumnwise, ChamUpperLower, tempmm, tempnn,
-                    A(m, n), ldam, W( Wcol, n, m) );
+                    A(m, n), W( Wcol, n, m) );
             }
         }
     }
@@ -101,7 +100,7 @@ chameleon_pzlansy_inf( cham_uplo_t uplo, CHAM_desc_t *A,
         INSERT_TASK_dlange(
             options,
             ChamMaxNorm, tempmm, 1, A->nb,
-            W( Wcol, m, 0), 1, W( Welt, m, 0));
+            W( Wcol, m, 0), W( Welt, m, 0));
     }
 
     /**
@@ -146,7 +145,6 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A,
         int nmax = (uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
 
         int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
-        int ldam = BLKLDD( A, m );
 
         for(n = nmin; n < nmax; n++) {
             int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
@@ -156,20 +154,20 @@ chameleon_pzlansy_max( cham_trans_t trans, cham_uplo_t uplo, CHAM_desc_t *A,
                     INSERT_TASK_zlanhe(
                         options,
                         ChamMaxNorm, uplo, tempmm, A->nb,
-                        A(m, n), ldam, W( Welt, m, n));
+                        A(m, n), W( Welt, m, n));
                 }
                 else {
                     INSERT_TASK_zlansy(
                         options,
                         ChamMaxNorm, uplo, tempmm, A->nb,
-                        A(m, n), ldam, W( Welt, m, n));
+                        A(m, n), W( Welt, m, n));
                 }
             }
             else {
                 INSERT_TASK_zlange(
                     options,
                     ChamMaxNorm, tempmm, tempnn, A->nb,
-                    A(m, n), ldam, W( Welt, m, n));
+                    A(m, n), W( Welt, m, n));
             }
 
             if ( n >= Q ) {
@@ -233,7 +231,6 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo,
         int nmax = (uplo == ChamLower ) ? chameleon_min(m+1, NT) : NT;
 
         int tempmm = ( m == (MT-1) ) ? M - m * A->mb : A->mb;
-        int ldam = BLKLDD( A, m );
 
         for(n = nmin; n < nmax; n++) {
             int tempnn = ( n == (NT-1) ) ? N - n * A->nb : A->nb;
@@ -242,21 +239,21 @@ chameleon_pzlansy_frb( cham_trans_t trans, cham_uplo_t uplo,
                 if ( trans == ChamConjTrans) {
                     INSERT_TASK_zhessq(
                         options, ChamEltwise, uplo, tempmm,
-                        A(m, n), ldam, W( Welt, m, n) );
+                        A(m, n), W( Welt, m, n) );
                 }
                 else {
                     INSERT_TASK_zsyssq(
                         options, ChamEltwise, uplo, tempmm,
-                        A(m, n), ldam, W( Welt, m, n) );
+                        A(m, n), W( Welt, m, n) );
                 }
             }
             else {
                 INSERT_TASK_zgessq(
                     options, ChamEltwise, tempmm, tempnn,
-                    A(m, n), ldam, W( Welt, m, n) );
+                    A(m, n), W( Welt, m, n) );
                 INSERT_TASK_zgessq(
                     options, ChamEltwise, tempmm, tempnn,
-                    A(m, n), ldam, W( Welt, n, m) );
+                    A(m, n), W( Welt, n, m) );
             }
         }
     }
@@ -380,7 +377,7 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
                     &options,
                     ChamUpperLower, Wcol.mb, Wcol.nb,
                     alpha, beta,
-                    W( &Wcol, m, n ), Wcol.mb );
+                    W( &Wcol, m, n ) );
             }
         }
     }
@@ -390,7 +387,7 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
                 &options,
                 ChamUpperLower, Welt.mb, Welt.nb,
                 alpha, beta,
-                W( &Welt, m, n ), Welt.mb );
+                W( &Welt, m, n ) );
         }
     }
 
@@ -419,7 +416,7 @@ void chameleon_pzlansy_generic( cham_normtype_t norm, cham_uplo_t uplo, cham_tra
                 INSERT_TASK_dlacpy(
                     &options,
                     ChamUpperLower, 1, 1, 1,
-                    W( &Welt, 0, 0 ), 1, W( &Welt, m, n ), 1);
+                    W( &Welt, 0, 0 ), W( &Welt, m, n ));
             }
         }
     }
diff --git a/compute/pzlascal.c b/compute/pzlascal.c
index 93119debc461d8bf2f69062e176f5df409ee2004..15c81af25faaf89ee1ba38c8a99cb123e5b3a776 100644
--- a/compute/pzlascal.c
+++ b/compute/pzlascal.c
@@ -31,7 +31,6 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc
 
     int tempmm, tempnn, tempmn, tempnm;
     int m, n;
-    int ldam, ldan;
     int minmnt = chameleon_min(A->mt, A->nt);
 
     chamctxt = chameleon_context_self();
@@ -46,21 +45,19 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc
         for (n = 0; n < minmnt; n++) {
             tempnm = n == A->mt-1 ? A->m-n*A->mb : A->mb;
             tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-            ldan = BLKLDD(A, n);
 
             INSERT_TASK_zlascal(
                 &options,
                 ChamLower, tempnm, tempnn, A->mb,
-                alpha, A(n, n), ldan);
+                alpha, A(n, n));
 
             for (m = n+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-A->mb*m : A->nb;
-                ldam = BLKLDD(A, m);
 
                 INSERT_TASK_zlascal(
                     &options,
                     ChamUpperLower, tempmm, tempnn, A->mb,
-                    alpha, A(m, n), ldam);
+                    alpha, A(m, n));
             }
         }
         break;
@@ -69,12 +66,11 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc
         for (m = 0; m < minmnt; m++) {
             tempmm = m == A->mt-1 ? A->m-A->mb*m : A->nb;
             tempmn = m == A->nt-1 ? A->n-m*A->nb : A->nb;
-            ldam = BLKLDD(A, m);
 
             INSERT_TASK_zlascal(
                 &options,
                 ChamUpper, tempmm, tempmn, A->mb,
-                alpha, A(m, m), ldam);
+                alpha, A(m, m));
 
             for (n = m+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -82,7 +78,7 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc
                 INSERT_TASK_zlascal(
                     &options,
                     ChamUpperLower, tempmm, tempnn, A->mb,
-                    alpha, A(m, n), ldam);
+                    alpha, A(m, n));
             }
         }
         break;
@@ -91,7 +87,6 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc
     default:
         for (m = 0; m < A->mt; m++) {
             tempmm = m == A->mt-1 ? A->m-A->mb*m : A->nb;
-            ldam = BLKLDD(A, m);
 
             for (n = 0; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -99,7 +94,7 @@ void chameleon_pzlascal(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha, CHAM_desc
                 INSERT_TASK_zlascal(
                     &options,
                     ChamUpperLower, tempmm, tempnn, A->mb,
-                    alpha, A(m, n), ldam);
+                    alpha, A(m, n));
             }
         }
     }
diff --git a/compute/pzlaset.c b/compute/pzlaset.c
index d874f8dd8798fb72a3bc1536ab55dc1e4e47a698..02fd03af9b2ac03cb6cf9c91efeaa14e1c2cb814 100644
--- a/compute/pzlaset.c
+++ b/compute/pzlaset.c
@@ -38,7 +38,6 @@ void chameleon_pzlaset( cham_uplo_t uplo,
     RUNTIME_option_t options;
 
     int i, j;
-    int ldai, ldaj;
     int tempim;
     int tempjm, tempjn;
     int minmn = chameleon_min(A->mt, A->nt);
@@ -54,26 +53,23 @@ void chameleon_pzlaset( cham_uplo_t uplo,
        for (j = 0; j < minmn; j++){
            tempjm = j == A->mt-1 ? A->m-j*A->mb : A->mb;
            tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb;
-           ldaj = BLKLDD(A, j);
            INSERT_TASK_zlaset(
                &options,
                ChamLower, tempjm, tempjn, alpha, beta,
-               A(j, j), ldaj);
+               A(j, j));
 
            for (i = j+1; i < A->mt; i++){
                tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb;
-               ldai = BLKLDD(A, i);
                INSERT_TASK_zlaset(
                    &options,
                    ChamUpperLower, tempim, tempjn, alpha, alpha,
-                   A(i, j), ldai);
+                   A(i, j));
            }
        }
     }
     else if (uplo == ChamUpper) {
         for (i = 0; i < A->mt; i++) {
             tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb;
-            ldai = BLKLDD(A, i);
 
             if ( i < A->nt ) {
                 j = i;
@@ -82,7 +78,7 @@ void chameleon_pzlaset( cham_uplo_t uplo,
                 INSERT_TASK_zlaset(
                     &options,
                     uplo, tempim, tempjn,
-                    alpha, beta, A(i, j), ldai);
+                    alpha, beta, A(i, j));
             }
             for (j = i+1; j < A->nt; j++) {
                 tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb;
@@ -90,21 +86,20 @@ void chameleon_pzlaset( cham_uplo_t uplo,
                 INSERT_TASK_zlaset(
                     &options,
                     ChamUpperLower, tempim, tempjn,
-                    alpha, alpha, A(i, j), ldai);
+                    alpha, alpha, A(i, j));
             }
         }
     }
     else {
        for (i = 0; i < A->mt; i++){
            tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb;
-           ldai = BLKLDD(A, i);
            for (j = 0; j < A->nt; j++){
                tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb;
                INSERT_TASK_zlaset(
                    &options,
                    ChamUpperLower, tempim, tempjn,
                    alpha, (i == j) ? beta : alpha,
-                   A(i, j), ldai);
+                   A(i, j));
            }
        }
     }
diff --git a/compute/pzlaset2.c b/compute/pzlaset2.c
index b982ed47e9df7d2638a64ca169567d851e6d8b29..8fb02e425c8426e9814696e48d40027691761280 100644
--- a/compute/pzlaset2.c
+++ b/compute/pzlaset2.c
@@ -37,7 +37,6 @@ void chameleon_pzlaset2(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha,
     RUNTIME_option_t options;
 
     int i, j;
-    int ldai, ldaj;
     int tempim;
     int tempjm, tempjn;
     int minmn = chameleon_min(A->mt, A->nt);
@@ -53,19 +52,17 @@ void chameleon_pzlaset2(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha,
        for (j = 0; j < minmn; j++){
            tempjm = j == A->mt-1 ? A->m-j*A->mb : A->mb;
            tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb;
-           ldaj = BLKLDD(A, j);
            INSERT_TASK_zlaset2(
                &options,
                ChamLower, tempjm, tempjn, alpha,
-               A(j, j), ldaj);
+               A(j, j));
 
            for (i = j+1; i < A->mt; i++){
                tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb;
-               ldai = BLKLDD(A, i);
                INSERT_TASK_zlaset2(
                    &options,
                    ChamUpperLower, tempim, tempjn, alpha,
-                   A(i, j), ldai);
+                   A(i, j));
            }
        }
     }
@@ -74,33 +71,30 @@ void chameleon_pzlaset2(cham_uplo_t uplo, CHAMELEON_Complex64_t alpha,
            tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb;
            for (i = 0; i < chameleon_min(j, A->mt); i++){
                tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb;
-               ldai = BLKLDD(A, i);
                INSERT_TASK_zlaset2(
                    &options,
                    ChamUpperLower, tempim, tempjn, alpha,
-                   A(i, j), ldai);
+                   A(i, j));
            }
        }
        for (j = 0; j < minmn; j++){
            tempjm = j == A->mt-1 ? A->m-j*A->mb : A->mb;
            tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb;
-           ldaj = BLKLDD(A, j);
            INSERT_TASK_zlaset2(
                &options,
                ChamUpper, tempjm, tempjn, alpha,
-               A(j, j), ldaj);
+               A(j, j));
        }
     }
     else {
        for (i = 0; i < A->mt; i++){
            tempim = i == A->mt-1 ? A->m-i*A->mb : A->mb;
-           ldai = BLKLDD(A, i);
            for (j = 0; j < A->nt; j++){
                tempjn = j == A->nt-1 ? A->n-j*A->nb : A->nb;
                INSERT_TASK_zlaset2(
                    &options,
                    ChamUpperLower, tempim, tempjn, alpha,
-                   A(i, j), ldai);
+                   A(i, j));
            }
        }
     }
diff --git a/compute/pzlauum.c b/compute/pzlauum.c
index 660ab4e80e8e86fd79bb3115cae265940909ff53..9ad726dff6828c646cf259271d333f1f35131035 100644
--- a/compute/pzlauum.c
+++ b/compute/pzlauum.c
@@ -36,7 +36,6 @@ void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A,
     RUNTIME_option_t options;
 
     int k, m, n;
-    int ldak, ldam, ldan;
     int tempkm, tempkn;
 
     chamctxt = chameleon_context_self();
@@ -50,25 +49,22 @@ void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A,
     if (uplo == ChamLower) {
         for (k = 0; k < A->mt; k++) {
             tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-            ldak = BLKLDD(A, k);
             for(n = 0; n < k; n++) {
-                ldan = BLKLDD(A, n);
                 INSERT_TASK_zherk(
                     &options,
                     uplo, ChamConjTrans,
                     A->mb, tempkm, A->mb,
-                    1.0, A(k, n), ldak,
-                    1.0, A(n, n), ldan);
+                    1.0, A(k, n),
+                    1.0, A(n, n));
 
                 for(m = n+1; m < k; m++) {
-                    ldam = BLKLDD(A, m);
                     INSERT_TASK_zgemm(
                         &options,
                         ChamConjTrans, ChamNoTrans,
                         A->mb, A->nb, tempkm, A->mb,
-                        1.0, A(k, m), ldak,
-                             A(k, n), ldak,
-                        1.0, A(m, n), ldam);
+                        1.0, A(k, m),
+                             A(k, n),
+                        1.0, A(m, n));
                 }
             }
             for (n = 0; n < k; n++) {
@@ -77,14 +73,14 @@ void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A,
                     &options,
                     ChamLeft, uplo, ChamConjTrans, ChamNonUnit,
                     tempkm, A->nb, A->mb,
-                    1.0, A(k, k), ldak,
-                         A(k, n), ldak);
+                    1.0, A(k, k),
+                         A(k, n));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
             INSERT_TASK_zlauum(
                 &options,
                 uplo, tempkm, A->mb,
-                A(k, k), ldak);
+                A(k, k));
         }
     }
     /*
@@ -93,43 +89,39 @@ void chameleon_pzlauum(cham_uplo_t uplo, CHAM_desc_t *A,
     else {
         for (k = 0; k < A->mt; k++) {
             tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-            ldak = BLKLDD(A, k);
 
             for (m = 0; m < k; m++) {
-                ldam = BLKLDD(A, m);
                 INSERT_TASK_zherk(
                     &options,
                     uplo, ChamNoTrans,
                     A->mb, tempkn, A->mb,
-                    1.0, A(m, k), ldam,
-                    1.0, A(m, m), ldam);
+                    1.0, A(m, k),
+                    1.0, A(m, m));
 
                 for (n = m+1; n < k; n++){
-                    ldan = BLKLDD(A, n);
                     INSERT_TASK_zgemm(
                         &options,
                         ChamNoTrans, ChamConjTrans,
                         A->mb, A->nb, tempkn, A->mb,
-                        1.0, A(m, k), ldam,
-                             A(n, k), ldan,
-                        1.0, A(m, n), ldam);
+                        1.0, A(m, k),
+                             A(n, k),
+                        1.0, A(m, n));
                 }
             }
             for (m = 0; m < k; m++) {
-                ldam = BLKLDD(A, m);
                 RUNTIME_data_flush( sequence, A(m, k) );
                 INSERT_TASK_ztrmm(
                     &options,
                     ChamRight, uplo, ChamConjTrans, ChamNonUnit,
                     A->mb, tempkn, A->mb,
-                    1.0, A(k, k), ldak,
-                         A(m, k), ldam);
+                    1.0, A(k, k),
+                         A(m, k));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
             INSERT_TASK_zlauum(
                 &options,
                 uplo, tempkn, A->mb,
-                A(k, k), ldak);
+                A(k, k));
         }
     }
     RUNTIME_options_finalize(&options, chamctxt);
diff --git a/compute/pzplghe.c b/compute/pzplghe.c
index 27ae7d355601139cef866aca8ceca1e3be7d99ee..26fa75c031c2b85b57ae60b972c87d3f934f9a90 100644
--- a/compute/pzplghe.c
+++ b/compute/pzplghe.c
@@ -35,7 +35,6 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
     RUNTIME_option_t options;
 
     int m, n, minmn;
-    int ldam;
     int tempmm, tempnn;
 
     chamctxt = chameleon_context_self();
@@ -52,12 +51,11 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
 
             for (m = n; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
 
                 options.priority = m + n;
                 INSERT_TASK_zplghe(
                     &options,
-                    bump, tempmm, tempnn, A(m, n), ldam,
+                    bump, tempmm, tempnn, A(m, n),
                     A->m, m*A->mb, n*A->nb, seed );
             }
         }
@@ -66,7 +64,6 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
     case ChamUpper:
         for (m = 0; m < minmn; m++) {
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
 
             for (n = m; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -74,7 +71,7 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
                 options.priority = m + n;
                 INSERT_TASK_zplghe(
                     &options,
-                    bump, tempmm, tempnn, A(m, n), ldam,
+                    bump, tempmm, tempnn, A(m, n),
                     A->m, m*A->mb, n*A->nb, seed );
             }
         }
@@ -83,7 +80,6 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
     default:
         for (m = 0; m < A->mt; m++) {
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
 
             for (n = 0; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -91,7 +87,7 @@ void chameleon_pzplghe( double bump, cham_uplo_t uplo, CHAM_desc_t *A,
                 options.priority = m + n;
                 INSERT_TASK_zplghe(
                     &options,
-                    bump, tempmm, tempnn, A(m, n), ldam,
+                    bump, tempmm, tempnn, A(m, n),
                     A->m, m*A->mb, n*A->nb, seed );
             }
         }
diff --git a/compute/pzplgsy.c b/compute/pzplgsy.c
index 38bc8fcba6f6dd5831f0a5493efb6bf8400df559..cddda67a55e03239b06ab91726bb23ef8546268c 100644
--- a/compute/pzplgsy.c
+++ b/compute/pzplgsy.c
@@ -35,7 +35,6 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
     RUNTIME_option_t options;
 
     int m, n, minmn;
-    int ldam;
     int tempmm, tempnn;
 
     chamctxt = chameleon_context_self();
@@ -52,12 +51,11 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
 
             for (m = n; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
 
                 options.priority = m + n;
                 INSERT_TASK_zplgsy(
                     &options,
-                    bump, tempmm, tempnn, A(m, n), ldam,
+                    bump, tempmm, tempnn, A(m, n),
                     A->m, m*A->mb, n*A->nb, seed );
             }
         }
@@ -66,7 +64,6 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
     case ChamUpper:
         for (m = 0; m < minmn; m++) {
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
 
             for (n = m; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -74,7 +71,7 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
                 options.priority = m + n;
                 INSERT_TASK_zplgsy(
                     &options,
-                    bump, tempmm, tempnn, A(m, n), ldam,
+                    bump, tempmm, tempnn, A(m, n),
                     A->m, m*A->mb, n*A->nb, seed );
             }
         }
@@ -83,7 +80,6 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
     default:
         for (m = 0; m < A->mt; m++) {
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
 
             for (n = 0; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -91,7 +87,7 @@ void chameleon_pzplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, CHAM_desc_
                 options.priority = m + n;
                 INSERT_TASK_zplgsy(
                     &options,
-                    bump, tempmm, tempnn, A(m, n), ldam,
+                    bump, tempmm, tempnn, A(m, n),
                     A->m, m*A->mb, n*A->nb, seed );
             }
         }
diff --git a/compute/pzplrnt.c b/compute/pzplrnt.c
index d7b18ae25d80ed549a343d98edb102c259722bf0..cbcc50a674a1c6654df7b5e8f7f76df8f2781563 100644
--- a/compute/pzplrnt.c
+++ b/compute/pzplrnt.c
@@ -34,7 +34,6 @@ void chameleon_pzplrnt( CHAM_desc_t *A, unsigned long long int seed,
     RUNTIME_option_t options;
 
     int m, n;
-    int ldam;
     int tempmm, tempnn;
 
     chamctxt = chameleon_context_self();
@@ -45,14 +44,13 @@ void chameleon_pzplrnt( CHAM_desc_t *A, unsigned long long int seed,
 
     for (m = 0; m < A->mt; m++) {
         tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-        ldam = BLKLDD(A, m);
 
         for (n = 0; n < A->nt; n++) {
             tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
 
             INSERT_TASK_zplrnt(
                 &options,
-                tempmm, tempnn, A(m, n), ldam,
+                tempmm, tempnn, A(m, n),
                 A->m, m*A->mb, n*A->nb, seed );
         }
     }
diff --git a/compute/pzpotrf.c b/compute/pzpotrf.c
index 68b0e926887d73c3ef7847c50d4cf667ae6c8d31..c9c77abbf5ab8476f38a1036735b15507bf315b6 100644
--- a/compute/pzpotrf.c
+++ b/compute/pzpotrf.c
@@ -37,7 +37,6 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
     RUNTIME_option_t options;
 
     int k, m, n;
-    int ldak, ldam, ldan;
     int tempkm, tempmm, tempnn;
     size_t ws_host   = 0;
 
@@ -60,52 +59,48 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
             RUNTIME_iteration_push(chamctxt, k);
 
             tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-            ldak = BLKLDD(A, k);
 
             options.priority = 2*A->mt - 2*k;
             INSERT_TASK_zpotrf(
                 &options,
                 ChamLower, tempkm, A->mb,
-                A(k, k), ldak, A->nb*k);
+                A(k, k), A->nb*k);
 
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
 
                 options.priority = 2*A->mt - 2*k - m;
                 INSERT_TASK_ztrsm(
                     &options,
                     ChamRight, ChamLower, ChamConjTrans, ChamNonUnit,
                     tempmm, A->mb, A->mb,
-                    zone, A(k, k), ldak,
-                          A(m, k), ldam);
+                    zone, A(k, k),
+                          A(m, k));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
 
             for (n = k+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-                ldan = BLKLDD(A, n);
 
                 options.priority = 2*A->mt - 2*k - n;
                 INSERT_TASK_zherk(
                     &options,
                     ChamLower, ChamNoTrans,
                     tempnn, A->nb, A->mb,
-                    -1.0, A(n, k), ldan,
-                     1.0, A(n, n), ldan);
+                    -1.0, A(n, k),
+                     1.0, A(n, n));
 
                 for (m = n+1; m < A->mt; m++) {
                     tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb;
-                    ldam = BLKLDD(A, m);
 
                     options.priority = 2*A->mt - 2*k - n - m;
                     INSERT_TASK_zgemm(
                         &options,
                         ChamNoTrans, ChamConjTrans,
                         tempmm, tempnn, A->mb, A->mb,
-                        mzone, A(m, k), ldam,
-                               A(n, k), ldan,
-                        zone,  A(m, n), ldam);
+                        mzone, A(m, k),
+                               A(n, k),
+                        zone,  A(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(n, k) );
             }
@@ -120,14 +115,13 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
             RUNTIME_iteration_push(chamctxt, k);
 
             tempkm = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-            ldak = BLKLDD(A, k);
 
             options.priority = 2*A->nt - 2*k;
             INSERT_TASK_zpotrf(
                 &options,
                 ChamUpper,
                 tempkm, A->mb,
-                A(k, k), ldak, A->nb*k);
+                A(k, k), A->nb*k);
 
             for (n = k+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n - n*A->nb : A->nb;
@@ -137,22 +131,21 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
                     &options,
                     ChamLeft, ChamUpper, ChamConjTrans, ChamNonUnit,
                     A->mb, tempnn, A->mb,
-                    zone, A(k, k), ldak,
-                          A(k, n), ldak);
+                    zone, A(k, k),
+                          A(k, n));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
 
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
 
                 options.priority = 2*A->nt - 2*k  - m;
                 INSERT_TASK_zherk(
                     &options,
                     ChamUpper, ChamConjTrans,
                     tempmm, A->mb, A->mb,
-                    -1.0, A(k, m), ldak,
-                     1.0, A(m, m), ldam);
+                    -1.0, A(k, m),
+                     1.0, A(m, m));
 
                 for (n = m+1; n < A->nt; n++) {
                     tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -162,9 +155,9 @@ void chameleon_pzpotrf(cham_uplo_t uplo, CHAM_desc_t *A,
                         &options,
                         ChamConjTrans, ChamNoTrans,
                         tempmm, tempnn, A->mb, A->mb,
-                        mzone, A(k, m), ldak,
-                               A(k, n), ldak,
-                        zone,  A(m, n), ldam);
+                        mzone, A(k, m),
+                               A(k, n),
+                        zone,  A(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(k, m) );
             }
diff --git a/compute/pzpotrimm.c b/compute/pzpotrimm.c
index 8982ad9ced319b0ce12105a7c0510d315dd2ee12..7d924b9fe16a40168d55fbd8c739f0ee4c626169 100644
--- a/compute/pzpotrimm.c
+++ b/compute/pzpotrimm.c
@@ -36,8 +36,6 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
     RUNTIME_option_t options;
 
     int k, m, n;
-    int ldbm, ldcm;
-    int ldak, ldam, ldan;
     int tempkm, tempmm, tempnn, tempkn;
 
     CHAMELEON_Complex64_t alpha = (CHAMELEON_Complex64_t) 1.0;
@@ -64,45 +62,41 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
             RUNTIME_iteration_push(chamctxt, k);
 
             tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-            ldak = BLKLDD(A, k);
 
             INSERT_TASK_zpotrf(
                 &options,
                 ChamLower, tempkm, A->mb,
-                A(k, k), ldak, A->nb*k);
+                A(k, k), A->nb*k);
 
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
                 INSERT_TASK_ztrsm(
                     &options,
                     ChamRight, ChamLower, ChamConjTrans, ChamNonUnit,
                     tempmm, A->mb, A->mb,
-                    zone, A(k, k), ldak,
-                          A(m, k), ldam);
+                    zone, A(k, k),
+                          A(m, k));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
 
             for (n = k+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-                ldan = BLKLDD(A, n);
                 INSERT_TASK_zherk(
                     &options,
                     ChamLower, ChamNoTrans,
                     tempnn, A->nb, A->mb,
-                    -1.0, A(n, k), ldan,
-                     1.0, A(n, n), ldan);
+                    -1.0, A(n, k),
+                     1.0, A(n, n));
 
                 for (m = n+1; m < A->mt; m++) {
                     tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb;
-                    ldam = BLKLDD(A, m);
                     INSERT_TASK_zgemm(
                         &options,
                         ChamNoTrans, ChamConjTrans,
                         tempmm, tempnn, A->mb, A->mb,
-                        mzone, A(m, k), ldam,
-                               A(n, k), ldan,
-                        zone,  A(m, n), ldam);
+                        mzone, A(m, k),
+                               A(n, k),
+                        zone,  A(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(n, k) );
             }
@@ -116,28 +110,25 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
             RUNTIME_iteration_push(chamctxt, A->nt + k);
 
             tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-            ldak = BLKLDD(A, k);
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
                 INSERT_TASK_ztrsm(
                     &options,
                     ChamRight, uplo, ChamNoTrans, ChamNonUnit,
                     tempmm, tempkn, A->mb,
-                    mzone, A(k, k), ldak,
-                           A(m, k), ldam);
+                    mzone, A(k, k),
+                           A(m, k));
             }
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
                 for (n = 0; n < k; n++) {
                     INSERT_TASK_zgemm(
                         &options,
                         ChamNoTrans, ChamNoTrans,
                         tempmm, A->nb, tempkn, A->mb,
-                        zone, A(m, k), ldam,
-                              A(k, n), ldak,
-                        zone, A(m, n), ldam);
+                        zone, A(m, k),
+                              A(k, n),
+                        zone, A(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(m, k) );
             }
@@ -147,15 +138,15 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
                     &options,
                     ChamLeft, uplo, ChamNoTrans, ChamNonUnit,
                     tempkn, A->nb, A->mb,
-                    zone, A(k, k), ldak,
-                          A(k, n), ldak);
+                    zone, A(k, k),
+                          A(k, n));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
             INSERT_TASK_ztrtri(
                 &options,
                 uplo, ChamNonUnit,
                 tempkn, A->mb,
-                A(k, k), ldak, A->nb*k);
+                A(k, k), A->nb*k);
 
             RUNTIME_iteration_pop(chamctxt);
         }
@@ -166,25 +157,22 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
             RUNTIME_iteration_push(chamctxt, 2*A->nt + k);
 
             tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-            ldak = BLKLDD(A, k);
             for(n = 0; n < k; n++) {
-                ldan = BLKLDD(A, n);
                 INSERT_TASK_zherk(
                     &options,
                     uplo, ChamConjTrans,
                     A->mb, tempkm, A->mb,
-                    1.0, A(k, n), ldak,
-                    1.0, A(n, n), ldan);
+                    1.0, A(k, n),
+                    1.0, A(n, n));
 
                 for(m = n+1; m < k; m++) {
-                    ldam = BLKLDD(A, m);
                     INSERT_TASK_zgemm(
                         &options,
                         ChamConjTrans, ChamNoTrans,
                         A->mb, A->nb, tempkm, A->mb,
-                        1.0, A(k, m), ldak,
-                             A(k, n), ldak,
-                        1.0, A(m, n), ldam);
+                        1.0, A(k, m),
+                             A(k, n),
+                        1.0, A(m, n));
                 }
             }
             for (n = 0; n < k; n++) {
@@ -193,14 +181,14 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
                     &options,
                     ChamLeft, uplo, ChamConjTrans, ChamNonUnit,
                     tempkm, A->nb, A->mb,
-                    1.0, A(k, k), ldak,
-                         A(k, n), ldak);
+                    1.0, A(k, k),
+                         A(k, n));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
             INSERT_TASK_zlauum(
                 &options,
                 uplo, tempkm, A->mb,
-                A(k, k), ldak);
+                A(k, k));
 
             RUNTIME_iteration_pop(chamctxt);
         }
@@ -211,26 +199,22 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
             RUNTIME_iteration_push(chamctxt, 3*A->nt + k);
 
             tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
-            ldak = BLKLDD(A, k);
             zbeta = k == 0 ? beta : zone;
 
             for (m = 0; m < C->mt; m++) {
                 tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                ldbm = BLKLDD(B, m);
-                ldcm = BLKLDD(C, m);
 
                 for (n = 0; n < C->nt; n++) {
                     tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-                    ldan = BLKLDD(A, n);
 
                     if (k < n) {
                        INSERT_TASK_zgemm(
                            &options,
                            ChamNoTrans, ChamTrans,
                            tempmm, tempnn, tempkn, A->mb,
-                           alpha, B(m, k), ldbm,  /* ldbm * K */
-                                  A(n, k), ldan,  /* ldan * K */
-                           zbeta, C(m, n), ldcm); /* ldcm * Y */
+                           alpha, B(m, k),  /* ldbm * K */
+                                  A(n, k),  /* ldan * K */
+                           zbeta, C(m, n)); /* ldcm * Y */
                     }
                     else {
                         if (k == n) {
@@ -238,18 +222,18 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
                                &options,
                                ChamRight, uplo,
                                tempmm, tempnn, A->mb,
-                               alpha, A(k, k), ldak,  /* ldak * Y */
-                                      B(m, k), ldbm,  /* ldbm * Y */
-                               zbeta, C(m, n), ldcm); /* ldcm * Y */
+                               alpha, A(k, k),  /* ldak * Y */
+                                      B(m, k),  /* ldbm * Y */
+                               zbeta, C(m, n)); /* ldcm * Y */
                         }
                         else {
                             INSERT_TASK_zgemm(
                                 &options,
                                 ChamNoTrans, ChamNoTrans,
                                 tempmm, tempnn, tempkn, A->mb,
-                                alpha, B(m, k), ldbm,  /* ldbm * K */
-                                       A(k, n), ldak,  /* ldak * Y */
-                                zbeta, C(m, n), ldcm); /* ldcm * Y */
+                                alpha, B(m, k),  /* ldbm * K */
+                                       A(k, n),  /* ldak * Y */
+                                zbeta, C(m, n)); /* ldcm * Y */
                         }
                     }
                 }
@@ -273,12 +257,11 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
             RUNTIME_iteration_push(chamctxt, k);
 
             tempkm = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-            ldak = BLKLDD(A, k);
             INSERT_TASK_zpotrf(
                 &options,
                 ChamUpper,
                 tempkm, A->mb,
-                A(k, k), ldak, A->nb*k);
+                A(k, k), A->nb*k);
 
             for (n = k+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n - n*A->nb : A->nb;
@@ -286,21 +269,20 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
                     &options,
                     ChamLeft, ChamUpper, ChamConjTrans, ChamNonUnit,
                     A->mb, tempnn, A->mb,
-                    zone, A(k, k), ldak,
-                          A(k, n), ldak);
+                    zone, A(k, k),
+                          A(k, n));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
 
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
 
                 INSERT_TASK_zherk(
                     &options,
                     ChamUpper, ChamConjTrans,
                     tempmm, A->mb, A->mb,
-                    -1.0, A(k, m), ldak,
-                     1.0, A(m, m), ldam);
+                    -1.0, A(k, m),
+                     1.0, A(m, m));
 
                 for (n = m+1; n < A->nt; n++) {
                     tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -309,9 +291,9 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
                         &options,
                         ChamConjTrans, ChamNoTrans,
                         tempmm, tempnn, A->mb, A->mb,
-                        mzone, A(k, m), ldak,
-                               A(k, n), ldak,
-                        zone,  A(m, n), ldam);
+                        mzone, A(k, m),
+                               A(k, n),
+                        zone,  A(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(k, m) );
             }
@@ -325,46 +307,43 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
             RUNTIME_iteration_push(chamctxt, A->nt + k);
 
             tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-            ldak = BLKLDD(A, k);
             for (n = k+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                 INSERT_TASK_ztrsm(
                     &options,
                     ChamLeft, uplo, ChamNoTrans, ChamNonUnit,
                     tempkm, tempnn, A->mb,
-                    mzone, A(k, k), ldak,
-                           A(k, n), ldak);
+                    mzone, A(k, k),
+                           A(k, n));
             }
             for (n = k+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                 for (m = 0; m < k; m++) {
-                    ldam = BLKLDD(A, m);
                     INSERT_TASK_zgemm(
                         &options,
                         ChamNoTrans, ChamNoTrans,
                         A->mb, tempnn, tempkm, A->mb,
-                        zone, A(m, k), ldam,
-                              A(k, n), ldak,
-                        zone, A(m, n), ldam);
+                        zone, A(m, k),
+                              A(k, n),
+                        zone, A(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(k, n) );
             }
             for (m = 0; m < k; m++) {
-                ldam = BLKLDD(A, m);
                 RUNTIME_data_flush( sequence, A(m, k) );
                 INSERT_TASK_ztrsm(
                     &options,
                     ChamRight, uplo, ChamNoTrans, ChamNonUnit,
                     A->mb, tempkm, A->mb,
-                    zone, A(k, k), ldak,
-                          A(m, k), ldam);
+                    zone, A(k, k),
+                          A(m, k));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
             INSERT_TASK_ztrtri(
                 &options,
                 uplo, ChamNonUnit,
                 tempkm, A->mb,
-                A(k, k), ldak, A->mb*k);
+                A(k, k), A->mb*k);
 
             RUNTIME_iteration_pop(chamctxt);
         }
@@ -375,43 +354,39 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
             RUNTIME_iteration_push(chamctxt, 2*A->nt + k);
 
             tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-            ldak = BLKLDD(A, k);
 
             for (m = 0; m < k; m++) {
-                ldam = BLKLDD(A, m);
                 INSERT_TASK_zherk(
                     &options,
                     uplo, ChamNoTrans,
                     A->mb, tempkn, A->mb,
-                    1.0, A(m, k), ldam,
-                    1.0, A(m, m), ldam);
+                    1.0, A(m, k),
+                    1.0, A(m, m));
 
                 for (n = m+1; n < k; n++){
-                    ldan = BLKLDD(A, n);
                     INSERT_TASK_zgemm(
                         &options,
                         ChamNoTrans, ChamConjTrans,
                         A->mb, A->nb, tempkn, A->mb,
-                        1.0, A(m, k), ldam,
-                             A(n, k), ldan,
-                        1.0, A(m, n), ldam);
+                        1.0, A(m, k),
+                             A(n, k),
+                        1.0, A(m, n));
                 }
             }
             for (m = 0; m < k; m++) {
-                ldam = BLKLDD(A, m);
                 RUNTIME_data_flush( sequence, A(m, k) );
                 INSERT_TASK_ztrmm(
                     &options,
                     ChamRight, uplo, ChamConjTrans, ChamNonUnit,
                     A->mb, tempkn, A->mb,
-                    1.0, A(k, k), ldak,
-                         A(m, k), ldam);
+                    1.0, A(k, k),
+                         A(m, k));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
             INSERT_TASK_zlauum(
                 &options,
                 uplo, tempkn, A->mb,
-                A(k, k), ldak);
+                A(k, k));
 
             RUNTIME_iteration_pop(chamctxt);
         }
@@ -422,26 +397,22 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
             RUNTIME_iteration_push(chamctxt, 3*A->nt + k);
 
             tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
-            ldak = BLKLDD(A, k);
             zbeta = k == 0 ? beta : zone;
 
             for (m = 0; m < C->mt; m++) {
                 tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                ldbm = BLKLDD(B, m);
-                ldcm = BLKLDD(C, m);
 
                 for (n = 0; n < C->nt; n++) {
                     tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-                    ldan = BLKLDD(A, n);
 
                     if (k < n) {
                         INSERT_TASK_zgemm(
                             &options,
                             ChamNoTrans, ChamNoTrans,
                             tempmm, tempnn, tempkn, A->mb,
-                            alpha, B(m, k), ldbm,  /* ldbm * K */
-                                   A(k, n), ldak,  /* ldak * Y */
-                            zbeta, C(m, n), ldcm); /* ldcm * Y */
+                            alpha, B(m, k),  /* ldbm * K */
+                                   A(k, n),  /* ldak * Y */
+                            zbeta, C(m, n)); /* ldcm * Y */
                     }
                     else {
                         if (k == n) {
@@ -449,18 +420,18 @@ void chameleon_pzpotrimm(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_
                                 &options,
                                 ChamRight, uplo,
                                 tempmm, tempnn, A->mb,
-                                alpha, A(k, k), ldak,  /* ldak * Y */
-                                       B(m, k), ldbm,  /* ldbm * Y */
-                                zbeta, C(m, n), ldcm); /* ldcm * Y */
+                                alpha, A(k, k),  /* ldak * Y */
+                                       B(m, k),  /* ldbm * Y */
+                                zbeta, C(m, n)); /* ldcm * Y */
                         }
                         else {
                             INSERT_TASK_zgemm(
                                 &options,
                                 ChamNoTrans, ChamTrans,
                                 tempmm, tempnn, tempkn, A->mb,
-                                alpha, B(m, k), ldbm,  /* ldbm * K */
-                                       A(n, k), ldan,  /* ldan * K */
-                                zbeta, C(m, n), ldcm); /* ldcm * Y */
+                                alpha, B(m, k),  /* ldbm * K */
+                                       A(n, k),  /* ldan * K */
+                                zbeta, C(m, n)); /* ldcm * Y */
                         }
                     }
                 }
diff --git a/compute/pzsymm.c b/compute/pzsymm.c
index f9d724c08ab8835e1dddffc332bb4298facc9ee2..d953dacf503be5cc2e3c513d0b4d43ab8647e033 100644
--- a/compute/pzsymm.c
+++ b/compute/pzsymm.c
@@ -43,7 +43,6 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
     RUNTIME_sequence_t *sequence = options->sequence;
     cham_trans_t transA;
     int m, n, k, p, q, KT, K, lp, lq;
-    int ldcm;
     int tempmm, tempnn, tempkk;
     int lookahead, myp, myq;
 
@@ -64,7 +63,7 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
         /* Transfert ownership of the k column of A or B */
         for (m = 0; m < C->mt; m ++ ) {
-            int Am, Ak, ldam;
+            int Am, Ak;
             int tempam, tempak;
 
             tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb;
@@ -85,13 +84,12 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                 tempam = tempmm;
                 tempak = tempkk;
             }
-            ldam = BLKLDD( A, Am );
 
             INSERT_TASK_zlacpy(
                 options,
                 ChamUpperLower, tempam, tempak, C->mb,
-                A( Am, Ak ),              ldam,
-                WA( m, (k % C->q) + lq ), WA->mb );
+                A( Am, Ak ),
+                WA( m, (k % C->q) + lq ) );
 
             RUNTIME_data_flush( sequence, A( Am, Ak ) );
 
@@ -99,23 +97,21 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempam, tempak, C->mb,
-                    WA( m, ((k+q-1) % C->q) + lq ), WA->mb,
-                    WA( m, ((k+q)   % C->q) + lq ), WA->mb );
+                    WA( m, ((k+q-1) % C->q) + lq ),
+                    WA( m, ((k+q)   % C->q) + lq ) );
             }
         }
 
         /* Transfert ownership of the k row of B, or A */
         for (n = 0; n < C->nt; n++) {
-            int ldbk;
 
             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-            ldbk = BLKLDD( B, k );
 
             INSERT_TASK_zlacpy(
                 options,
                 ChamUpperLower, tempkk, tempnn, C->mb,
-                B(   k,              n ), ldbk,
-                WB( (k % C->p) + lp, n ), WB->mb );
+                B(   k,              n ),
+                WB( (k % C->p) + lp, n ) );
 
             RUNTIME_data_flush( sequence, B( k, n ) );
 
@@ -123,15 +119,14 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempkk, tempnn, C->mb,
-                    WB( ((k+p-1) % C->p) + lp, n ), WB->mb,
-                    WB( ((k+p)   % C->p) + lp, n ), WB->mb );
+                    WB( ((k+p-1) % C->p) + lp, n ),
+                    WB( ((k+p)   % C->p) + lp, n ) );
             }
         }
 
         /* Perform the update of this iteration */
         for (m = myp; m < C->mt; m+=C->p) {
             tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-            ldcm = BLKLDD(C, m);
 
             if ( k == m ) {
                 for (n = myq; n < C->nt; n+=C->q) {
@@ -140,9 +135,9 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                     INSERT_TASK_zsymm(
                         options, ChamLeft, uplo,
                         tempmm, tempnn, A->mb,
-                        alpha, WA( m,        myq + lq ), WA->mb,
-                               WB( myp + lp, n        ), WB->mb,
-                        zbeta, C(  m,        n        ), ldcm );
+                        alpha, WA( m,        myq + lq ),
+                               WB( myp + lp, n        ),
+                        zbeta, C(  m,        n        ) );
                 }
             }
             else {
@@ -161,9 +156,9 @@ chameleon_pzsymm_summa_left( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                     INSERT_TASK_zgemm(
                         options, transA, ChamNoTrans,
                         tempmm, tempnn, tempkk, A->mb,
-                        alpha, WA( m,        myq + lq ), WA->mb,
-                               WB( myp + lp, n        ), WB->mb,
-                        zbeta, C(  m,        n        ), ldcm );
+                        alpha, WA( m,        myq + lq ),
+                               WB( myp + lp, n        ),
+                        zbeta, C(  m,        n        ) );
                 }
             }
         }
@@ -184,7 +179,6 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
     RUNTIME_sequence_t *sequence = options->sequence;
     cham_trans_t transA;
     int m, n, k, p, q, KT, K, lp, lq;
-    int ldcm;
     int tempmm, tempnn, tempkk;
     int lookahead, myp, myq;
 
@@ -205,16 +199,14 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
         /* Transfert ownership of the k column of A or B */
         for (m = 0; m < C->mt; m++ ) {
-            int ldbm;
 
             tempmm = m == C->mt-1 ? C->m - m * C->mb : C->mb;
-            ldbm = BLKLDD( B, m );
 
             INSERT_TASK_zlacpy(
                 options,
                 ChamUpperLower, tempmm, tempkk, C->mb,
-                B(  m,  k ),              ldbm,
-                WA( m, (k % C->q) + lq ), WA->mb );
+                B(  m,  k ),
+                WA( m, (k % C->q) + lq ) );
 
             RUNTIME_data_flush( sequence, B( m, k ) );
 
@@ -222,14 +214,14 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempmm, tempkk, C->mb,
-                    WA( m, ((k+q-1) % C->q) + lq ), WA->mb,
-                    WA( m, ((k+q)   % C->q) + lq ), WA->mb );
+                    WA( m, ((k+q-1) % C->q) + lq ),
+                    WA( m, ((k+q)   % C->q) + lq ) );
             }
         }
 
         /* Transfert ownership of the k row of B, or A */
         for (n = 0; n < C->nt; n++) {
-            int Ak, An, ldak;
+            int Ak, An;
             int tempak, tempan;
 
             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
@@ -249,13 +241,12 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                 tempak = tempkk;
                 tempan = tempnn;
             }
-            ldak = BLKLDD( A, Ak );
 
             INSERT_TASK_zlacpy(
                 options,
                 ChamUpperLower, tempak, tempan, C->mb,
-                A(  Ak,              An ), ldak,
-                WB( (k % C->p) + lp, n  ), WB->mb );
+                A(  Ak,              An ),
+                WB( (k % C->p) + lp, n  ) );
 
             RUNTIME_data_flush( sequence, A( Ak, An ) );
 
@@ -263,8 +254,8 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
                 INSERT_TASK_zlacpy(
                     options,
                     ChamUpperLower, tempak, tempan, C->mb,
-                    WB( ((k+p-1) % C->p) + lp, n ), WB->mb,
-                    WB( ((k+p)   % C->p) + lp, n ), WB->mb );
+                    WB( ((k+p-1) % C->p) + lp, n ),
+                    WB( ((k+p)   % C->p) + lp, n ) );
             }
         }
 
@@ -275,15 +266,14 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
             if ( k == n ) {
                 for (m = myp; m < C->mt; m+=C->p) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
 
                     /* A has been stored in WA or WB for the summa ring */
                     INSERT_TASK_zsymm(
                         options, ChamRight, uplo,
                         tempmm, tempnn, A->mb,
-                        alpha, WB( myp + lp, n        ), WB->mb,
-                               WA( m,        myq + lq ), WA->mb,
-                        zbeta, C(  m,        n        ), ldcm );
+                        alpha, WB( myp + lp, n        ),
+                               WA( m,        myq + lq ),
+                        zbeta, C(  m,        n        ) );
                 }
             }
             else {
@@ -298,14 +288,13 @@ chameleon_pzsymm_summa_right( CHAM_context_t *chamctxt, cham_uplo_t uplo,
 
                 for (m = myp; m < C->mt; m+=C->p) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
 
                     INSERT_TASK_zgemm(
                         options, ChamNoTrans, transA,
                         tempmm, tempnn, tempkk, A->mb,
-                        alpha, WA( m,        myq + lq ), WA->mb,
-                               WB( myp + lp, n        ), WB->mb,
-                        zbeta, C(  m,        n        ), ldcm );
+                        alpha, WA( m,        myq + lq ),
+                               WB( myp + lp, n        ),
+                        zbeta, C(  m,        n        ) );
                 }
             }
         }
@@ -366,7 +355,6 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                           RUNTIME_option_t *options )
 {
     int k, m, n;
-    int ldam, ldan, ldak, ldbk, ldbm, ldcm;
     int tempmm, tempnn, tempkn, tempkm;
 
     CHAMELEON_Complex64_t zbeta;
@@ -374,28 +362,24 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
 
     for(m = 0; m < C->mt; m++) {
         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-        ldcm = BLKLDD(C, m);
         for(n = 0; n < C->nt; n++) {
             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
             /*
              *  ChamLeft / ChamLower
              */
             if (side == ChamLeft) {
-                ldam = BLKLDD(A, m);
                 if (uplo == ChamLower) {
                     for (k = 0; k < C->mt; k++) {
                         tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb;
-                        ldak = BLKLDD(A, k);
-                        ldbk = BLKLDD(B, k);
                         zbeta = k == 0 ? beta : zone;
                         if (k < m) {
                             INSERT_TASK_zgemm(
                                 options,
                                 ChamNoTrans, ChamNoTrans,
                                 tempmm, tempnn, tempkm, A->mb,
-                                alpha, A(m, k), ldam,  /* lda * K */
-                                       B(k, n), ldbk,  /* ldb * Y */
-                                zbeta, C(m, n), ldcm); /* ldc * Y */
+                                alpha, A(m, k),  /* lda * K */
+                                       B(k, n),  /* ldb * Y */
+                                zbeta, C(m, n)); /* ldc * Y */
                         }
                         else {
                             if (k == m) {
@@ -403,18 +387,18 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                                     options,
                                     side, uplo,
                                     tempmm, tempnn, A->mb,
-                                    alpha, A(k, k), ldak,  /* ldak * X */
-                                           B(k, n), ldbk,  /* ldb  * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                    alpha, A(k, k),  /* ldak * X */
+                                           B(k, n),  /* ldb  * Y */
+                                    zbeta, C(m, n)); /* ldc  * Y */
                             }
                             else {
                                 INSERT_TASK_zgemm(
                                     options,
                                     ChamTrans, ChamNoTrans,
                                     tempmm, tempnn, tempkm, A->mb,
-                                    alpha, A(k, m), ldak,  /* ldak * X */
-                                           B(k, n), ldbk,  /* ldb  * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                    alpha, A(k, m),  /* ldak * X */
+                                           B(k, n),  /* ldb  * Y */
+                                    zbeta, C(m, n)); /* ldc  * Y */
                             }
                         }
                     }
@@ -425,17 +409,15 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                 else {
                     for (k = 0; k < C->mt; k++) {
                         tempkm = k == C->mt-1 ? C->m-k*C->mb : C->mb;
-                        ldak = BLKLDD(A, k);
-                        ldbk = BLKLDD(B, k);
                         zbeta = k == 0 ? beta : zone;
                         if (k < m) {
                             INSERT_TASK_zgemm(
                                 options,
                                 ChamTrans, ChamNoTrans,
                                 tempmm, tempnn, tempkm, A->mb,
-                                alpha, A(k, m), ldak,  /* ldak * X */
-                                       B(k, n), ldbk,  /* ldb  * Y */
-                                zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                alpha, A(k, m),  /* ldak * X */
+                                       B(k, n),  /* ldb  * Y */
+                                zbeta, C(m, n)); /* ldc  * Y */
                         }
                         else {
                             if (k == m) {
@@ -443,18 +425,18 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                                     options,
                                     side, uplo,
                                     tempmm, tempnn, A->mb,
-                                    alpha, A(k, k), ldak,  /* ldak * K */
-                                           B(k, n), ldbk,  /* ldb  * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                    alpha, A(k, k),  /* ldak * K */
+                                           B(k, n),  /* ldb  * Y */
+                                    zbeta, C(m, n)); /* ldc  * Y */
                             }
                             else {
                                 INSERT_TASK_zgemm(
                                     options,
                                     ChamNoTrans, ChamNoTrans,
                                     tempmm, tempnn, tempkm, A->mb,
-                                    alpha, A(m, k), ldam,  /* lda * K */
-                                           B(k, n), ldbk,  /* ldb * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc * Y */
+                                    alpha, A(m, k),  /* lda * K */
+                                           B(k, n),  /* ldb * Y */
+                                    zbeta, C(m, n)); /* ldc * Y */
                             }
                         }
                     }
@@ -464,21 +446,18 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
              *  ChamRight / ChamLower
              */
             else {
-                ldan = BLKLDD(A, n);
-                ldbm = BLKLDD(B, m);
                 if (uplo == ChamLower) {
                     for (k = 0; k < C->nt; k++) {
                         tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
-                        ldak = BLKLDD(A, k);
                         zbeta = k == 0 ? beta : zone;
                         if (k < n) {
                             INSERT_TASK_zgemm(
                                 options,
                                 ChamNoTrans, ChamTrans,
                                 tempmm, tempnn, tempkn, A->mb,
-                                alpha, B(m, k), ldbm,  /* ldb * K */
-                                       A(n, k), ldan,  /* lda * K */
-                                zbeta, C(m, n), ldcm); /* ldc * Y */
+                                alpha, B(m, k),  /* ldb * K */
+                                       A(n, k),  /* lda * K */
+                                zbeta, C(m, n)); /* ldc * Y */
                         }
                         else {
                             if (k == n) {
@@ -486,18 +465,18 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                                     options,
                                     side, uplo,
                                     tempmm, tempnn, A->mb,
-                                    alpha, A(k, k), ldak,  /* ldak * Y */
-                                           B(m, k), ldbm,  /* ldb  * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                    alpha, A(k, k),  /* ldak * Y */
+                                           B(m, k),  /* ldb  * Y */
+                                    zbeta, C(m, n)); /* ldc  * Y */
                             }
                             else {
                                 INSERT_TASK_zgemm(
                                     options,
                                     ChamNoTrans, ChamNoTrans,
                                     tempmm, tempnn, tempkn, A->mb,
-                                    alpha, B(m, k), ldbm,  /* ldb  * K */
-                                           A(k, n), ldak,  /* ldak * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                    alpha, B(m, k),  /* ldb  * K */
+                                           A(k, n),  /* ldak * Y */
+                                    zbeta, C(m, n)); /* ldc  * Y */
                             }
                         }
                     }
@@ -508,16 +487,15 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                 else {
                     for (k = 0; k < C->nt; k++) {
                         tempkn = k == C->nt-1 ? C->n-k*C->nb : C->nb;
-                        ldak = BLKLDD(A, k);
                         zbeta = k == 0 ? beta : zone;
                         if (k < n) {
                             INSERT_TASK_zgemm(
                                 options,
                                 ChamNoTrans, ChamNoTrans,
                                 tempmm, tempnn, tempkn, A->mb,
-                                alpha, B(m, k), ldbm,  /* ldb  * K */
-                                       A(k, n), ldak,  /* ldak * Y */
-                                zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                alpha, B(m, k),  /* ldb  * K */
+                                       A(k, n),  /* ldak * Y */
+                                zbeta, C(m, n)); /* ldc  * Y */
                         }
                         else {
                             if (k == n) {
@@ -525,18 +503,18 @@ chameleon_pzsymm_generic( CHAM_context_t *chamctxt, cham_side_t side, cham_uplo_
                                     options,
                                     side, uplo,
                                     tempmm, tempnn, A->mb,
-                                    alpha, A(k, k), ldak,  /* ldak * Y */
-                                           B(m, k), ldbm,  /* ldb  * Y */
-                                    zbeta, C(m, n), ldcm); /* ldc  * Y */
+                                    alpha, A(k, k),  /* ldak * Y */
+                                           B(m, k),  /* ldb  * Y */
+                                    zbeta, C(m, n)); /* ldc  * Y */
                             }
                             else {
                                 INSERT_TASK_zgemm(
                                     options,
                                     ChamNoTrans, ChamTrans,
                                     tempmm, tempnn, tempkn, A->mb,
-                                    alpha, B(m, k), ldbm,  /* ldb * K */
-                                           A(n, k), ldan,  /* lda * K */
-                                    zbeta, C(m, n), ldcm); /* ldc * Y */
+                                    alpha, B(m, k),  /* ldb * K */
+                                           A(n, k),  /* lda * K */
+                                    zbeta, C(m, n)); /* ldc * Y */
                             }
                         }
                     }
diff --git a/compute/pzsyr2k.c b/compute/pzsyr2k.c
index 0b5f7195f398bd8319a38868cc6ac8ac7de92d55..c0c34cfd62605e0d85cd80c2b4115b46bbf89538 100644
--- a/compute/pzsyr2k.c
+++ b/compute/pzsyr2k.c
@@ -38,8 +38,6 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans,
     RUNTIME_option_t options;
 
     int m, n, k, mmin, mmax;
-    int ldak, ldam, ldan, ldcm, ldcn;
-    int ldbk, ldbm, ldbn;
     int tempnn, tempmm, tempkn, tempkm;
 
     CHAMELEON_Complex64_t zone   = (CHAMELEON_Complex64_t)1.0;
@@ -53,9 +51,6 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans,
 
     for (n = 0; n < C->nt; n++) {
         tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-        ldan = BLKLDD(A, n);
-        ldbn = BLKLDD(B, n);
-        ldcn = BLKLDD(C, n);
 
         if (uplo == ChamLower) {
             mmin = n+1;
@@ -77,15 +72,12 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans,
                     &options,
                     uplo, trans,
                     tempnn, tempkn, A->mb,
-                    alpha, A(n, k), ldan, /* ldan * K */
-                           B(n, k), ldbn,
-                    zbeta, C(n, n), ldcn); /* ldc  * N */
+                    alpha, A(n, k), /* ldan * K */
+                           B(n, k),
+                    zbeta, C(n, n)); /* ldc  * N */
             }
             for (m = mmin; m < mmax; m++) {
                 tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                ldam = BLKLDD(A, m);
-                ldbm = BLKLDD(B, m);
-                ldcm = BLKLDD(C, m);
                 for (k = 0; k < A->nt; k++) {
                     tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                     zbeta = k == 0 ? beta : zone;
@@ -93,17 +85,17 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans,
                         &options,
                         ChamNoTrans, ChamTrans,
                         tempmm, tempnn, tempkn, A->mb,
-                        alpha, A(m, k), ldam,
-                               B(n, k), ldbn,
-                        zbeta, C(m, n), ldcm);
+                        alpha, A(m, k),
+                               B(n, k),
+                        zbeta, C(m, n));
 
                     INSERT_TASK_zgemm(
                         &options,
                         ChamNoTrans, ChamTrans,
                         tempmm, tempnn, tempkn, A->mb,
-                        alpha, B(m, k), ldbm,
-                               A(n, k), ldan,
-                        zone,  C(m, n), ldcm);
+                        alpha, B(m, k),
+                               A(n, k),
+                        zone,  C(m, n));
                 }
             }
         }
@@ -113,40 +105,35 @@ void chameleon_pzsyr2k( cham_uplo_t uplo, cham_trans_t trans,
         else {
             for (k = 0; k < A->mt; k++) {
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                ldak = BLKLDD(A, k);
-                ldbk = BLKLDD(B, k);
                 zbeta = k == 0 ? beta : zone;
                 INSERT_TASK_zsyr2k(
                     &options,
                     uplo, trans,
                     tempnn, tempkm, A->mb,
-                    alpha, A(k, n), ldak,  /* lda * N */
-                           B(k, n), ldbk,
-                    zbeta, C(n, n), ldcn); /* ldc * N */
+                    alpha, A(k, n),  /* lda * N */
+                           B(k, n),
+                    zbeta, C(n, n)); /* ldc * N */
             }
             for (m = mmin; m < mmax; m++) {
                 tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                ldcm = BLKLDD(C, m);
                 for (k = 0; k < A->mt; k++) {
                     tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                    ldak = BLKLDD(A, k);
-                    ldbk = BLKLDD(B, k);
                     zbeta = k == 0 ? beta : zone;
                     INSERT_TASK_zgemm(
                         &options,
                         ChamTrans, ChamNoTrans,
                         tempmm, tempnn, tempkm, A->mb,
-                        alpha, A(k, m), ldak,
-                               B(k, n), ldbk,
-                        zbeta, C(m, n), ldcm);
+                        alpha, A(k, m),
+                               B(k, n),
+                        zbeta, C(m, n));
 
                     INSERT_TASK_zgemm(
                         &options,
                         ChamTrans, ChamNoTrans,
                         tempmm, tempnn, tempkm, A->mb,
-                        alpha, B(k, m), ldbk,
-                               A(k, n), ldak,
-                        zone,  C(m, n), ldcm );
+                        alpha, B(k, m),
+                               A(k, n),
+                        zone,  C(m, n) );
                 }
             }
         }
diff --git a/compute/pzsyrk.c b/compute/pzsyrk.c
index 8f6b3013ac6f6bd8632e225c502d5296ab1b7567..74ea7112e6ef2074568d133532e5f42256e49d92 100644
--- a/compute/pzsyrk.c
+++ b/compute/pzsyrk.c
@@ -38,7 +38,6 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
     RUNTIME_option_t options;
 
     int m, n, k;
-    int ldak, ldam, ldan, ldcm, ldcn;
     int tempnn, tempmm, tempkn, tempkm;
 
     CHAMELEON_Complex64_t zbeta;
@@ -52,8 +51,6 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
 
     for (n = 0; n < C->nt; n++) {
         tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-        ldan = BLKLDD(A, n);
-        ldcn = BLKLDD(C, n);
         /*
          *  ChamNoTrans
          */
@@ -65,8 +62,8 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
                     &options,
                     uplo, trans,
                     tempnn, tempkn, A->mb,
-                    alpha, A(n, k), ldan, /* ldan * K */
-                    zbeta, C(n, n), ldcn); /* ldc  * N */
+                    alpha, A(n, k), /* ldan * K */
+                    zbeta, C(n, n)); /* ldc  * N */
             }
             /*
              *  ChamNoTrans / ChamLower
@@ -74,8 +71,6 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
             if (uplo == ChamLower) {
                 for (m = n+1; m < C->mt; m++) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldam = BLKLDD(A, m);
-                    ldcm = BLKLDD(C, m);
                     for (k = 0; k < A->nt; k++) {
                         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                         zbeta = k == 0 ? beta : zone;
@@ -83,9 +78,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
                             &options,
                             trans, ChamTrans,
                             tempmm, tempnn, tempkn, A->mb,
-                            alpha, A(m, k), ldam,  /* ldam * K */
-                                   A(n, k), ldan,  /* ldan * K */
-                            zbeta, C(m, n), ldcm); /* ldc  * N */
+                            alpha, A(m, k),  /* ldam * K */
+                                   A(n, k),  /* ldan * K */
+                            zbeta, C(m, n)); /* ldc  * N */
                     }
                 }
             }
@@ -95,7 +90,6 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
             else {
                 for (m = n+1; m < C->mt; m++) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldam = BLKLDD(A, m);
                     for (k = 0; k < A->nt; k++) {
                         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
                         zbeta = k == 0 ? beta : zone;
@@ -103,9 +97,9 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
                             &options,
                             trans, ChamTrans,
                             tempnn, tempmm, tempkn, A->mb,
-                            alpha, A(n, k), ldan,  /* ldan * K */
-                                   A(m, k), ldam,  /* ldam * M */
-                            zbeta, C(n, m), ldcn); /* ldc  * M */
+                            alpha, A(n, k),  /* ldan * K */
+                                   A(m, k),  /* ldam * M */
+                            zbeta, C(n, m)); /* ldc  * M */
                     }
                 }
             }
@@ -116,14 +110,13 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
         else {
             for (k = 0; k < A->mt; k++) {
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                ldak = BLKLDD(A, k);
                 zbeta = k == 0 ? beta : zone;
                 INSERT_TASK_zsyrk(
                     &options,
                     uplo, trans,
                     tempnn, tempkm, A->mb,
-                    alpha, A(k, n), ldak,  /* lda * N */
-                    zbeta, C(n, n), ldcn); /* ldc * N */
+                    alpha, A(k, n),  /* lda * N */
+                    zbeta, C(n, n)); /* ldc * N */
             }
             /*
              *  ChamTrans / ChamLower
@@ -131,18 +124,16 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
             if (uplo == ChamLower) {
                 for (m = n+1; m < C->mt; m++) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
                     for (k = 0; k < A->mt; k++) {
                         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                        ldak = BLKLDD(A, k);
                         zbeta = k == 0 ? beta : zone;
                         INSERT_TASK_zgemm(
                             &options,
                             trans, ChamNoTrans,
                             tempmm, tempnn, tempkm, A->mb,
-                            alpha, A(k, m), ldak,  /* lda * M */
-                                   A(k, n), ldak,  /* lda * N */
-                            zbeta, C(m, n), ldcm); /* ldc * N */
+                            alpha, A(k, m),  /* lda * M */
+                                   A(k, n),  /* lda * N */
+                            zbeta, C(m, n)); /* ldc * N */
                     }
                 }
             }
@@ -154,15 +145,14 @@ void chameleon_pzsyrk(cham_uplo_t uplo, cham_trans_t trans,
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                     for (k = 0; k < A->mt; k++) {
                         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                        ldak = BLKLDD(A, k);
                         zbeta = k == 0 ? beta : zone;
                         INSERT_TASK_zgemm(
                             &options,
                             trans, ChamNoTrans,
                             tempnn, tempmm, tempkm, A->mb,
-                            alpha, A(k, n), ldak,  /* lda * K */
-                                   A(k, m), ldak,  /* lda * M */
-                            zbeta, C(n, m), ldcn); /* ldc * M */
+                            alpha, A(k, n),  /* lda * K */
+                                   A(k, m),  /* lda * M */
+                            zbeta, C(n, m)); /* ldc * M */
                     }
                 }
             }
diff --git a/compute/pzsytrf.c b/compute/pzsytrf.c
index 381f7e4a1f7533919f5877a3362a8427893db552..42bced9d3db381927f2b27cf7fa75a39e60822ba 100644
--- a/compute/pzsytrf.c
+++ b/compute/pzsytrf.c
@@ -36,7 +36,6 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
     RUNTIME_option_t options;
 
     int k, m, n;
-    int ldak, ldam, ldan;
     int tempkm, tempmm, tempnn;
     size_t ws_host   = 0;
 
@@ -59,45 +58,41 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
             RUNTIME_iteration_push(chamctxt, k);
 
             tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-            ldak = BLKLDD(A, k);
 
             INSERT_TASK_zsytrf_nopiv(
                 &options,
                 ChamLower, tempkm, A->mb,
-                A(k, k), ldak, A->nb*k);
+                A(k, k), A->nb*k);
 
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
                 INSERT_TASK_ztrsm(
                     &options,
                     ChamRight, ChamLower, ChamTrans, ChamNonUnit,
                     tempmm, A->mb, A->mb,
-                    zone, A(k, k), ldak,
-                          A(m, k), ldam);
+                    zone, A(k, k),
+                          A(m, k));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
 
             for (n = k+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
-                ldan = BLKLDD(A, n);
                 INSERT_TASK_zsyrk(
                     &options,
                     ChamLower, ChamNoTrans,
                     tempnn, A->nb, A->mb,
-                    -1.0, A(n, k), ldan,
-                     1.0, A(n, n), ldan);
+                    -1.0, A(n, k),
+                     1.0, A(n, n));
 
                 for (m = n+1; m < A->mt; m++) {
                     tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb;
-                    ldam = BLKLDD(A, m);
                     INSERT_TASK_zgemm(
                         &options,
                         ChamNoTrans, ChamTrans,
                         tempmm, tempnn, A->mb, A->mb,
-                        mzone, A(m, k), ldam,
-                               A(n, k), ldan,
-                        zone,  A(m, n), ldam);
+                        mzone, A(m, k),
+                               A(n, k),
+                        zone,  A(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(n, k) );
             }
@@ -113,12 +108,11 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
             RUNTIME_iteration_push(chamctxt, k);
 
             tempkm = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-            ldak = BLKLDD(A, k);
             INSERT_TASK_zsytrf_nopiv(
                 &options,
                 ChamUpper,
                 tempkm, A->mb,
-                A(k, k), ldak, A->nb*k);
+                A(k, k), A->nb*k);
 
             for (n = k+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n - n*A->nb : A->nb;
@@ -126,21 +120,20 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
                     &options,
                     ChamLeft, ChamUpper, ChamTrans, ChamNonUnit,
                     A->mb, tempnn, A->mb,
-                    zone, A(k, k), ldak,
-                          A(k, n), ldak);
+                    zone, A(k, k),
+                          A(k, n));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
 
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m - m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
 
                 INSERT_TASK_zsyrk(
                     &options,
                     ChamUpper, ChamTrans,
                     tempmm, A->mb, A->mb,
-                    -1.0, A(k, m), ldak,
-                     1.0, A(m, m), ldam);
+                    -1.0, A(k, m),
+                     1.0, A(m, m));
 
                 for (n = m+1; n < A->nt; n++) {
                     tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
@@ -149,9 +142,9 @@ void chameleon_pzsytrf(cham_uplo_t uplo, CHAM_desc_t *A,
                         &options,
                         ChamTrans, ChamNoTrans,
                         tempmm, tempnn, A->mb, A->mb,
-                        mzone, A(k, m), ldak,
-                               A(k, n), ldak,
-                        zone,  A(m, n), ldam);
+                        mzone, A(k, m),
+                               A(k, n),
+                        zone,  A(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(k, m) );
             }
diff --git a/compute/pztile2band.c b/compute/pztile2band.c
index 45766f7a028926ae61909d1168fd5e10124d8acb..ef9d54becc0d1dc245af56591152ceab371835bf 100644
--- a/compute/pztile2band.c
+++ b/compute/pztile2band.c
@@ -34,7 +34,6 @@ void chameleon_pztile2band(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
     RUNTIME_option_t options;
 
     int j;
-    int ldaj, ldx;
     int tempjm, tempjn;
     int minmnt = chameleon_min(A->mt, A->nt);
 
@@ -44,7 +43,8 @@ void chameleon_pztile2band(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
     }
     RUNTIME_options_init(&options, chamctxt, sequence, request);
 
-    ldx = B->mb-1;
+    /* FIXME: this routine is incorrect now that the leading dimensions (ld) are no longer passed to the tasks; it needs a dedicated insert_task. */
+    assert( 0 );
 
     /*
      *  ChamLower => Lower Band
@@ -57,28 +57,26 @@ void chameleon_pztile2band(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
 
            tempjm = j == A->mt-1 ? A->m - j * A->mb : A->mb;
            tempjn = j == B->nt-1 ? B->n - j * B->nb : B->nb;
-           ldaj = BLKLDD(A, j);
 
            INSERT_TASK_zlaset(
                &options,
                ChamUpperLower, B->mb, tempjn,
                0., 0.,
-               B(0, j), B->mb );
+               B(0, j) );
 
            INSERT_TASK_zlacpy(
                &options,
                ChamLower, tempjm, tempjn, A->nb,
-               A(j, j), ldaj,
-               B(0, j), ldx );
+               A(j, j),
+               B(0, j) );
 
            if( j<minmnt-1 ){
                tempjm = (j+1) == A->mt-1 ? A->m-(j+1)*A->mb : A->mb;
-               ldaj = BLKLDD(A, j+1);
                INSERT_TASK_zlacpyx(
                    &options,
                    ChamUpper, tempjm, tempjn, A->nb,
-                   0,     A(j+1, j), ldaj,
-                   A->nb, B(0,   j), ldx);
+                   0,     A(j+1, j),
+                   A->nb, B(0,   j));
            }
        }
     }
@@ -88,28 +86,27 @@ void chameleon_pztile2band(cham_uplo_t uplo, CHAM_desc_t *A, CHAM_desc_t *B,
            assert( A->n == B->n );
            assert( A->m >= B->n );
            tempjn = j == A->nt-1 ? A->n - j * A->nb : A->nb;
-           ldaj = BLKLDD(A, j);
 
            INSERT_TASK_zlaset(
                &options,
                ChamUpperLower, B->mb, tempjn,
                0., 0.,
-               B(0, j), B->mb );
+               B(0, j) );
 
            if(j > 0){
                INSERT_TASK_zlacpy(
                    &options,
                    ChamLower, A->mb, tempjn, A->nb,
-                   A(j-1, j), BLKLDD(A, j-1),
-                   B(0,   j), ldx);
+                   A(j-1, j),
+                   B(0,   j));
            }
 
            tempjm = j == B->nt-1 ? B->n - j * B->nb : B->nb;
            INSERT_TASK_zlacpyx(
                &options,
                ChamUpper, tempjm, tempjn, A->nb,
-               0,     A(j, j), ldaj,
-               A->nb, B(0, j), ldx);
+               0,     A(j, j),
+               A->nb, B(0, j));
        }
     }
     RUNTIME_options_finalize(&options, chamctxt);
diff --git a/compute/pztpgqrt.c b/compute/pztpgqrt.c
index c49cb0452008910b14135bf2f9b34ccb9a9e1b0a..73b43d461991515c34dad9b06896f4c66960e09b 100644
--- a/compute/pztpgqrt.c
+++ b/compute/pztpgqrt.c
@@ -40,7 +40,6 @@ void chameleon_pztpgqrt( int KT, int L,
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldvm, ldqk, ldqm;
     int tempkn, tempnn, tempmm, templm;
     int ib;
 
@@ -80,15 +79,12 @@ void chameleon_pztpgqrt( int KT, int L,
         RUNTIME_iteration_push(chamctxt, k);
 
         tempkn = k == Q1->nt-1 ? Q1->n-k*Q1->nb : Q1->nb;
-        ldqk = BLKLDD(Q1, k);
 
         /* Equivalent to the tsmqr step on Q1,Q2 */
         maxmtk = chameleon_min( Q2->mt, maxmt+k ) - 1;
         for (m = maxmtk; m > -1; m--) {
             tempmm = m == Q2->mt-1 ? Q2->m-m*Q2->mb : Q2->mb;
             templm = ((L > 0) && (m == maxmtk)) ? tempmm : 0;
-            ldvm = BLKLDD(V2, m);
-            ldqm = BLKLDD(Q2, m);
 
             for (n = k; n < Q2->nt; n++) {
                 tempnn = n == Q2->nt-1 ? Q2->n-n*Q2->nb : Q2->nb;
@@ -97,10 +93,10 @@ void chameleon_pztpgqrt( int KT, int L,
                     &options,
                     ChamLeft, ChamNoTrans,
                     tempmm, tempnn, tempkn, templm, ib, T2->nb,
-                    V2(m, k), ldvm,
-                    T2(m, k), T2->mb,
-                    Q1(k, n), ldqk,
-                    Q2(m, n), ldqm );
+                    V2(m, k),
+                    T2(m, k),
+                    Q1(k, n),
+                    Q2(m, n) );
             }
         }
 
diff --git a/compute/pztpqrt.c b/compute/pztpqrt.c
index 88800dd26eb6efd3606327516afa24d968a2c75d..28effe7ca4ab72e66bd4a7ea2c770bf84b095b8e 100644
--- a/compute/pztpqrt.c
+++ b/compute/pztpqrt.c
@@ -36,7 +36,6 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T,
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldbm;
     int tempkm, tempkn, tempnn, tempmm, templm;
     int ib;
 
@@ -76,19 +75,17 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T,
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-        ldak = BLKLDD(A, k);
 
         for (m = 0; m < maxmt; m++) {
             tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
             templm = ((L > 0) && (m == maxmt-1)) ? tempmm : 0;
-            ldbm = BLKLDD(B, m);
             /* TT kernel */
             INSERT_TASK_ztpqrt(
                 &options,
                 tempmm, tempkn, templm, ib, T->nb,
-                A(k, k), ldak,
-                B(m, k), ldbm,
-                T(m, k), T->mb );
+                A(k, k),
+                B(m, k),
+                T(m, k) );
 
             for (n = k+1; n < B->nt; n++) {
                 tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
@@ -96,10 +93,10 @@ void chameleon_pztpqrt( int L, CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *T,
                     &options,
                     ChamLeft, ChamConjTrans,
                     tempmm, tempnn, tempkm, templm, ib, T->nb,
-                    B(m, k), ldbm,
-                    T(m, k), T->mb,
-                    A(k, n), ldak,
-                    B(m, n), ldbm );
+                    B(m, k),
+                    T(m, k),
+                    A(k, n),
+                    B(m, n) );
             }
         }
 
diff --git a/compute/pztradd.c b/compute/pztradd.c
index 3a29fa4f1e8a9e5ad311febf4d039c43edd98e92..4918f70f39b218636e44b9b2fbb6370613005776 100644
--- a/compute/pztradd.c
+++ b/compute/pztradd.c
@@ -38,7 +38,6 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
 
     int tempmm, tempnn, tempmn, tempnm;
     int m, n;
-    int ldam, ldan, ldbm, ldbn;
 
     chamctxt = chameleon_context_self();
     if (sequence->status != CHAMELEON_SUCCESS) {
@@ -52,25 +51,21 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
             for (n = 0; n < chameleon_min(B->mt,B->nt); n++) {
                 tempnm = n == B->mt-1 ? B->m-n*B->mb : B->mb;
                 tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                ldan = BLKLDD(A, n);
-                ldbn = BLKLDD(B, n);
 
                 INSERT_TASK_ztradd(
                     &options,
                     uplo, trans, tempnm, tempnn, B->mb,
-                    alpha, A(n, n), ldan,
-                    beta,  B(n, n), ldbn);
+                    alpha, A(n, n),
+                    beta,  B(n, n));
 
                 for (m = n+1; m < B->mt; m++) {
                     tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb;
-                    ldam = BLKLDD(A, m);
-                    ldbm = BLKLDD(B, m);
 
                     INSERT_TASK_zgeadd(
                         &options,
                         trans, tempmm, tempnn, B->mb,
-                        alpha, A(m, n), ldam,
-                        beta,  B(m, n), ldbm);
+                        alpha, A(m, n),
+                        beta,  B(m, n));
                 }
             }
         }
@@ -78,24 +73,21 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
             for (n = 0; n < chameleon_min(B->mt,B->nt); n++) {
                 tempnm = n == B->mt-1 ? B->m-n*B->mb : B->mb;
                 tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                ldan = BLKLDD(A, n);
-                ldbn = BLKLDD(B, n);
 
                 INSERT_TASK_ztradd(
                     &options,
                     uplo, trans, tempnm, tempnn, B->mb,
-                    alpha, A(n, n), ldan,
-                    beta,  B(n, n), ldbn);
+                    alpha, A(n, n),
+                    beta,  B(n, n));
 
                 for (m = n+1; m < B->mt; m++) {
                     tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb;
-                    ldbm = BLKLDD(B, m);
 
                     INSERT_TASK_zgeadd(
                         &options,
                         trans, tempmm, tempnn, B->mb,
-                        alpha, A(n, m), ldan,
-                        beta,  B(m, n), ldbm);
+                        alpha, A(n, m),
+                        beta,  B(m, n));
                 }
             }
         }
@@ -105,14 +97,12 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
             for (m = 0; m < chameleon_min(B->mt,B->nt); m++) {
                 tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb;
                 tempmn = m == B->nt-1 ? B->n-m*B->nb : B->nb;
-                ldam = BLKLDD(A, m);
-                ldbm = BLKLDD(B, m);
 
                 INSERT_TASK_ztradd(
                     &options,
                     uplo, trans, tempmm, tempmn, B->mb,
-                    alpha, A(m, m), ldam,
-                    beta,  B(m, m), ldbm);
+                    alpha, A(m, m),
+                    beta,  B(m, m));
 
                 for (n = m+1; n < B->nt; n++) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
@@ -120,8 +110,8 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
                     INSERT_TASK_zgeadd(
                         &options,
                         trans, tempmm, tempnn, B->mb,
-                        alpha, A(m, n), ldam,
-                        beta,  B(m, n), ldbm);
+                        alpha, A(m, n),
+                        beta,  B(m, n));
                 }
             }
         }
@@ -129,24 +119,21 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
             for (m = 0; m < chameleon_min(B->mt,B->nt); m++) {
                 tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb;
                 tempmn = m == B->nt-1 ? B->n-m*B->nb : B->nb;
-                ldam = BLKLDD(A, m);
-                ldbm = BLKLDD(B, m);
 
                 INSERT_TASK_ztradd(
                     &options,
                     uplo, trans, tempmm, tempmn, B->mb,
-                    alpha, A(m, m), ldam,
-                    beta,  B(m, m), ldbm);
+                    alpha, A(m, m),
+                    beta,  B(m, m));
 
                 for (n = m+1; n < B->nt; n++) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                    ldan = BLKLDD(A, n);
 
                     INSERT_TASK_zgeadd(
                         &options,
                         trans, tempmm, tempnn, B->mb,
-                        alpha, A(n, m), ldan,
-                        beta,  B(m, n), ldbm);
+                        alpha, A(n, m),
+                        beta,  B(m, n));
                 }
             }
         }
@@ -156,8 +143,6 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
         if (trans == ChamNoTrans) {
             for (m = 0; m < B->mt; m++) {
                 tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb;
-                ldam = BLKLDD(A, m);
-                ldbm = BLKLDD(B, m);
 
                 for (n = 0; n < B->nt; n++) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
@@ -165,25 +150,23 @@ void chameleon_pztradd(cham_uplo_t uplo, cham_trans_t trans,
                     INSERT_TASK_zgeadd(
                         &options,
                         trans, tempmm, tempnn, B->mb,
-                        alpha, A(m, n), ldam,
-                        beta,  B(m, n), ldbm);
+                        alpha, A(m, n),
+                        beta,  B(m, n));
                 }
             }
         }
         else {
             for (m = 0; m < B->mt; m++) {
                 tempmm = m == B->mt-1 ? B->m-B->mb*m : B->nb;
-                ldbm = BLKLDD(B, m);
 
                 for (n = 0; n < B->nt; n++) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                    ldan = BLKLDD(A, n);
 
                     INSERT_TASK_zgeadd(
                         &options,
                         trans, tempmm, tempnn, B->mb,
-                        alpha, A(n, m), ldan,
-                        beta,  B(m, n), ldbm);
+                        alpha, A(n, m),
+                        beta,  B(m, n));
                 }
             }
         }
diff --git a/compute/pztrmm.c b/compute/pztrmm.c
index 3f6376274307a8f140a88f1482b82d0aaa3bb015..d0bddb77bd0830b786ffaf29808340cad8f28fc4 100644
--- a/compute/pztrmm.c
+++ b/compute/pztrmm.c
@@ -40,7 +40,6 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
     RUNTIME_option_t options;
 
     int k, m, n;
-    int ldak, ldam, ldan, ldbk, ldbm;
     int tempkm, tempkn, tempmm, tempnn;
 
     CHAMELEON_Complex64_t zone = (CHAMELEON_Complex64_t)1.0;
@@ -58,27 +57,24 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
             if (trans == ChamNoTrans) {
                 for (m = 0; m < B->mt; m++) {
                     tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                    ldbm = BLKLDD(B, m);
-                    ldam = BLKLDD(A, m);
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                         INSERT_TASK_ztrmm(
                             &options,
                             side, uplo, trans, diag,
                             tempmm, tempnn, A->mb,
-                            alpha, A(m, m), ldam,  /* lda * tempkm */
-                                   B(m, n), ldbm); /* ldb * tempnn */
+                            alpha, A(m, m),  /* lda * tempkm */
+                                   B(m, n)); /* ldb * tempnn */
 
                         for (k = m+1; k < A->mt; k++) {
                             tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-                            ldbk = BLKLDD(B, k);
                             INSERT_TASK_zgemm(
                                 &options,
                                 trans, ChamNoTrans,
                                 tempmm, tempnn, tempkn, A->mb,
-                                alpha, A(m, k), ldam,
-                                       B(k, n), ldbk,
-                                zone,  B(m, n), ldbm);
+                                alpha, A(m, k),
+                                       B(k, n),
+                                zone,  B(m, n));
                         }
                     }
                 }
@@ -89,27 +85,23 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
             else {
                 for (m = B->mt-1; m > -1; m--) {
                     tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                    ldbm = BLKLDD(B, m);
-                    ldam = BLKLDD(A, m);
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                         INSERT_TASK_ztrmm(
                             &options,
                             side, uplo, trans, diag,
                             tempmm, tempnn, A->mb,
-                            alpha, A(m, m), ldam,  /* lda * tempkm */
-                                   B(m, n), ldbm); /* ldb * tempnn */
+                            alpha, A(m, m),  /* lda * tempkm */
+                                   B(m, n)); /* ldb * tempnn */
 
                         for (k = 0; k < m; k++) {
-                            ldbk = BLKLDD(B, k);
-                            ldak = BLKLDD(A, k);
                             INSERT_TASK_zgemm(
                                 &options,
                                 trans, ChamNoTrans,
                                 tempmm, tempnn, B->mb, A->mb,
-                                alpha, A(k, m), ldak,
-                                       B(k, n), ldbk,
-                                zone,  B(m, n), ldbm);
+                                alpha, A(k, m),
+                                       B(k, n),
+                                zone,  B(m, n));
                         }
                     }
                 }
@@ -122,26 +114,23 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
             if (trans == ChamNoTrans) {
                 for (m = B->mt-1; m > -1; m--) {
                     tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                    ldbm = BLKLDD(B, m);
-                    ldam = BLKLDD(A, m);
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                         INSERT_TASK_ztrmm(
                             &options,
                             side, uplo, trans, diag,
                             tempmm, tempnn, A->mb,
-                            alpha, A(m, m), ldam,  /* lda * tempkm */
-                                   B(m, n), ldbm); /* ldb * tempnn */
+                            alpha, A(m, m),  /* lda * tempkm */
+                                   B(m, n)); /* ldb * tempnn */
 
                         for (k = 0; k < m; k++) {
-                            ldbk = BLKLDD(B, k);
                             INSERT_TASK_zgemm(
                                 &options,
                                 trans, ChamNoTrans,
                                 tempmm, tempnn, B->mb, A->mb,
-                                alpha, A(m, k), ldam,
-                                       B(k, n), ldbk,
-                                zone,  B(m, n), ldbm);
+                                alpha, A(m, k),
+                                       B(k, n),
+                                zone,  B(m, n));
                         }
                     }
                 }
@@ -152,28 +141,24 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
             else {
                 for (m = 0; m < B->mt; m++) {
                     tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                    ldbm = BLKLDD(B, m);
-                    ldam = BLKLDD(A, m);
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                         INSERT_TASK_ztrmm(
                             &options,
                             side, uplo, trans, diag,
                             tempmm, tempnn, A->mb,
-                            alpha, A(m, m), ldam,  /* lda * tempkm */
-                                   B(m, n), ldbm); /* ldb * tempnn */
+                            alpha, A(m, m),  /* lda * tempkm */
+                                   B(m, n)); /* ldb * tempnn */
 
                         for (k = m+1; k < A->mt; k++) {
                             tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                            ldak = BLKLDD(A, k);
-                            ldbk = BLKLDD(B, k);
                             INSERT_TASK_zgemm(
                                 &options,
                                 trans, ChamNoTrans,
                                 tempmm, tempnn, tempkm, A->mb,
-                                alpha, A(k, m), ldak,
-                                       B(k, n), ldbk,
-                                zone,  B(m, n), ldbm);
+                                alpha, A(k, m),
+                                       B(k, n),
+                                zone,  B(m, n));
                         }
                     }
                 }
@@ -188,26 +173,23 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
             if (trans == ChamNoTrans) {
                 for (n = B->nt-1; n > -1; n--) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                    ldan = BLKLDD(A, n);
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
                         INSERT_TASK_ztrmm(
                             &options,
                             side, uplo, trans, diag,
                             tempmm, tempnn, A->mb,
-                            alpha, A(n, n), ldan,  /* lda * tempkm */
-                                   B(m, n), ldbm); /* ldb * tempnn */
+                            alpha, A(n, n),  /* lda * tempkm */
+                                   B(m, n)); /* ldb * tempnn */
 
                         for (k = 0; k < n; k++) {
-                            ldak = BLKLDD(A, k);
                             INSERT_TASK_zgemm(
                                 &options,
                                 ChamNoTrans, trans,
                                 tempmm, tempnn, B->mb, A->mb,
-                                alpha, B(m, k), ldbm,
-                                       A(k, n), ldak,
-                                zone,  B(m, n), ldbm);
+                                alpha, B(m, k),
+                                       A(k, n),
+                                zone,  B(m, n));
                         }
                     }
                 }
@@ -218,16 +200,14 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
             else {
                 for (n = 0; n < B->nt; n++) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                    ldan = BLKLDD(A, n);
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
                         INSERT_TASK_ztrmm(
                             &options,
                             side, uplo, trans, diag,
                             tempmm, tempnn, A->mb,
-                            alpha, A(n, n), ldan,  /* lda * tempkm */
-                                   B(m, n), ldbm); /* ldb * tempnn */
+                            alpha, A(n, n),  /* lda * tempkm */
+                                   B(m, n)); /* ldb * tempnn */
 
                         for (k = n+1; k < A->mt; k++) {
                             tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
@@ -235,9 +215,9 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
                                 &options,
                                 ChamNoTrans, trans,
                                 tempmm, tempnn, tempkn, A->mb,
-                                alpha, B(m, k), ldbm,
-                                       A(n, k), ldan,
-                                zone,  B(m, n), ldbm);
+                                alpha, B(m, k),
+                                       A(n, k),
+                                zone,  B(m, n));
                         }
                     }
                 }
@@ -250,27 +230,24 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
             if (trans == ChamNoTrans) {
                 for (n = 0; n < B->nt; n++) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                    ldan = BLKLDD(A, n);
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
                         INSERT_TASK_ztrmm(
                             &options,
                             side, uplo, trans, diag,
                             tempmm, tempnn, A->mb,
-                            alpha, A(n, n), ldan,  /* lda * tempkm */
-                                   B(m, n), ldbm); /* ldb * tempnn */
+                            alpha, A(n, n),  /* lda * tempkm */
+                                   B(m, n)); /* ldb * tempnn */
 
                         for (k = n+1; k < A->mt; k++) {
                             tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-                            ldak = BLKLDD(A, k);
                             INSERT_TASK_zgemm(
                                 &options,
                                 ChamNoTrans, trans,
                                 tempmm, tempnn, tempkn, A->mb,
-                                alpha, B(m, k), ldbm,
-                                       A(k, n), ldak,
-                                zone,  B(m, n), ldbm);
+                                alpha, B(m, k),
+                                       A(k, n),
+                                zone,  B(m, n));
                         }
                     }
                 }
@@ -281,25 +258,23 @@ void chameleon_pztrmm(cham_side_t side, cham_uplo_t uplo,
             else {
                 for (n = B->nt-1; n > -1; n--) {
                     tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                    ldan = BLKLDD(A, n);
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
                         INSERT_TASK_ztrmm(
                             &options,
                             side, uplo, trans, diag,
                             tempmm, tempnn, A->mb,
-                            alpha, A(n, n), ldan,  /* lda * tempkm */
-                                   B(m, n), ldbm); /* ldb * tempnn */
+                            alpha, A(n, n),  /* lda * tempkm */
+                                   B(m, n)); /* ldb * tempnn */
 
                         for (k = 0; k < n; k++) {
                             INSERT_TASK_zgemm(
                                 &options,
                                 ChamNoTrans, trans,
                                 tempmm, tempnn, B->mb, A->mb,
-                                alpha, B(m, k), ldbm,
-                                       A(n, k), ldan,
-                                zone,  B(m, n), ldbm);
+                                alpha, B(m, k),
+                                       A(n, k),
+                                zone,  B(m, n));
                         }
                     }
                 }
diff --git a/compute/pztrsm.c b/compute/pztrsm.c
index 38b3cedcab0f041bc1081264bf93b1f47aaad41e..c6e7eac7b8433207064892f50cd3d74020776211 100644
--- a/compute/pztrsm.c
+++ b/compute/pztrsm.c
@@ -38,7 +38,6 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
     RUNTIME_option_t options;
 
     int k, m, n;
-    int ldak, ldam, ldan, ldbk, ldbm;
     int tempkm, tempkn, tempmm, tempnn;
 
     CHAMELEON_Complex64_t zone       = (CHAMELEON_Complex64_t) 1.0;
@@ -59,8 +58,6 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
             if (trans == ChamNoTrans) {
                 for (k = 0; k < B->mt; k++) {
                     tempkm = k == 0 ? B->m-(B->mt-1)*B->mb : B->mb;
-                    ldak = BLKLDD(A, B->mt-1-k);
-                    ldbk = BLKLDD(B, B->mt-1-k);
                     lalpha = k == 0 ? alpha : zone;
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
@@ -68,22 +65,20 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
                             &options,
                             side, uplo, trans, diag,
                             tempkm, tempnn, A->mb,
-                            lalpha, A(B->mt-1-k, B->mt-1-k), ldak,  /* lda * tempkm */
-                                    B(B->mt-1-k,        n), ldbk); /* ldb * tempnn */
+                            lalpha, A(B->mt-1-k, B->mt-1-k),  /* lda * tempkm */
+                                    B(B->mt-1-k,        n)); /* ldb * tempnn */
                     }
                     RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) );
                     for (m = k+1; m < B->mt; m++) {
-                        ldam = BLKLDD(A, B->mt-1-m);
-                        ldbm = BLKLDD(B, B->mt-1-m);
                         for (n = 0; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                             INSERT_TASK_zgemm(
                                 &options,
                                 ChamNoTrans, ChamNoTrans,
                                 B->mb, tempnn, tempkm, A->mb,
-                                mzone,  A(B->mt-1-m, B->mt-1-k), ldam,
-                                        B(B->mt-1-k, n       ), ldbk,
-                                lalpha, B(B->mt-1-m, n       ), ldbm);
+                                mzone,  A(B->mt-1-m, B->mt-1-k),
+                                        B(B->mt-1-k, n       ),
+                                lalpha, B(B->mt-1-m, n       ));
                         }
                         RUNTIME_data_flush( sequence, A(B->mt-1-m, B->mt-1-k) );
                     }
@@ -98,8 +93,6 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
             else {
                 for (k = 0; k < B->mt; k++) {
                     tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb;
-                    ldak = BLKLDD(A, k);
-                    ldbk = BLKLDD(B, k);
                     lalpha = k == 0 ? alpha : zone;
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
@@ -107,22 +100,21 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
                             &options,
                             side, uplo, trans, diag,
                             tempkm, tempnn, A->mb,
-                            lalpha, A(k, k), ldak,
-                                    B(k, n), ldbk);
+                            lalpha, A(k, k),
+                                    B(k, n));
                     }
                     RUNTIME_data_flush( sequence, A(k, k) );
                     for (m = k+1; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
                         for (n = 0; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                             INSERT_TASK_zgemm(
                                 &options,
                                 trans, ChamNoTrans,
                                 tempmm, tempnn, B->mb, A->mb,
-                                mzone,  A(k, m), ldak,
-                                        B(k, n), ldbk,
-                                lalpha, B(m, n), ldbm);
+                                mzone,  A(k, m),
+                                        B(k, n),
+                                lalpha, B(m, n));
                         }
                         RUNTIME_data_flush( sequence, A(k, m) );
                     }
@@ -140,8 +132,6 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
             if (trans == ChamNoTrans) {
                 for (k = 0; k < B->mt; k++) {
                     tempkm = k == B->mt-1 ? B->m-k*B->mb : B->mb;
-                    ldak = BLKLDD(A, k);
-                    ldbk = BLKLDD(B, k);
                     lalpha = k == 0 ? alpha : zone;
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
@@ -149,23 +139,21 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
                             &options,
                             side, uplo, trans, diag,
                             tempkm, tempnn, A->mb,
-                            lalpha, A(k, k), ldak,
-                                    B(k, n), ldbk);
+                            lalpha, A(k, k),
+                                    B(k, n));
                     }
                     RUNTIME_data_flush( sequence, A(k, k) );
                     for (m = k+1; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldam = BLKLDD(A, m);
-                        ldbm = BLKLDD(B, m);
                         for (n = 0; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                             INSERT_TASK_zgemm(
                                 &options,
                                 ChamNoTrans, ChamNoTrans,
                                 tempmm, tempnn, B->mb, A->mb,
-                                mzone,  A(m, k), ldam,
-                                        B(k, n), ldbk,
-                                lalpha, B(m, n), ldbm);
+                                mzone,  A(m, k),
+                                        B(k, n),
+                                lalpha, B(m, n));
                         }
                         RUNTIME_data_flush( sequence, A(m, k) );
                     }
@@ -180,8 +168,6 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
             else {
                 for (k = 0; k < B->mt; k++) {
                     tempkm = k == 0 ? B->m-(B->mt-1)*B->mb : B->mb;
-                    ldak = BLKLDD(A, B->mt-1-k);
-                    ldbk = BLKLDD(B, B->mt-1-k);
                     lalpha = k == 0 ? alpha : zone;
                     for (n = 0; n < B->nt; n++) {
                         tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
@@ -189,21 +175,20 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
                             &options,
                             side, uplo, trans, diag,
                             tempkm, tempnn, A->mb,
-                            lalpha, A(B->mt-1-k, B->mt-1-k), ldak,
-                                    B(B->mt-1-k,        n), ldbk);
+                            lalpha, A(B->mt-1-k, B->mt-1-k),
+                                    B(B->mt-1-k,        n));
                     }
                     RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-k) );
                     for (m = k+1; m < B->mt; m++) {
-                        ldbm = BLKLDD(B, B->mt-1-m);
                         for (n = 0; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                             INSERT_TASK_zgemm(
                                 &options,
                                 trans, ChamNoTrans,
                                 B->mb, tempnn, tempkm, A->mb,
-                                mzone,  A(B->mt-1-k, B->mt-1-m), ldak,
-                                        B(B->mt-1-k, n       ), ldbk,
-                                lalpha, B(B->mt-1-m, n       ), ldbm);
+                                mzone,  A(B->mt-1-k, B->mt-1-m),
+                                        B(B->mt-1-k, n       ),
+                                lalpha, B(B->mt-1-m, n       ));
                         }
                         RUNTIME_data_flush( sequence, A(B->mt-1-k, B->mt-1-m) );
                     }
@@ -222,31 +207,28 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
             if (trans == ChamNoTrans) {
                 for (k = 0; k < B->nt; k++) {
                     tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb;
-                    ldak = BLKLDD(A, k);
                     lalpha = k == 0 ? alpha : zone;
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
                         INSERT_TASK_ztrsm(
                             &options,
                             side, uplo, trans, diag,
                             tempmm, tempkn, A->mb,
-                            lalpha, A(k, k), ldak,  /* lda * tempkn */
-                                    B(m, k), ldbm); /* ldb * tempkn */
+                            lalpha, A(k, k),  /* lda * tempkn */
+                                    B(m, k)); /* ldb * tempkn */
                     }
                     RUNTIME_data_flush( sequence, A(k, k) );
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
                         for (n = k+1; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                             INSERT_TASK_zgemm(
                                 &options,
                                 ChamNoTrans, ChamNoTrans,
                                 tempmm, tempnn, B->mb, A->mb,
-                                mzone,  B(m, k), ldbm,  /* ldb * B->mb   */
-                                        A(k, n), ldak,  /* lda * tempnn */
-                                lalpha, B(m, n), ldbm); /* ldb * tempnn */
+                                mzone,  B(m, k),  /* ldb * B->mb   */
+                                        A(k, n),  /* lda * tempnn */
+                                lalpha, B(m, n)); /* ldb * tempnn */
                         }
                         RUNTIME_data_flush( sequence, B(m, k) );
                     }
@@ -261,27 +243,24 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
             else {
                 for (k = 0; k < B->nt; k++) {
                     tempkn = k == 0 ? B->n-(B->nt-1)*B->nb : B->nb;
-                    ldak = BLKLDD(A, B->nt-1-k);
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
                         INSERT_TASK_ztrsm(
                             &options,
                             side, uplo, trans, diag,
                             tempmm, tempkn, A->mb,
-                            alpha, A(B->nt-1-k, B->nt-1-k), ldak,  /* lda * tempkn */
-                                   B(       m, B->nt-1-k), ldbm); /* ldb * tempkn */
+                            alpha, A(B->nt-1-k, B->nt-1-k),  /* lda * tempkn */
+                                   B(       m, B->nt-1-k)); /* ldb * tempkn */
                         RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-k) );
 
                         for (n = k+1; n < B->nt; n++) {
-                            ldan = BLKLDD(A, B->nt-1-n);
                             INSERT_TASK_zgemm(
                                 &options,
                                 ChamNoTrans, trans,
                                 tempmm, B->nb, tempkn, A->mb,
-                                minvalpha, B(m,        B->nt-1-k), ldbm,  /* ldb  * tempkn */
-                                           A(B->nt-1-n, B->nt-1-k), ldan, /* A->mb * tempkn (Never last row) */
-                                zone,      B(m,        B->nt-1-n), ldbm); /* ldb  * B->nb   */
+                                minvalpha, B(m,        B->nt-1-k),  /* ldb  * tempkn */
+                                           A(B->nt-1-n, B->nt-1-k), /* A->mb * tempkn (Never last row) */
+                                zone,      B(m,        B->nt-1-n)); /* ldb  * B->nb   */
                         }
                         RUNTIME_data_flush( sequence, B(m,        B->nt-1-k) );
                     }
@@ -298,17 +277,15 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
             if (trans == ChamNoTrans) {
                 for (k = 0; k < B->nt; k++) {
                     tempkn = k == 0 ? B->n-(B->nt-1)*B->nb : B->nb;
-                    ldak = BLKLDD(A, B->nt-1-k);
                     lalpha = k == 0 ? alpha : zone;
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
                         INSERT_TASK_ztrsm(
                             &options,
                             side, uplo, trans, diag,
                             tempmm, tempkn, A->mb,
-                            lalpha, A(B->nt-1-k, B->nt-1-k), ldak,  /* lda * tempkn */
-                                    B(       m, B->nt-1-k), ldbm); /* ldb * tempkn */
+                            lalpha, A(B->nt-1-k, B->nt-1-k),  /* lda * tempkn */
+                                    B(       m, B->nt-1-k)); /* ldb * tempkn */
                         RUNTIME_data_flush( sequence, A(B->nt-1-k, B->nt-1-k) );
 
                         for (n = k+1; n < B->nt; n++) {
@@ -316,9 +293,9 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
                                 &options,
                                 ChamNoTrans, ChamNoTrans,
                                 tempmm, B->nb, tempkn, A->mb,
-                                mzone,  B(m,        B->nt-1-k), ldbm,  /* ldb * tempkn */
-                                        A(B->nt-1-k, B->nt-1-n), ldak,  /* lda * B->nb   */
-                                lalpha, B(m,        B->nt-1-n), ldbm); /* ldb * B->nb   */
+                                mzone,  B(m,        B->nt-1-k),  /* ldb * tempkn */
+                                        A(B->nt-1-k, B->nt-1-n),  /* lda * B->nb   */
+                                lalpha, B(m,        B->nt-1-n)); /* ldb * B->nb   */
                         }
                         RUNTIME_data_flush( sequence, B(m,        B->nt-1-k) );
                     }
@@ -333,28 +310,25 @@ void chameleon_pztrsm(cham_side_t side, cham_uplo_t uplo, cham_trans_t trans, ch
             else {
                 for (k = 0; k < B->nt; k++) {
                     tempkn = k == B->nt-1 ? B->n-k*B->nb : B->nb;
-                    ldak = BLKLDD(A, k);
                     for (m = 0; m < B->mt; m++) {
                         tempmm = m == B->mt-1 ? B->m-m*B->mb : B->mb;
-                        ldbm = BLKLDD(B, m);
                         INSERT_TASK_ztrsm(
                             &options,
                             side, uplo, trans, diag,
                             tempmm, tempkn, A->mb,
-                            alpha, A(k, k), ldak,  /* lda * tempkn */
-                                   B(m, k), ldbm); /* ldb * tempkn */
+                            alpha, A(k, k),  /* lda * tempkn */
+                                   B(m, k)); /* ldb * tempkn */
                         RUNTIME_data_flush( sequence, A(k, k) );
 
                         for (n = k+1; n < B->nt; n++) {
                             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
-                            ldan = BLKLDD(A, n);
                             INSERT_TASK_zgemm(
                                 &options,
                                 ChamNoTrans, trans,
                                 tempmm, tempnn, B->mb, A->mb,
-                                minvalpha, B(m, k), ldbm,  /* ldb  * tempkn */
-                                           A(n, k), ldan, /* ldan * tempkn */
-                                zone,      B(m, n), ldbm); /* ldb  * tempnn */
+                                minvalpha, B(m, k),  /* ldb  * tempkn */
+                                           A(n, k), /* ldan * tempkn */
+                                zone,      B(m, n)); /* ldb  * tempnn */
                         }
                         RUNTIME_data_flush( sequence, B(m, k) );
                     }
diff --git a/compute/pztrsmpl.c b/compute/pztrsmpl.c
index aa861daf180cc5e08a29a94af96e8ea31e2bc50c..8a760806b734e4401b01f3192dbff4e87e62dcf6 100644
--- a/compute/pztrsmpl.c
+++ b/compute/pztrsmpl.c
@@ -39,7 +39,6 @@ void chameleon_pztrsmpl( CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *L, int *IP
     RUNTIME_option_t options;
 
     int k, m, n;
-    int ldak, ldam, ldbk, ldbm;
     int tempkm, tempnn, tempkmin, tempmm, tempkn;
     int ib;
 
@@ -54,31 +53,27 @@ void chameleon_pztrsmpl( CHAM_desc_t *A, CHAM_desc_t *B, CHAM_desc_t *L, int *IP
         tempkm   = k == A->mt-1 ? A->m-k*A->mb : A->mb;
         tempkn   = k == A->nt-1 ? A->n-k*A->nb : A->nb;
         tempkmin = k == chameleon_min(A->mt, A->nt)-1 ? chameleon_min(A->m, A->n)-k*A->mb : A->mb;
-        ldak = BLKLDD(A, k);
-        ldbk = BLKLDD(B, k);
         for (n = 0; n < B->nt; n++) {
             tempnn = n == B->nt-1 ? B->n-n*B->nb : B->nb;
             INSERT_TASK_zgessm(
                 &options,
                 tempkm, tempnn, tempkmin, ib, L->nb,
                 IPIV(k, k),
-                L(k, k), L->mb,
-                A(k, k), ldak,
-                B(k, n), ldbk);
+                L(k, k),
+                A(k, k),
+                B(k, n));
         }
         for (m = k+1; m < A->mt; m++) {
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-            ldam = BLKLDD(A, m);
-            ldbm = BLKLDD(B, m);
             for (n = 0; n < B->nt; n++) {
                 tempnn  = n == B->nt-1 ? B->n-n*B->nb : B->nb;
                 INSERT_TASK_zssssm(
                     &options,
                     A->nb, tempnn, tempmm, tempnn, tempkn, ib, L->nb,
-                    B(k, n), ldbk,
-                    B(m, n), ldbm,
-                    L(m, k), L->mb,
-                    A(m, k), ldam,
+                    B(k, n),
+                    B(m, n),
+                    L(m, k),
+                    A(m, k),
                     IPIV(m, k));
             }
         }
diff --git a/compute/pztrtri.c b/compute/pztrtri.c
index 925ec975bf6ce7506a913b93869feac78004f5fd..89c1a60740f6e4ae4907fe5c00dc113fdee0e7f1 100644
--- a/compute/pztrtri.c
+++ b/compute/pztrtri.c
@@ -36,7 +36,6 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
     RUNTIME_option_t options;
 
     int k, m, n;
-    int ldam, ldak;
     int tempkn, tempkm, tempmm, tempnn;
 
     CHAMELEON_Complex64_t zone  = (CHAMELEON_Complex64_t) 1.0;
@@ -55,28 +54,25 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
             RUNTIME_iteration_push(chamctxt, k);
 
             tempkn = k == A->nt-1 ? A->n-k*A->nb : A->nb;
-            ldak = BLKLDD(A, k);
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
                 INSERT_TASK_ztrsm(
                     &options,
                     ChamRight, uplo, ChamNoTrans, diag,
                     tempmm, tempkn, A->mb,
-                    mzone, A(k, k), ldak,
-                           A(m, k), ldam);
+                    mzone, A(k, k),
+                           A(m, k));
             }
             for (m = k+1; m < A->mt; m++) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldam = BLKLDD(A, m);
                 for (n = 0; n < k; n++) {
                     INSERT_TASK_zgemm(
                         &options,
                         ChamNoTrans, ChamNoTrans,
                         tempmm, A->nb, tempkn, A->mb,
-                        zone, A(m, k), ldam,
-                              A(k, n), ldak,
-                        zone, A(m, n), ldam);
+                        zone, A(m, k),
+                              A(k, n),
+                        zone, A(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(m, k) );
             }
@@ -86,15 +82,15 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
                     &options,
                     ChamLeft, uplo, ChamNoTrans, diag,
                     tempkn, A->nb, A->mb,
-                    zone, A(k, k), ldak,
-                          A(k, n), ldak);
+                    zone, A(k, k),
+                          A(k, n));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
             INSERT_TASK_ztrtri(
                 &options,
                 uplo, diag,
                 tempkn, A->mb,
-                A(k, k), ldak, A->nb*k);
+                A(k, k), A->nb*k);
 
             RUNTIME_iteration_pop(chamctxt);
         }
@@ -107,46 +103,43 @@ void chameleon_pztrtri(cham_uplo_t uplo, cham_diag_t diag, CHAM_desc_t *A,
             RUNTIME_iteration_push(chamctxt, k);
 
             tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-            ldak = BLKLDD(A, k);
             for (n = k+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                 INSERT_TASK_ztrsm(
                     &options,
                     ChamLeft, uplo, ChamNoTrans, diag,
                     tempkm, tempnn, A->mb,
-                    mzone, A(k, k), ldak,
-                           A(k, n), ldak);
+                    mzone, A(k, k),
+                           A(k, n));
             }
             for (n = k+1; n < A->nt; n++) {
                 tempnn = n == A->nt-1 ? A->n-n*A->nb : A->nb;
                 for (m = 0; m < k; m++) {
-                    ldam = BLKLDD(A, m);
                     INSERT_TASK_zgemm(
                         &options,
                         ChamNoTrans, ChamNoTrans,
                         A->mb, tempnn, tempkm, A->mb,
-                        zone, A(m, k), ldam,
-                              A(k, n), ldak,
-                        zone, A(m, n), ldam);
+                        zone, A(m, k),
+                              A(k, n),
+                        zone, A(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(k, n) );
             }
             for (m = 0; m < k; m++) {
-                ldam = BLKLDD(A, m);
                 RUNTIME_data_flush( sequence, A(m, k) );
                 INSERT_TASK_ztrsm(
                     &options,
                     ChamRight, uplo, ChamNoTrans, diag,
                     A->mb, tempkm, A->mb,
-                    zone, A(k, k), ldak,
-                          A(m, k), ldam);
+                    zone, A(k, k),
+                          A(m, k));
             }
             RUNTIME_data_flush( sequence, A(k, k) );
             INSERT_TASK_ztrtri(
                 &options,
                 uplo, diag,
                 tempkm, A->mb,
-                A(k, k), ldak, A->mb*k);
+                A(k, k), A->mb*k);
 
             RUNTIME_iteration_pop(chamctxt);
         }
diff --git a/compute/pzunglq.c b/compute/pzunglq.c
index 63c3697f1fb33beb78795c28edb74ad081cb68ae..205678cb781eac775c6399d696e002a3caec0c43 100644
--- a/compute/pzunglq.c
+++ b/compute/pzunglq.c
@@ -42,7 +42,6 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldqm, lddk;
     int tempnn, tempmm, tempkmin, tempkn;
     int tempAkm, tempAkn;
     int ib, minMT;
@@ -94,14 +93,11 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
         tempAkn  = k == A->nt-1 ? A->n-k*A->nb : A->nb;
         tempkmin = chameleon_min( tempAkn, tempAkm );
         tempkn   = k == Q->nt-1 ? Q->n-k*Q->nb : Q->nb;
-        ldak = BLKLDD(A, k);
-        lddk = BLKLDD(D, k);
 
         for (n = Q->nt-1; n > k; n--) {
             tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
             for (m = k; m < Q->mt; m++) {
                 tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
-                ldqm = BLKLDD(Q, m);
 
                 RUNTIME_data_migrate( sequence, Q(m, k),
                                       Q->get_rankof( Q, m, n ) );
@@ -111,10 +107,10 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
                     &options,
                     ChamRight, ChamNoTrans,
                     tempmm, tempnn, tempAkm, 0, ib, T->nb,
-                    A(k, n), ldak,
-                    T(k, n), T->mb,
-                    Q(m, k), ldqm,
-                    Q(m, n), ldqm);
+                    A(k, n),
+                    T(k, n),
+                    Q(m, k),
+                    Q(m, n));
             }
             RUNTIME_data_flush( sequence, A(k, n) );
             RUNTIME_data_flush( sequence, T(k, n) );
@@ -125,19 +121,18 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
             INSERT_TASK_zlacpy(
                 &options,
                 ChamUpper, tempkmin, tempDkn, A->nb,
-                A(k, k), ldak,
-                D(k),    lddk );
+                A(k, k),
+                D(k) );
 #if defined(CHAMELEON_USE_CUDA)
             INSERT_TASK_zlaset(
                 &options,
                 ChamLower, tempkmin, tempDkn,
                 0., 1.,
-                D(k), lddk );
+                D(k) );
 #endif
         }
         for (m = k; m < Q->mt; m++) {
             tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
-            ldqm = BLKLDD(Q, m);
 
             /* Restore the original location of the tiles */
             RUNTIME_data_migrate( sequence, Q(m, k),
@@ -147,9 +142,9 @@ void chameleon_pzunglq( int genD, CHAM_desc_t *A, CHAM_desc_t *Q, CHAM_desc_t *T
                 &options,
                 ChamRight, ChamNoTrans,
                 tempmm, tempkn, tempkmin, ib, T->nb,
-                D(k),    lddk,
-                T(k, k), T->mb,
-                Q(m, k), ldqm);
+                D(k),
+                T(k, k),
+                Q(m, k));
         }
         RUNTIME_data_flush( sequence, D(k)    );
         RUNTIME_data_flush( sequence, T(k, k) );
diff --git a/compute/pzunglq_param.c b/compute/pzunglq_param.c
index e90345ec7cd63c3a6518f81e89c97056837184dd..af4bfff5eb47af8ef593a9d93d65af5c647f3601 100644
--- a/compute/pzunglq_param.c
+++ b/compute/pzunglq_param.c
@@ -41,7 +41,6 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
 
     int k, m, n, i, p;
     int K, L;
-    int ldak, ldqm, lddk;
     int tempkm, tempkmin, temppn, tempnn, tempmm;
     int ib, node, nbtiles, *tiles;
 
@@ -87,8 +86,6 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
         RUNTIME_iteration_push(chamctxt, k);
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-        ldak = BLKLDD(A, k);
-        lddk = BLKLDD(D, k);
 
         /* Setting the order of the tiles*/
         nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
@@ -111,7 +108,6 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
             }
             for (m = k; m < Q->mt; m++) {
                 tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
-                ldqm = BLKLDD(Q, m);
 
                 node = Q->get_rankof( Q, m, n );
                 RUNTIME_data_migrate( sequence, Q(m, p), node );
@@ -121,10 +117,10 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
                     &options,
                     ChamRight, ChamNoTrans,
                     tempmm, tempnn, tempkm, L, ib, T->nb,
-                    A(k, n), ldak,
-                    T(k, n), T->mb,
-                    Q(m, p), ldqm,
-                    Q(m, n), ldqm);
+                    A(k, n),
+                    T(k, n),
+                    Q(m, p),
+                    Q(m, n));
             }
             RUNTIME_data_flush( sequence, A(k, n) );
             RUNTIME_data_flush( sequence, T(k, n) );
@@ -142,19 +138,18 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamUpper, tempkmin, tempDpn, A->nb,
-                    A(k, p), ldak,
-                    D(k, p), lddk );
+                    A(k, p),
+                    D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamLower, tempkmin, tempDpn,
                     0., 1.,
-                    D(k, p), lddk );
+                    D(k, p) );
 #endif
             }
             for (m = k; m < Q->mt; m++) {
                 tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
-                ldqm = BLKLDD(Q, m);
 
                 RUNTIME_data_migrate( sequence, Q(m, p),
                                       Q->get_rankof( Q, m, p ) );
@@ -163,9 +158,9 @@ void chameleon_pzunglq_param( int genD, const libhqr_tree_t *qrtree, CHAM_desc_t
                     &options,
                     ChamRight, ChamNoTrans,
                     tempmm, temppn, tempkmin, ib, T->nb,
-                    D(k, p), lddk,
-                    T(k, p), T->mb,
-                    Q(m, p), ldqm);
+                    D(k, p),
+                    T(k, p),
+                    Q(m, p));
             }
             RUNTIME_data_flush( sequence, D(k, p) );
             RUNTIME_data_flush( sequence, T(k, p) );
diff --git a/compute/pzunglqrh.c b/compute/pzunglqrh.c
index 6bdfcabeaa7220525fda52945f29d87e2b552d4c..0ab0756b67e5f40c2ff215f100e1031710a57c04 100644
--- a/compute/pzunglqrh.c
+++ b/compute/pzunglqrh.c
@@ -46,7 +46,6 @@ void chameleon_pzunglqrh( int genD, int BS,
 
     int k, m, n;
     int K, N, RD, lastRD;
-    int ldak, lddk, ldqm;
     int tempkm, tempkmin, tempNn, tempnn, tempmm, tempNRDn;
     int ib, node;
 
@@ -88,8 +87,6 @@ void chameleon_pzunglqrh( int genD, int BS,
         RUNTIME_iteration_push(chamctxt, k);
 
         tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-        ldak = BLKLDD(A, k);
-        lddk = BLKLDD(D, k);
         lastRD = 0;
         for (RD = BS; RD < A->nt-k; RD *= 2)
             lastRD = RD;
@@ -98,7 +95,6 @@ void chameleon_pzunglqrh( int genD, int BS,
                 tempNRDn = N+RD == A->nt-1 ? A->n-(N+RD)*A->nb : A->nb;
                 for (m = k; m < Q->mt; m++) {
                     tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
-                    ldqm   = BLKLDD(Q, m   );
 
                     node = Q->get_rankof( Q, m, N+RD );
                     RUNTIME_data_migrate( sequence, Q(m, N),    node );
@@ -109,10 +105,10 @@ void chameleon_pzunglqrh( int genD, int BS,
                         &options,
                         ChamRight, ChamNoTrans,
                         tempmm, tempNRDn, tempkm, tempNRDn, ib, T->nb,
-                        A (k, N+RD), ldak,
-                        T2(k, N+RD), T->mb,
-                        Q (m, N   ), ldqm,
-                        Q (m, N+RD), ldqm);
+                        A (k, N+RD),
+                        T2(k, N+RD),
+                        Q (m, N   ),
+                        Q (m, N+RD));
                 }
 
                 RUNTIME_data_flush( sequence, A (k, N+RD) );
@@ -127,7 +123,6 @@ void chameleon_pzunglqrh( int genD, int BS,
 
                 for (m = k; m < Q->mt; m++) {
                     tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
-                    ldqm = BLKLDD(Q, m);
 
                     node = Q->get_rankof( Q, m, n );
                     RUNTIME_data_migrate( sequence, Q(m, N), node );
@@ -138,10 +133,10 @@ void chameleon_pzunglqrh( int genD, int BS,
                         &options,
                         ChamRight, ChamNoTrans,
                         tempmm, tempnn, tempkm, 0, ib, T->nb,
-                        A(k, n), ldak,
-                        T(k, n), T->mb,
-                        Q(m, N), ldqm,
-                        Q(m, n), ldqm);
+                        A(k, n),
+                        T(k, n),
+                        Q(m, N),
+                        Q(m, n));
                 }
 
                 RUNTIME_data_flush( sequence, A(k, n) );
@@ -154,19 +149,18 @@ void chameleon_pzunglqrh( int genD, int BS,
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamUpper, tempkmin, tempDNn, A->nb,
-                    A(k, N), ldak,
-                    D(k, N), lddk );
+                    A(k, N),
+                    D(k, N) );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamLower, tempkmin, tempDNn,
                     0., 1.,
-                    D(k, N), lddk );
+                    D(k, N) );
 #endif
             }
             for (m = k; m < Q->mt; m++) {
                 tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
-                ldqm = BLKLDD(Q, m);
 
                 RUNTIME_data_migrate( sequence, Q(m, N),
                                       Q->get_rankof( Q, m, N ) );
@@ -176,9 +170,9 @@ void chameleon_pzunglqrh( int genD, int BS,
                     ChamRight, ChamNoTrans,
                     tempmm, tempNn,
                     tempkmin, ib, T->nb,
-                    D(k, N), lddk,
-                    T(k, N), T->mb,
-                    Q(m, N), ldqm);
+                    D(k, N),
+                    T(k, N),
+                    Q(m, N));
             }
             RUNTIME_data_flush( sequence, D(k, N) );
             RUNTIME_data_flush( sequence, T(k, N) );
diff --git a/compute/pzungqr.c b/compute/pzungqr.c
index dda7b25a1c1bb50004b749ab546d46bbb924b872..fdccd74fe30437185fb753b6af009c59f068cbbe 100644
--- a/compute/pzungqr.c
+++ b/compute/pzungqr.c
@@ -43,7 +43,6 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldqk, ldam, ldqm, lddk;
     int tempmm, tempnn, tempkmin, tempkm;
     int tempAkm, tempAkn;
     int ib, minMT;
@@ -95,13 +94,8 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
         tempAkn  = k == A->nt-1 ? A->n-k*A->nb : A->nb;
         tempkmin = chameleon_min( tempAkn, tempAkm );
         tempkm   = k == Q->mt-1 ? Q->m-k*Q->mb : Q->mb;
-        ldak = BLKLDD(A, k);
-        lddk = BLKLDD(D, k);
-        ldqk = BLKLDD(Q, k);
         for (m = Q->mt - 1; m > k; m--) {
             tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
-            ldam = BLKLDD(A, m);
-            ldqm = BLKLDD(Q, m);
             for (n = k; n < Q->nt; n++) {
                 tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
 
@@ -113,10 +107,10 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
                     &options,
                     ChamLeft, ChamNoTrans,
                     tempmm, tempnn, tempAkn, 0, ib, T->nb,
-                    A(m, k), ldam,
-                    T(m, k), T->mb,
-                    Q(k, n), ldqk,
-                    Q(m, n), ldqm);
+                    A(m, k),
+                    T(m, k),
+                    Q(k, n),
+                    Q(m, n));
             }
             RUNTIME_data_flush( sequence, A(m, k) );
             RUNTIME_data_flush( sequence, T(m, k) );
@@ -128,14 +122,14 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
             INSERT_TASK_zlacpy(
                 &options,
                 ChamLower, tempDkm, tempkmin, A->nb,
-                A(k, k), ldak,
-                D(k),    lddk );
+                A(k, k),
+                D(k) );
 #if defined(CHAMELEON_USE_CUDA)
             INSERT_TASK_zlaset(
                 &options,
                 ChamUpper, tempDkm, tempkmin,
                 0., 1.,
-                D(k), lddk );
+                D(k) );
 #endif
         }
         for (n = k; n < Q->nt; n++) {
@@ -149,9 +143,9 @@ void chameleon_pzungqr( int genD, CHAM_desc_t *A, CHAM_desc_t *Q,
                 &options,
                 ChamLeft, ChamNoTrans,
                 tempkm, tempnn, tempkmin, ib, T->nb,
-                D(k),    lddk,
-                T(k, k), T->mb,
-                Q(k, n), ldqk);
+                D(k),
+                T(k, k),
+                Q(k, n));
         }
         RUNTIME_data_flush( sequence, D(k) );
         RUNTIME_data_flush( sequence, T(k, k) );
diff --git a/compute/pzungqr_param.c b/compute/pzungqr_param.c
index 346848b6fc98efe1a0ca00c987dab9e3bfe3d5bb..8458b97def80e8d72016a2ffabc498f43bcf95bc 100644
--- a/compute/pzungqr_param.c
+++ b/compute/pzungqr_param.c
@@ -42,7 +42,6 @@ void chameleon_pzungqr_param( int genD, int K,
     size_t ws_host = 0;
 
     int k, m, n, i, p, L;
-    int ldam, ldqm, ldqp, lddm;
     int tempmm, tempnn, tempkmin, tempkn;
     int ib, nbgeqrt, node, nbtiles, *tiles;
 
@@ -94,9 +93,6 @@ void chameleon_pzungqr_param( int genD, int K,
             p = qrtree->currpiv(qrtree, k, m);
 
             tempmm = m == Q->mt-1 ? Q->m-m*Q->mb : Q->mb;
-            ldqp = BLKLDD(Q, p);
-            ldam = BLKLDD(A, m);
-            ldqm = BLKLDD(Q, m);
 
             if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
                 /* TS kernel */
@@ -120,10 +116,10 @@ void chameleon_pzungqr_param( int genD, int K,
                     &options,
                     ChamLeft, ChamNoTrans,
                     tempmm, tempnn, tempkn, L, ib, T->nb,
-                    A(m, k), ldam,
-                    T(m, k), T->mb,
-                    Q(p, n), ldqp,
-                    Q(m, n), ldqm);
+                    A(m, k),
+                    T(m, k),
+                    Q(p, n),
+                    Q(m, n));
             }
             RUNTIME_data_flush( sequence, A(m, k) );
             RUNTIME_data_flush( sequence, T(m, k) );
@@ -138,23 +134,20 @@ void chameleon_pzungqr_param( int genD, int K,
 
             tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
             tempkmin = chameleon_min(tempmm, tempkn);
-            ldam = BLKLDD(A, m);
-            lddm = BLKLDD(D, m);
-            ldqm = BLKLDD(Q, m);
 
             if ( genD ) {
                 int tempDmm = m == D->mt-1 ? D->m-m*D->mb : D->mb;
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamLower, tempDmm, tempkmin, A->nb,
-                    A(m, k), ldam,
-                    D(m, k), lddm );
+                    A(m, k),
+                    D(m, k) );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamUpper, tempDmm, tempkmin,
                     0., 1.,
-                    D(m, k), lddm );
+                    D(m, k) );
 #endif
             }
 
@@ -169,9 +162,9 @@ void chameleon_pzungqr_param( int genD, int K,
                     &options,
                     ChamLeft, ChamNoTrans,
                     tempmm, tempnn, tempkmin, ib, T->nb,
-                    D(m, k), lddm,
-                    T(m, k), T->mb,
-                    Q(m, n), ldqm);
+                    D(m, k),
+                    T(m, k),
+                    Q(m, n));
             }
             RUNTIME_data_flush( sequence, D(m, k) );
             RUNTIME_data_flush( sequence, T(m, k) );
diff --git a/compute/pzungqrrh.c b/compute/pzungqrrh.c
index 8310ae1aee8d6cc85d8c6d8eb8940c5f3932e966..3e964790791a3d04b83bc93f3d90f4da5c6981b8 100644
--- a/compute/pzungqrrh.c
+++ b/compute/pzungqrrh.c
@@ -48,8 +48,6 @@ void chameleon_pzungqrrh( int genD, int BS,
 
     int k, m, n;
     int K, M, RD, lastRD;
-    int ldaM, ldam, ldaMRD, lddM;
-    int ldqM, ldqm, ldqMRD;
     int tempkn, tempMm, tempnn, tempmm, tempMRDm, tempkmin;
     int ib, node;
 
@@ -97,9 +95,6 @@ void chameleon_pzungqrrh( int genD, int BS,
         for (RD = lastRD; RD >= BS; RD /= 2) {
             for (M = k; M+RD < A->mt; M += 2*RD) {
                 tempMRDm = M+RD == A->mt-1 ? A->m-(M+RD)*A->mb : A->mb;
-                ldqM   = BLKLDD(Q, M   );
-                ldqMRD = BLKLDD(Q, M+RD);
-                ldaMRD = BLKLDD(A, M+RD);
                 for (n = k; n < Q->nt; n++) {
                     tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
 
@@ -112,10 +107,10 @@ void chameleon_pzungqrrh( int genD, int BS,
                         &options,
                         ChamLeft, ChamNoTrans,
                         tempMRDm, tempnn, tempkn, tempMRDm, ib, T->nb,
-                        A (M+RD, k), ldaMRD,
-                        T2(M+RD, k), T->mb,
-                        Q (M,    n), ldqM,
-                        Q (M+RD, n), ldqMRD);
+                        A (M+RD, k),
+                        T2(M+RD, k),
+                        Q (M,    n),
+                        Q (M+RD, n));
                 }
 
                 RUNTIME_data_flush( sequence, A (M+RD, k) );
@@ -125,13 +120,8 @@ void chameleon_pzungqrrh( int genD, int BS,
         for (M = k; M < A->mt; M += BS) {
             tempMm   = M == A->mt-1 ? A->m-M*A->mb : A->mb;
             tempkmin = chameleon_min(tempMm, tempkn);
-            ldaM = BLKLDD(A, M);
-            lddM = BLKLDD(D, M);
-            ldqM = BLKLDD(Q, M);
             for (m = chameleon_min(M+BS, A->mt)-1; m > M; m--) {
                 tempmm = m == A->mt-1 ? A->m-m*A->mb : A->mb;
-                ldqm = BLKLDD(Q, m);
-                ldam = BLKLDD(A, m);
 
                 for (n = k; n < Q->nt; n++) {
                     tempnn = n == Q->nt-1 ? Q->n-n*Q->nb : Q->nb;
@@ -145,10 +135,10 @@ void chameleon_pzungqrrh( int genD, int BS,
                         &options,
                         ChamLeft, ChamNoTrans,
                         tempmm, tempnn, tempkn, 0, ib, T->nb,
-                        A(m, k), ldam,
-                        T(m, k), T->mb,
-                        Q(M, n), ldqM,
-                        Q(m, n), ldqm);
+                        A(m, k),
+                        T(m, k),
+                        Q(M, n),
+                        Q(m, n));
                 }
                 RUNTIME_data_flush( sequence, A(m, k) );
                 RUNTIME_data_flush( sequence, T(m, k) );
@@ -159,14 +149,14 @@ void chameleon_pzungqrrh( int genD, int BS,
                 INSERT_TASK_zlacpy(
                     &options,
                     ChamLower, tempDMm, tempkmin, A->nb,
-                    A(M, k), ldaM,
-                    D(M, k), lddM );
+                    A(M, k),
+                    D(M, k) );
 #if defined(CHAMELEON_USE_CUDA)
                 INSERT_TASK_zlaset(
                     &options,
                     ChamUpper, tempDMm, tempkmin,
                     0., 1.,
-                    D(M, k), lddM );
+                    D(M, k) );
 #endif
             }
             for (n = k; n < Q->nt; n++) {
@@ -181,9 +171,9 @@ void chameleon_pzungqrrh( int genD, int BS,
                     ChamLeft, ChamNoTrans,
                     tempMm, tempnn,
                     tempkmin, ib, T->nb,
-                    D(M, k), lddM,
-                    T(M, k), T->mb,
-                    Q(M, n), ldqM);
+                    D(M, k),
+                    T(M, k),
+                    Q(M, n));
             }
             RUNTIME_data_flush( sequence, D(M, k) );
             RUNTIME_data_flush( sequence, T(M, k) );
diff --git a/compute/pzunmlq.c b/compute/pzunmlq.c
index ac2cb0aa76116c1cf70f6cd3efaa0f9621ff5b9b..0b3f109fc611659817339c8a3723fdf34f132b0b 100644
--- a/compute/pzunmlq.c
+++ b/compute/pzunmlq.c
@@ -44,7 +44,6 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldck, ldcm, lddk;
     int tempkm, tempkn, tempkmin, tempmm, tempnn;
     int ib, KT, K;
 
@@ -100,23 +99,20 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                 tempkm   = k == C->mt - 1 ? C->m - k * C->mb : C->mb;
                 tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
 
-                ldak = BLKLDD(A, k);
-                ldck = BLKLDD(C, k);
-                lddk = BLKLDD(D, k);
 
                 if ( genD ) {
                     int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
                     INSERT_TASK_zlacpy(
                         &options,
                         ChamUpper, tempkmin, tempDkn, A->nb,
-                        A(k, k), ldak,
-                        D(k),    lddk );
+                        A(k, k),
+                        D(k) );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
                         ChamLower, tempkmin, tempDkn,
                         0., 1.,
-                        D(k), lddk );
+                        D(k) );
 #endif
                 }
                 for (n = 0; n < C->nt; n++) {
@@ -125,9 +121,9 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         &options,
                         side, trans,
                         tempkm, tempnn, tempkmin, ib, T->nb,
-                        D(k),    lddk,
-                        T(k, k), T->mb,
-                        C(k, n), ldck);
+                        D(k),
+                        T(k, k),
+                        C(k, n));
                 }
 
                 RUNTIME_data_flush( sequence, D(k)    );
@@ -135,7 +131,6 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
 
                 for (m = k+1; m < C->mt; m++) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
                     for (n = 0; n < C->nt; n++) {
                         tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
@@ -147,10 +142,10 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                             &options,
                             side, trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
-                            A(k, m), ldak,
-                            T(k, m), T->mb,
-                            C(k, n), ldck,
-                            C(m, n), ldcm);
+                            A(k, m),
+                            T(k, m),
+                            C(k, n),
+                            C(m, n));
                     }
 
                     RUNTIME_data_flush( sequence, A(k, m) );
@@ -176,13 +171,9 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                 tempkm   = k == C->mt - 1 ? C->m - k * C->mb : C->mb;
                 tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
 
-                ldak = BLKLDD(A, k);
-                ldck = BLKLDD(C, k);
-                lddk = BLKLDD(D, k);
 
                 for (m = C->mt-1; m > k; m--) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
                     for (n = 0; n < C->nt; n++) {
                         tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
@@ -194,10 +185,10 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                             &options,
                             side, trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
-                            A(k, m), ldak,
-                            T(k, m), T->mb,
-                            C(k, n), ldck,
-                            C(m, n), ldcm);
+                            A(k, m),
+                            T(k, m),
+                            C(k, n),
+                            C(m, n));
                     }
 
                     RUNTIME_data_flush( sequence, A(k, m) );
@@ -209,14 +200,14 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                     INSERT_TASK_zlacpy(
                         &options,
                         ChamUpper, tempkmin, tempDkn, A->nb,
-                        A(k, k), ldak,
-                        D(k),    lddk );
+                        A(k, k),
+                        D(k) );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
                         ChamLower, tempkmin, tempDkn,
                         0., 1.,
-                        D(k), lddk );
+                        D(k) );
 #endif
                 }
                 for (n = 0; n < C->nt; n++) {
@@ -229,9 +220,9 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         &options,
                         side, trans,
                         tempkm, tempnn, tempkmin, ib, T->nb,
-                        D(k),    lddk,
-                        T(k, k), T->mb,
-                        C(k, n), ldck);
+                        D(k),
+                        T(k, k),
+                        C(k, n));
                 }
                 RUNTIME_data_flush( sequence, D(k)    );
                 RUNTIME_data_flush( sequence, T(k, k) );
@@ -249,14 +240,11 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
 
                 tempkn   = k == C->nt - 1 ? C->n - k * C->nb : C->nb;
                 tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
-                ldak = BLKLDD(A, k);
-                lddk = BLKLDD(D, k);
 
                 for (n = C->nt-1; n > k; n--) {
                     tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                     for (m = 0; m < C->mt; m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         RUNTIME_data_migrate( sequence, C(m, k),
                                               C->get_rankof( C, m, n ) );
@@ -266,10 +254,10 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                             &options,
                             side, trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
-                            A(k, n), ldak,
-                            T(k, n), T->mb,
-                            C(m, k), ldcm,
-                            C(m, n), ldcm);
+                            A(k, n),
+                            T(k, n),
+                            C(m, k),
+                            C(m, n));
                     }
 
                     RUNTIME_data_flush( sequence, A(k, n) );
@@ -281,19 +269,18 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                     INSERT_TASK_zlacpy(
                         &options,
                         ChamUpper, tempkmin, tempDkn, A->nb,
-                        A(k, k), ldak,
-                        D(k),    lddk );
+                        A(k, k),
+                        D(k) );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
                         ChamLower, tempkmin, tempDkn,
                         0., 1.,
-                        D(k), lddk );
+                        D(k) );
 #endif
                 }
                 for (m = 0; m < C->mt; m++) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
 
                     RUNTIME_data_migrate( sequence, C(m, k),
                                           C->get_rankof( C, m, k ) );
@@ -302,9 +289,9 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                         &options,
                         side, trans,
                         tempmm, tempkn, tempkmin, ib, T->nb,
-                        D(k),    lddk,
-                        T(k, k), T->mb,
-                        C(m, k), ldcm);
+                        D(k),
+                        T(k, k),
+                        C(m, k));
                 }
 
                 RUNTIME_data_flush( sequence, D(k)    );
@@ -322,8 +309,6 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
 
                 tempkn   = k == C->nt - 1 ? C->n - k * C->nb : C->nb;
                 tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
-                ldak = BLKLDD(A, k);
-                lddk = BLKLDD(D, k);
 
                 if ( genD ) {
                     int tempDkn = k == D->nt-1 ? D->n-k*D->nb : D->nb;
@@ -331,26 +316,25 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                     INSERT_TASK_zlacpy(
                         &options,
                         ChamUpper, tempkmin, tempDkn, A->nb,
-                        A(k, k), ldak,
-                        D(k),    lddk );
+                        A(k, k),
+                        D(k) );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
                         ChamLower, tempkmin, tempDkn,
                         0., 1.,
-                        D(k), lddk );
+                        D(k) );
 #endif
                 }
                 for (m = 0; m < C->mt; m++) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
                     INSERT_TASK_zunmlq(
                         &options,
                         side, trans,
                         tempmm, tempkn, tempkmin, ib, T->nb,
-                        D(k),    lddk,
-                        T(k, k), T->mb,
-                        C(m, k), ldcm);
+                        D(k),
+                        T(k, k),
+                        C(m, k));
                 }
 
                 RUNTIME_data_flush( sequence, D(k)    );
@@ -360,7 +344,6 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                     tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                     for (m = 0; m < C->mt; m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         RUNTIME_data_migrate( sequence, C(m, k),
                                               C->get_rankof( C, m, n ) );
@@ -370,10 +353,10 @@ void chameleon_pzunmlq( int genD, cham_side_t side, cham_trans_t trans,
                             &options,
                             side, trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
-                            A(k, n), ldak,
-                            T(k, n), T->mb,
-                            C(m, k), ldcm,
-                            C(m, n), ldcm);
+                            A(k, n),
+                            T(k, n),
+                            C(m, k),
+                            C(m, n));
                     }
 
                     RUNTIME_data_flush( sequence, A(k, n) );
diff --git a/compute/pzunmlq_param.c b/compute/pzunmlq_param.c
index 6e27ca3f610cbb139e0f37edc127e69097fcedab..16c1c588c7981a93e1e39948e4e8199fdcbb17e9 100644
--- a/compute/pzunmlq_param.c
+++ b/compute/pzunmlq_param.c
@@ -42,7 +42,6 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
     size_t ws_host = 0;
 
     int k, m, n, i, p;
-    int ldak, lddk, ldcp, ldcm;
     int temppm, temppn, tempmm, tempnn, tempkm,tempkmin;
     int ib, KT, L;
     int node, nbtiles, *tiles;
@@ -93,8 +92,6 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt - 1 ? A->m - k * A->mb : A->mb;
-                ldak = BLKLDD(A, k);
-                lddk = BLKLDD(D, k);
 
                 T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
@@ -103,7 +100,6 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                     temppm   = p == C->mt-1 ? C->m - p * C->mb : C->mb;
                     tempkmin = chameleon_min( temppm, tempkm );
 
-                    ldcp = BLKLDD(C, p);
 
                     if ( genD ) {
                         int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
@@ -111,14 +107,14 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamUpper, tempkmin, tempDpn, A->nb,
-                            A(k, p), ldak,
-                            D(k, p), lddk );
+                            A(k, p),
+                            D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, tempDpn,
                             0., 1.,
-                            D(k, p), lddk );
+                            D(k, p) );
 #endif
                     }
                     for (n = 0; n < C->nt; n++) {
@@ -126,9 +122,9 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zunmlq(
                             &options, side, trans,
                             temppm, tempnn, tempkmin, ib, T->nb,
-                            D(k, p), lddk,
-                            T(k, p), T->mb,
-                            C(p, n), ldcp);
+                            D(k, p),
+                            T(k, p),
+                            C(p, n));
                     }
                     RUNTIME_data_flush( sequence, D(k, p) );
                     RUNTIME_data_flush( sequence, T(k, p) );
@@ -142,8 +138,6 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                     p = qrtree->currpiv(qrtree, k, m);
 
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
-                    ldcp = BLKLDD(C, p);
 
                     if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
                         /* TS kernel */
@@ -165,10 +159,10 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_ztpmlqt(
                             &options, side, trans,
                             tempmm, tempnn, tempkm, chameleon_min( L, tempnn ), ib, T->nb,
-                            A(k, m), ldak,
-                            T(k, m), T->mb,
-                            C(p, n), ldcp,
-                            C(m, n), ldcm);
+                            A(k, m),
+                            T(k, m),
+                            C(p, n),
+                            C(m, n));
                     }
                     RUNTIME_data_flush( sequence, A(k, m) );
                     RUNTIME_data_flush( sequence, T(k, m) );
@@ -191,8 +185,6 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                ldak = BLKLDD(A, k);
-                lddk = BLKLDD(D, k);
 
                 /* Setting the order of the tiles*/
                 nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
@@ -202,8 +194,6 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                     p = qrtree->currpiv(qrtree, k, m);
 
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcp = BLKLDD(C, p);
-                    ldcm = BLKLDD(C, m);
 
                     if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
                         /* TS kernel */
@@ -225,10 +215,10 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_ztpmlqt(
                             &options, side, trans,
                             tempmm, tempnn, tempkm, chameleon_min(L, tempnn), ib, T->nb,
-                            A(k, m), ldak,
-                            T(k, m), T->mb,
-                            C(p, n), ldcp,
-                            C(m, n), ldcm);
+                            A(k, m),
+                            T(k, m),
+                            C(p, n),
+                            C(m, n));
                     }
                     RUNTIME_data_flush( sequence, A(k, m) );
                     RUNTIME_data_flush( sequence, T(k, m) );
@@ -241,7 +231,6 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                     temppm   = p == C->mt-1 ? C->m-p*C->mb : C->mb;
                     tempkmin = chameleon_min( temppm, tempkm );
 
-                    ldcp = BLKLDD(C, p);
 
                     if ( genD ) {
                         int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
@@ -249,14 +238,14 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamUpper, tempkmin, tempDpn, A->nb,
-                            A(k, p), ldak,
-                            D(k, p), lddk );
+                            A(k, p),
+                            D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, tempDpn,
                             0., 1.,
-                            D(k, p), lddk );
+                            D(k, p) );
 #endif
                     }
 
@@ -269,9 +258,9 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zunmlq(
                             &options, side, trans,
                             temppm, tempnn, tempkmin, ib, T->nb,
-                            D(k, p), lddk,
-                            T(k, p), T->mb,
-                            C(p, n), ldcp);
+                            D(k, p),
+                            T(k, p),
+                            C(p, n));
                     }
 
                     RUNTIME_data_flush( sequence, D(k, p) );
@@ -290,8 +279,6 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                ldak = BLKLDD(A, k);
-                lddk = BLKLDD(D, k);
 
                 /* Setting the order of the tiles*/
                 nbtiles = libhqr_walk_stepk( qrtree, k, tiles );
@@ -315,7 +302,6 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
 
                     for (m = 0; m < C->mt; m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         node = C->get_rankof( C, m, n );
                         RUNTIME_data_migrate( sequence, C(m, p), node );
@@ -324,10 +310,10 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_ztpmlqt(
                             &options, side, trans,
                             tempmm, tempnn, tempkm, L, ib, T->nb,
-                            A(k, n), ldak,
-                            T(k, n), T->mb,
-                            C(m, p), ldcm,
-                            C(m, n), ldcm);
+                            A(k, n),
+                            T(k, n),
+                            C(m, p),
+                            C(m, n));
                     }
                     RUNTIME_data_flush( sequence, A(k, n) );
                     RUNTIME_data_flush( sequence, T(k, n) );
@@ -346,20 +332,19 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamUpper, tempkmin, tempDpn, A->nb,
-                            A(k, p), ldak,
-                            D(k, p), lddk );
+                            A(k, p),
+                            D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, tempDpn,
                             0., 1.,
-                            D(k, p), lddk );
+                            D(k, p) );
 #endif
                     }
 
                     for (m = 0; m < C->mt; m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         RUNTIME_data_migrate( sequence, C(m, p),
                                               C->get_rankof( C, m, p ) );
@@ -367,9 +352,9 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zunmlq(
                             &options, side, trans,
                             tempmm, temppn, tempkmin, ib, T->nb,
-                            D(k, p), lddk,
-                            T(k, p), T->mb,
-                            C(m, p), ldcm);
+                            D(k, p),
+                            T(k, p),
+                            C(m, p));
                     }
                     RUNTIME_data_flush( sequence, D(k, p) );
                     RUNTIME_data_flush( sequence, T(k, p) );
@@ -385,8 +370,6 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                ldak = BLKLDD(A, k);
-                lddk = BLKLDD(D, k);
 
                 T = TS;
                 for (i = 0; i < qrtree->getnbgeqrf(qrtree, k); i++) {
@@ -401,26 +384,25 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamUpper, tempkmin, tempDpn, A->nb,
-                            A(k, p), ldak,
-                            D(k, p), lddk );
+                            A(k, p),
+                            D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, tempDpn,
                             0., 1.,
-                            D(k, p), lddk );
+                            D(k, p) );
 #endif
                     }
 
                     for (m = 0; m < C->mt; m++) {
-                        ldcm = BLKLDD(C, m);
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                         INSERT_TASK_zunmlq(
                             &options, side, trans,
                             tempmm, temppn, tempkmin, ib, T->nb,
-                            D(k, p), lddk,
-                            T(k, p), TS->mb,
-                            C(m, p), ldcm);
+                            D(k, p),
+                            T(k, p),
+                            C(m, p));
                     }
                     RUNTIME_data_flush( sequence, D(k, p) );
                     RUNTIME_data_flush( sequence, T(k, p) );
@@ -448,7 +430,6 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
 
                     for (m = 0; m < C->mt; m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         node = C->get_rankof( C, m, n );
                         RUNTIME_data_migrate( sequence, C(m, p), node );
@@ -457,10 +438,10 @@ void chameleon_pzunmlq_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_ztpmlqt(
                             &options, side, trans,
                             tempmm, tempnn, tempkm, L, ib, T->nb,
-                            A(k, n), ldak,
-                            T(k, n), T->mb,
-                            C(m, p), ldcm,
-                            C(m, n), ldcm);
+                            A(k, n),
+                            T(k, n),
+                            C(m, p),
+                            C(m, n));
                     }
                     RUNTIME_data_flush( sequence, A(k, n) );
                     RUNTIME_data_flush( sequence, T(k, n) );
diff --git a/compute/pzunmlqrh.c b/compute/pzunmlqrh.c
index 8f2931ccdba43f7f7d89f8f5f1b962a28567c4bf..ee64598623793ccda1776ddd2961e254f3063bd0 100644
--- a/compute/pzunmlqrh.c
+++ b/compute/pzunmlqrh.c
@@ -47,8 +47,6 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
     int k, m, n, p;
     int KT, RD, lastRD;
-    int ldak, lddk;
-    int ldcp, ldcm;
     int temppm, temppn, tempkm, tempnn, tempmm, tempkmin;
     int ib, node;
 
@@ -96,15 +94,12 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                 tempkm = k == A->mt - 1 ? A->m - k * A->mb : A->mb;
 
-                ldak = BLKLDD(A, k);
-                lddk = BLKLDD(D, k);
 
                 for (p = k; p < C->mt; p += BS) {
 
                     temppm   = p == C->mt-1 ? C->m - p * C->mb : C->mb;
                     tempkmin = chameleon_min( temppm, tempkm );
 
-                    ldcp = BLKLDD(C, p);
 
                     if ( genD ) {
                         int tempDpn = p == D->nt-1 ? D->n-p*D->nb : D->nb;
@@ -112,14 +107,14 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamUpper, tempkmin, tempDpn, A->nb,
-                            A(k, p), ldak,
-                            D(k, p), lddk );
+                            A(k, p),
+                            D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, tempDpn,
                             0., 1.,
-                            D(k, p), lddk );
+                            D(k, p) );
 #endif
                     }
                     for (n = 0; n < C->nt; n++) {
@@ -128,16 +123,15 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             &options,
                             side, trans,
                             temppm, tempnn, tempkmin, ib, T->nb,
-                            D(k, p), lddk,
-                            T(k, p), T->mb,
-                            C(p, n), ldcp);
+                            D(k, p),
+                            T(k, p),
+                            C(p, n));
                     }
                     RUNTIME_data_flush( sequence, D(k, p) );
                     RUNTIME_data_flush( sequence, T(k, p) );
 
                     for (m = p+1; m < chameleon_min(p+BS, C->mt); m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         for (n = 0; n < C->nt; n++) {
                             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
@@ -150,10 +144,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmlqt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkm, 0, ib, T->nb,
-                                A(k, m), ldak,
-                                T(k, m), T->mb,
-                                C(p, n), ldcp,
-                                C(m, n), ldcm);
+                                A(k, m),
+                                T(k, m),
+                                C(p, n),
+                                C(m, n));
                         }
                         RUNTIME_data_flush( sequence, A(k, m) );
                         RUNTIME_data_flush( sequence, T(k, m) );
@@ -164,8 +158,6 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         m = p+RD;
 
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
-                        ldcp = BLKLDD(C, p);
 
                         for (n = 0; n < C->nt; n++) {
                             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
@@ -179,10 +171,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                                 &options,
                                 side, trans,
                                 tempmm, tempnn, tempkm, tempnn, ib, T->nb,
-                                A (k, m), ldak,
-                                T2(k, m), T->mb,
-                                C (p, n), ldcp,
-                                C (m, n), ldcm);
+                                A (k, m),
+                                T2(k, m),
+                                C (p, n),
+                                C (m, n));
                         }
                         RUNTIME_data_flush( sequence, A (k, m) );
                         RUNTIME_data_flush( sequence, T2(k, m) );
@@ -206,8 +198,6 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                ldak = BLKLDD(A, k);
-                lddk = BLKLDD(D, k);
 
                 lastRD = 0;
                 for (RD = BS; RD < C->mt-k; RD *= 2)
@@ -217,8 +207,6 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         m = p+RD;
 
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
-                        ldcp = BLKLDD(C, p);
 
                         for (n = 0; n < C->nt; n++) {
                             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
@@ -231,21 +219,19 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmlqt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkm, tempnn, ib, T->nb,
-                                A (k, m), ldak,
-                                T2(k, m), T->mb,
-                                C (p, n), ldcp,
-                                C (m, n), ldcm);
+                                A (k, m),
+                                T2(k, m),
+                                C (p, n),
+                                C (m, n));
                         }
                         RUNTIME_data_flush( sequence, A (k, m) );
                         RUNTIME_data_flush( sequence, T2(k, m) );
                     }
                 }
                 for (p = k; p < C->mt; p += BS) {
-                    ldcp = BLKLDD(C, p);
 
                     for (m = chameleon_min(p+BS, C->mt)-1; m > p; m--) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         for (n = 0; n < C->nt; n++) {
                             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
@@ -258,10 +244,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmlqt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkm, 0, ib, T->nb,
-                                A(k, m), ldak,
-                                T(k, m), T->mb,
-                                C(p, n), ldcp,
-                                C(m, n), ldcm);
+                                A(k, m),
+                                T(k, m),
+                                C(p, n),
+                                C(m, n));
                         }
                         RUNTIME_data_flush( sequence, A(k, m) );
                         RUNTIME_data_flush( sequence, T(k, m) );
@@ -276,14 +262,14 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamUpper, tempkmin, tempDpn, A->nb,
-                            A(k, p), ldak,
-                            D(k, p), lddk );
+                            A(k, p),
+                            D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, tempDpn,
                             0., 1.,
-                            D(k, p), lddk );
+                            D(k, p) );
 #endif
                     }
 
@@ -296,9 +282,9 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zunmlq(
                             &options, side, trans,
                             temppm, tempnn, tempkmin, ib, T->nb,
-                            D(k, p), lddk,
-                            T(k, p), T->mb,
-                            C(p, n), ldcp);
+                            D(k, p),
+                            T(k, p),
+                            C(p, n));
                     }
                     RUNTIME_data_flush( sequence, D(k, p) );
                     RUNTIME_data_flush( sequence, T(k, p) );
@@ -316,8 +302,6 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                ldak = BLKLDD(A, k);
-                lddk = BLKLDD(D, k);
                 lastRD = 0;
                 for (RD = BS; RD < C->nt-k; RD *= 2)
                     lastRD = RD;
@@ -329,7 +313,6 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                         for (m = 0; m < C->mt; m++) {
                             tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                            ldcm   = BLKLDD(C, m);
 
                             node = C->get_rankof( C, m, n );
                             RUNTIME_data_migrate( sequence, C(m, p), node );
@@ -339,10 +322,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmlqt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkm, tempnn, ib, T->nb,
-                                A (k, n), ldak,
-                                T2(k, n), T->mb,
-                                C (m, p), ldcm,
-                                C (m, n), ldcm);
+                                A (k, n),
+                                T2(k, n),
+                                C (m, p),
+                                C (m, n));
                         }
                         RUNTIME_data_flush( sequence, A (k, n) );
                         RUNTIME_data_flush( sequence, T2(k, n) );
@@ -356,7 +339,6 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                         for (m = 0; m < C->mt; m++) {
                             tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                            ldcm = BLKLDD(C, m);
 
                             node = C->get_rankof( C, m, n );
                             RUNTIME_data_migrate( sequence, C(m, p), node );
@@ -366,10 +348,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmlqt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkm, 0, ib, T->nb,
-                                A(k, n), ldak,
-                                T(k, n), T->mb,
-                                C(m, p), ldcm,
-                                C(m, n), ldcm);
+                                A(k, n),
+                                T(k, n),
+                                C(m, p),
+                                C(m, n));
                         }
                         RUNTIME_data_flush( sequence, A(k, n) );
                         RUNTIME_data_flush( sequence, T(k, n) );
@@ -384,20 +366,19 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamUpper, tempkmin, tempDpn, A->nb,
-                            A(k, p), ldak,
-                            D(k, p), lddk );
+                            A(k, p),
+                            D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, tempDpn,
                             0., 1.,
-                            D(k, p), lddk );
+                            D(k, p) );
 #endif
                     }
 
                     for (m = 0; m < C->mt; m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         RUNTIME_data_migrate( sequence, C(m, p),
                                               C->get_rankof( C, m, p ) );
@@ -405,9 +386,9 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zunmlq(
                             &options, side, trans,
                             tempmm, temppn, tempkmin, ib, T->nb,
-                            D(k, p), lddk,
-                            T(k, p), T->mb,
-                            C(m, p), ldcm);
+                            D(k, p),
+                            T(k, p),
+                            C(m, p));
                     }
                     RUNTIME_data_flush( sequence, D(k, p) );
                     RUNTIME_data_flush( sequence, T(k, p) );
@@ -423,8 +404,6 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                 RUNTIME_iteration_push(chamctxt, k);
 
                 tempkm = k == A->mt-1 ? A->m-k*A->mb : A->mb;
-                ldak = BLKLDD(A, k);
-                lddk = BLKLDD(D, k);
 
                 for (p = k; p < C->nt; p += BS) {
                     temppn   = p == C->nt - 1 ? C->n - p * C->nb : C->nb;
@@ -436,26 +415,25 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamUpper, tempkmin, tempDpn, A->nb,
-                            A(k, p), ldak,
-                            D(k, p), lddk );
+                            A(k, p),
+                            D(k, p) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamLower, tempkmin, tempDpn,
                             0., 1.,
-                            D(k, p), lddk );
+                            D(k, p) );
 #endif
                     }
 
                     for (m = 0; m < C->mt; m++) {
-                        ldcm = BLKLDD(C, m);
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                         INSERT_TASK_zunmlq(
                             &options, side, trans,
                             tempmm, temppn, tempkmin, ib, T->nb,
-                            D(k, p), lddk,
-                            T(k, p), T->mb,
-                            C(m, p), ldcm);
+                            D(k, p),
+                            T(k, p),
+                            C(m, p));
                     }
                     RUNTIME_data_flush( sequence, D(k, p) );
                     RUNTIME_data_flush( sequence, T(k, p) );
@@ -464,7 +442,6 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
                         for (m = 0; m < C->mt; m++) {
                             tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                            ldcm = BLKLDD(C, m);
 
                             node = C->get_rankof( C, m, n );
                             RUNTIME_data_migrate( sequence, C(m, p), node );
@@ -474,10 +451,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmlqt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkm, 0, ib, T->nb,
-                                A(k, n), ldak,
-                                T(k, n), T->mb,
-                                C(m, p), ldcm,
-                                C(m, n), ldcm);
+                                A(k, n),
+                                T(k, n),
+                                C(m, p),
+                                C(m, n));
                         }
                         RUNTIME_data_flush( sequence, A(k, n) );
                         RUNTIME_data_flush( sequence, T(k, n) );
@@ -490,7 +467,6 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                         for (m = 0; m < C->mt; m++) {
                             tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                            ldcm   = BLKLDD(C, m);
 
                             node = C->get_rankof( C, m, n );
                             RUNTIME_data_migrate( sequence, C(m, p), node );
@@ -501,10 +477,10 @@ void chameleon_pzunmlqrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                                 &options,
                                 side, trans,
                                 tempmm, tempnn, tempkm, tempnn, ib, T->nb,
-                                A (k, n), ldak,
-                                T2(k, n), T->mb,
-                                C (m, p), ldcm,
-                                C (m, n), ldcm);
+                                A (k, n),
+                                T2(k, n),
+                                C (m, p),
+                                C (m, n));
                         }
                         RUNTIME_data_flush( sequence, A (k, n) );
                         RUNTIME_data_flush( sequence, T2(k, n) );
diff --git a/compute/pzunmqr.c b/compute/pzunmqr.c
index 3a9e93bbee000aab330e562ee82bb6befd44d5db..333b81b17c3feeae562c8e2c474294a9957c7e84 100644
--- a/compute/pzunmqr.c
+++ b/compute/pzunmqr.c
@@ -44,7 +44,6 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
     size_t ws_host = 0;
 
     int k, m, n;
-    int ldak, ldck, ldam, ldan, ldcm, lddk;
     int tempkm, tempkn, tempkmin, tempmm, tempnn;
     int ib, KT, K;
 
@@ -100,9 +99,6 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                 tempkm   = k == C->mt - 1 ? C->m - k * C->mb : C->mb;
                 tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
 
-                ldak = BLKLDD(A, k);
-                ldck = BLKLDD(C, k);
-                lddk = BLKLDD(D, k);
 
                 if ( genD ) {
                     int tempDkm = k == D->mt-1 ? D->m-k*D->mb : D->mb;
@@ -110,14 +106,14 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                     INSERT_TASK_zlacpy(
                         &options,
                         ChamLower, tempDkm, tempkmin, A->nb,
-                        A(k, k), ldak,
-                        D(k),    lddk );
+                        A(k, k),
+                        D(k) );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
                         ChamUpper, tempDkm, tempkmin,
                         0., 1.,
-                        D(k), lddk );
+                        D(k) );
 #endif
                 }
                 for (n = 0; n < C->nt; n++) {
@@ -126,9 +122,9 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                         &options,
                         side, trans,
                         tempkm, tempnn, tempkmin, ib, T->nb,
-                        D(k),    lddk,
-                        T(k, k), T->mb,
-                        C(k, n), ldck);
+                        D(k),
+                        T(k, k),
+                        C(k, n));
                 }
 
                 RUNTIME_data_flush( sequence, D(k)    );
@@ -136,8 +132,6 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
 
                 for (m = k+1; m < C->mt; m++) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldam = BLKLDD(A, m);
-                    ldcm = BLKLDD(C, m);
                     for (n = 0; n < C->nt; n++) {
                         tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
@@ -149,10 +143,10 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                             &options,
                             side, trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
-                            A(m, k), ldam,
-                            T(m, k), T->mb,
-                            C(k, n), ldck,
-                            C(m, n), ldcm);
+                            A(m, k),
+                            T(m, k),
+                            C(k, n),
+                            C(m, n));
                     }
 
                     RUNTIME_data_flush( sequence, A(m, k) );
@@ -178,14 +172,9 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                 tempkm   = k == C->mt - 1 ? C->m - k * C->mb : C->mb;
                 tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
 
-                ldak = BLKLDD(A, k);
-                ldck = BLKLDD(C, k);
-                lddk = BLKLDD(D, k);
 
                 for (m = C->mt-1; m > k; m--) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldam = BLKLDD(A, m);
-                    ldcm = BLKLDD(C, m);
                     for (n = 0; n < C->nt; n++) {
                         tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
@@ -197,10 +186,10 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                             &options,
                             side, trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
-                            A(m, k), ldam,
-                            T(m, k), T->mb,
-                            C(k, n), ldck,
-                            C(m, n), ldcm);
+                            A(m, k),
+                            T(m, k),
+                            C(k, n),
+                            C(m, n));
                     }
                     RUNTIME_data_flush( sequence, A(m, k) );
                     RUNTIME_data_flush( sequence, T(m, k) );
@@ -212,14 +201,14 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                     INSERT_TASK_zlacpy(
                         &options,
                         ChamLower, tempDkm, tempkmin, A->nb,
-                        A(k, k), ldak,
-                        D(k),    lddk );
+                        A(k, k),
+                        D(k) );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
                         ChamUpper, tempDkm, tempkmin,
                         0., 1.,
-                        D(k), lddk );
+                        D(k) );
 #endif
                 }
                 for (n = 0; n < C->nt; n++) {
@@ -232,9 +221,9 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                         &options,
                         side, trans,
                         tempkm, tempnn, tempkmin, ib, T->nb,
-                        D(k),    lddk,
-                        T(k, k), T->mb,
-                        C(k, n), ldck);
+                        D(k),
+                        T(k, k),
+                        C(k, n));
                 }
                 RUNTIME_data_flush( sequence, D(k)    );
                 RUNTIME_data_flush( sequence, T(k, k) );
@@ -252,15 +241,11 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
 
                 tempkn   = k == C->nt - 1 ? C->n - k * C->nb : C->nb;
                 tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
-                ldak = BLKLDD(A, k);
-                lddk = BLKLDD(D, k);
 
                 for (n = C->nt-1; n > k; n--) {
                     tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-                    ldan = BLKLDD(A, n);
                     for (m = 0; m < C->mt; m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         RUNTIME_data_migrate( sequence, C(m, k),
                                               C->get_rankof( C, m, n ) );
@@ -270,10 +255,10 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                             &options,
                             side, trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
-                            A(n, k), ldan,
-                            T(n, k), T->mb,
-                            C(m, k), ldcm,
-                            C(m, n), ldcm);
+                            A(n, k),
+                            T(n, k),
+                            C(m, k),
+                            C(m, n));
                     }
 
                     RUNTIME_data_flush( sequence, A(n, k) );
@@ -286,19 +271,18 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                     INSERT_TASK_zlacpy(
                         &options,
                         ChamLower, tempDkm, tempkmin, A->nb,
-                        A(k, k), ldak,
-                        D(k),    lddk );
+                        A(k, k),
+                        D(k) );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
                         ChamUpper, tempDkm, tempkmin,
                         0., 1.,
-                        D(k), lddk );
+                        D(k) );
 #endif
                 }
                 for (m = 0; m < C->mt; m++) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
 
                     RUNTIME_data_migrate( sequence, C(m, k),
                                           C->get_rankof( C, m, k ) );
@@ -307,9 +291,9 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                         &options,
                         side, trans,
                         tempmm, tempkn, tempkmin, ib, T->nb,
-                        D(k),    lddk,
-                        T(k, k), T->mb,
-                        C(m, k), ldcm);
+                        D(k),
+                        T(k, k),
+                        C(m, k));
                 }
 
                 RUNTIME_data_flush( sequence, D(k)    );
@@ -327,8 +311,6 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
 
                 tempkn   = k == C->nt - 1 ? C->n - k * C->nb : C->nb;
                 tempkmin = k == KT    - 1 ? K    - k * A->nb : A->nb;
-                ldak = BLKLDD(A, k);
-                lddk = BLKLDD(D, k);
 
                 if ( genD ) {
                     int tempDkm = k == D->mt - 1 ? D->m - k * D->mb : D->mb;
@@ -336,26 +318,25 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                     INSERT_TASK_zlacpy(
                         &options,
                         ChamLower, tempDkm, tempkmin, A->nb,
-                        A(k, k), ldak,
-                        D(k),    lddk );
+                        A(k, k),
+                        D(k) );
 #if defined(CHAMELEON_USE_CUDA)
                     INSERT_TASK_zlaset(
                         &options,
                         ChamUpper, tempDkm, tempkmin,
                         0., 1.,
-                        D(k), lddk );
+                        D(k) );
 #endif
                 }
                 for (m = 0; m < C->mt; m++) {
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldcm = BLKLDD(C, m);
                     INSERT_TASK_zunmqr(
                         &options,
                         side, trans,
                         tempmm, tempkn, tempkmin, ib, T->nb,
-                        D(k),    lddk,
-                        T(k, k), T->mb,
-                        C(m, k), ldcm);
+                        D(k),
+                        T(k, k),
+                        C(m, k));
                 }
 
                 RUNTIME_data_flush( sequence, D(k)    );
@@ -363,10 +344,8 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
 
                 for (n = k+1; n < C->nt; n++) {
                     tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-                    ldan = BLKLDD(A, n);
                     for (m = 0; m < C->mt; m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         RUNTIME_data_migrate( sequence, C(m, k),
                                               C->get_rankof( C, m, n ) );
@@ -376,10 +355,10 @@ void chameleon_pzunmqr( int genD, cham_side_t side, cham_trans_t trans,
                             &options,
                             side, trans,
                             tempmm, tempnn, tempkmin, 0, ib, T->nb,
-                            A(n, k), ldan,
-                            T(n, k), T->mb,
-                            C(m, k), ldcm,
-                            C(m, n), ldcm);
+                            A(n, k),
+                            T(n, k),
+                            C(m, k),
+                            C(m, n));
                     }
 
                     RUNTIME_data_flush( sequence, A(n, k) );
diff --git a/compute/pzunmqr_param.c b/compute/pzunmqr_param.c
index a3905647c0767cbaad4f4a8be98e1397b6996924..8f95b49a6f4dcd053274965e764e91712aa4d27d 100644
--- a/compute/pzunmqr_param.c
+++ b/compute/pzunmqr_param.c
@@ -42,7 +42,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
     size_t ws_host = 0;
 
     int k, m, n, i, p;
-    int ldap, ldam, ldan, lddp, ldcp, ldcm;
     int temppm, temppn, tempmm, tempnn, tempkn,tempkmin;
     int ib, KT, L;
     int node, nbtiles, *tiles;
@@ -101,9 +100,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     temppm   = p == C->mt-1 ? C->m - p * C->mb : C->mb;
                     tempkmin = chameleon_min( temppm, tempkn );
 
-                    ldap = BLKLDD(A, p);
-                    lddp = BLKLDD(D, p);
-                    ldcp = BLKLDD(C, p);
 
                     if ( genD ) {
                         int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
@@ -111,14 +107,14 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamLower, tempDpm, tempkmin, A->nb,
-                            A(p, k), ldap,
-                            D(p, k), lddp );
+                            A(p, k),
+                            D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(p, k), lddp );
+                            D(p, k) );
 #endif
                     }
                     for (n = 0; n < C->nt; n++) {
@@ -126,9 +122,9 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zunmqr(
                             &options, side, trans,
                             temppm, tempnn, tempkmin, ib, T->nb,
-                            D(p, k), lddp,
-                            T(p, k), T->mb,
-                            C(p, n), ldcp);
+                            D(p, k),
+                            T(p, k),
+                            C(p, n));
                     }
                     RUNTIME_data_flush( sequence, D(p, k) );
                     RUNTIME_data_flush( sequence, T(p, k) );
@@ -142,9 +138,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     p = qrtree->currpiv(qrtree, k, m);
 
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldam = BLKLDD(A, m);
-                    ldcp = BLKLDD(C, p);
-                    ldcm = BLKLDD(C, m);
 
                     if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
                         /* TS kernel */
@@ -166,10 +159,10 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_ztpmqrt(
                             &options, side, trans,
                             tempmm, tempnn, tempkn, L, ib, T->nb,
-                            A(m, k), ldam,
-                            T(m, k), T->mb,
-                            C(p, n), ldcp,
-                            C(m, n), ldcm);
+                            A(m, k),
+                            T(m, k),
+                            C(p, n),
+                            C(m, n));
                     }
                     RUNTIME_data_flush( sequence, A(m, k) );
                     RUNTIME_data_flush( sequence, T(m, k) );
@@ -201,9 +194,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     p = qrtree->currpiv(qrtree, k, m);
 
                     tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                    ldam = BLKLDD(A, m);
-                    ldcp = BLKLDD(C, p);
-                    ldcm = BLKLDD(C, m);
 
                     if( qrtree->gettype(qrtree, k, m) == LIBHQR_KILLED_BY_TS ) {
                         /* TS kernel */
@@ -225,10 +215,10 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_ztpmqrt(
                             &options, side, trans,
                             tempmm, tempnn, tempkn, L, ib, T->nb,
-                            A(m, k), ldam,
-                            T(m, k), T->mb,
-                            C(p, n), ldcp,
-                            C(m, n), ldcm);
+                            A(m, k),
+                            T(m, k),
+                            C(p, n),
+                            C(m, n));
                     }
                     RUNTIME_data_flush( sequence, A(m, k) );
                     RUNTIME_data_flush( sequence, T(m, k) );
@@ -241,9 +231,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     temppm   = p == C->mt-1 ? C->m-p*C->mb : C->mb;
                     tempkmin = chameleon_min( temppm, tempkn );
 
-                    ldap = BLKLDD(A, p);
-                    lddp = BLKLDD(D, p);
-                    ldcp = BLKLDD(C, p);
 
                     if ( genD ) {
                         int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
@@ -251,14 +238,14 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamLower, tempDpm, tempkmin, A->nb,
-                            A(p, k), ldap,
-                            D(p, k), lddp );
+                            A(p, k),
+                            D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(p, k), lddp );
+                            D(p, k) );
 #endif
                     }
 
@@ -271,9 +258,9 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zunmqr(
                             &options, side, trans,
                             temppm, tempnn, tempkmin, ib, T->nb,
-                            D(p, k), lddp,
-                            T(p, k), T->mb,
-                            C(p, n), ldcp);
+                            D(p, k),
+                            T(p, k),
+                            C(p, n));
                     }
                     RUNTIME_data_flush( sequence, D(p, k) );
                     RUNTIME_data_flush( sequence, T(p, k) );
@@ -300,7 +287,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     p = qrtree->currpiv(qrtree, k, n);
 
                     tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-                    ldan = BLKLDD(A, n);
 
                     if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) {
                         /* TS kernel */
@@ -315,7 +301,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
 
                     for (m = 0; m < C->mt; m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         node = C->get_rankof( C, m, n );
                         RUNTIME_data_migrate( sequence, C(m, p), node );
@@ -324,10 +309,10 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_ztpmqrt(
                             &options, side, trans,
                             tempmm, tempnn, tempkn, chameleon_min( L, tempmm ), ib, T->nb,
-                            A(n, k), ldan,
-                            T(n, k), T->mb,
-                            C(m, p), ldcm,
-                            C(m, n), ldcm);
+                            A(n, k),
+                            T(n, k),
+                            C(m, p),
+                            C(m, n));
                     }
                     RUNTIME_data_flush( sequence, A(n, k) );
                     RUNTIME_data_flush( sequence, T(n, k) );
@@ -339,8 +324,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
 
                     temppn   = p == C->nt-1 ? C->n - p * C->nb : C->nb;
                     tempkmin = chameleon_min(temppn, tempkn);
-                    ldap = BLKLDD(A, p);
-                    lddp = BLKLDD(D, p);
 
                     if ( genD ) {
                         int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
@@ -348,20 +331,19 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamLower, tempDpm, tempkmin, A->nb,
-                            A(p, k), ldap,
-                            D(p, k), lddp );
+                            A(p, k),
+                            D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(p, k), lddp );
+                            D(p, k) );
 #endif
                     }
 
                     for (m = 0; m < C->mt; m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         RUNTIME_data_migrate( sequence, C(m, p),
                                               C->get_rankof( C, m, p ) );
@@ -369,9 +351,9 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zunmqr(
                             &options, side, trans,
                             tempmm, temppn, tempkmin, ib, T->nb,
-                            D(p, k), lddp,
-                            T(p, k), T->mb,
-                            C(m, p), ldcm);
+                            D(p, k),
+                            T(p, k),
+                            C(m, p));
                     }
                     RUNTIME_data_flush( sequence, D(p, k) );
                     RUNTIME_data_flush( sequence, T(p, k) );
@@ -394,8 +376,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
 
                     temppn   = p == C->nt - 1 ? C->n - p * C->nb : C->nb;
                     tempkmin = chameleon_min( temppn, tempkn );
-                    ldap = BLKLDD(A, p);
-                    lddp = BLKLDD(D, p);
 
                     if ( genD ) {
                         int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
@@ -403,26 +383,25 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamLower, tempDpm, tempkmin, A->nb,
-                            A(p, k), ldap,
-                            D(p, k), lddp );
+                            A(p, k),
+                            D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(p, k), lddp );
+                            D(p, k) );
 #endif
                     }
 
                     for (m = 0; m < C->mt; m++) {
-                        ldcm = BLKLDD(C, m);
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                         INSERT_TASK_zunmqr(
                             &options, side, trans,
                             tempmm, temppn, tempkmin, ib, T->nb,
-                            D(p, k), lddp,
-                            T(p, k), T->mb,
-                            C(m, p), ldcm);
+                            D(p, k),
+                            T(p, k),
+                            C(m, p));
                     }
                     RUNTIME_data_flush( sequence, D(p, k) );
                     RUNTIME_data_flush( sequence, T(p, k) );
@@ -436,7 +415,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                     p = qrtree->currpiv(qrtree, k, n);
 
                     tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-                    ldan = BLKLDD(A, n);
 
                     if( qrtree->gettype(qrtree, k, n) == LIBHQR_KILLED_BY_TS ) {
                         /* TS kernel */
@@ -451,7 +429,6 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
 
                     for (m = 0; m < C->mt; m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         node = C->get_rankof( C, m, n );
                         RUNTIME_data_migrate( sequence, C(m, p), node );
@@ -460,10 +437,10 @@ void chameleon_pzunmqr_param( int genD, const libhqr_tree_t *qrtree,
                         INSERT_TASK_ztpmqrt(
                             &options, side, trans,
                             tempmm, tempnn, tempkn, chameleon_min( L, tempmm ), ib, T->nb,
-                            A(n, k), ldan,
-                            T(n, k), T->mb,
-                            C(m, p), ldcm,
-                            C(m, n), ldcm);
+                            A(n, k),
+                            T(n, k),
+                            C(m, p),
+                            C(m, n));
                     }
                     RUNTIME_data_flush( sequence, A(n, k) );
                     RUNTIME_data_flush( sequence, T(n, k) );
diff --git a/compute/pzunmqrrh.c b/compute/pzunmqrrh.c
index 1d4500f849c3072d7bbf9e54e0f25c72363a5fbd..9b34176c82c0684c136a757591cc6cb9b04ff0c4 100644
--- a/compute/pzunmqrrh.c
+++ b/compute/pzunmqrrh.c
@@ -47,9 +47,6 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
     int k, m, n, p;
     int KT, RD, lastRD;
-    int ldap, ldam, ldan;
-    int ldcp, ldcm;
-    int lddp;
     int temppm, temppn, tempkn, tempnn, tempmm, tempkmin;
     int ib, node;
 
@@ -102,9 +99,6 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                     temppm   = p == C->mt-1 ? C->m - p * C->mb : C->mb;
                     tempkmin = chameleon_min( temppm, tempkn );
 
-                    ldap = BLKLDD(A, p);
-                    lddp = BLKLDD(D, p);
-                    ldcp = BLKLDD(C, p);
 
                     if ( genD ) {
                         int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
@@ -112,14 +106,14 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamLower, tempDpm, tempkmin, A->nb,
-                            A(p, k), ldap,
-                            D(p, k), lddp );
+                            A(p, k),
+                            D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(p, k), lddp );
+                            D(p, k) );
 #endif
                     }
                     for (n = 0; n < C->nt; n++) {
@@ -128,17 +122,15 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             &options,
                             side, trans,
                             temppm, tempnn, tempkmin, ib, T->nb,
-                            D(p, k), lddp,
-                            T(p, k), T->mb,
-                            C(p, n), ldcp);
+                            D(p, k),
+                            T(p, k),
+                            C(p, n));
                     }
                     RUNTIME_data_flush( sequence, D(p, k) );
                     RUNTIME_data_flush( sequence, T(p, k) );
 
                     for (m = p+1; m < chameleon_min(p+BS, C->mt); m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldam = BLKLDD(A, m);
-                        ldcm = BLKLDD(C, m);
 
                         for (n = 0; n < C->nt; n++) {
                             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
@@ -151,10 +143,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmqrt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkn, 0, ib, T->nb,
-                                A(m, k), ldam,
-                                T(m, k), T->mb,
-                                C(p, n), ldcp,
-                                C(m, n), ldcm);
+                                A(m, k),
+                                T(m, k),
+                                C(p, n),
+                                C(m, n));
                         }
                         RUNTIME_data_flush( sequence, A(m, k) );
                         RUNTIME_data_flush( sequence, T(m, k) );
@@ -165,9 +157,6 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         m = p+RD;
 
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldam = BLKLDD(A, m);
-                        ldcm = BLKLDD(C, m);
-                        ldcp = BLKLDD(C, p);
 
                         for (n = 0; n < C->nt; n++) {
                             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
@@ -180,10 +169,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmqrt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkn, tempmm, ib, T->nb,
-                                A (m, k), ldam,
-                                T2(m, k), T->mb,
-                                C (p, n), ldcp,
-                                C (m, n), ldcm);
+                                A (m, k),
+                                T2(m, k),
+                                C (p, n),
+                                C (m, n));
                         }
                         RUNTIME_data_flush( sequence, A (m, k) );
                         RUNTIME_data_flush( sequence, T2(m, k) );
@@ -215,9 +204,6 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         m = p+RD;
 
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldam = BLKLDD(A, m);
-                        ldcm = BLKLDD(C, m);
-                        ldcp = BLKLDD(C, p);
 
                         for (n = 0; n < C->nt; n++) {
                             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
@@ -230,24 +216,19 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmqrt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkn, tempmm, ib, T->nb,
-                                A (m, k), ldam,
-                                T2(m, k), T->mb,
-                                C (p, n), ldcp,
-                                C (m, n), ldcm);
+                                A (m, k),
+                                T2(m, k),
+                                C (p, n),
+                                C (m, n));
                         }
                         RUNTIME_data_flush( sequence, A (m, k) );
                         RUNTIME_data_flush( sequence, T2(m, k) );
                     }
                 }
                 for (p = k; p < C->mt; p += BS) {
-                    ldap = BLKLDD(A, p);
-                    lddp = BLKLDD(D, p);
-                    ldcp = BLKLDD(C, p);
 
                     for (m = chameleon_min(p+BS, C->mt)-1; m > p; m--) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldam = BLKLDD(A, m);
-                        ldcm = BLKLDD(C, m);
 
                         for (n = 0; n < C->nt; n++) {
                             tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
@@ -260,10 +241,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmqrt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkn, 0, ib, T->nb,
-                                A(m, k), ldam,
-                                T(m, k), T->mb,
-                                C(p, n), ldcp,
-                                C(m, n), ldcm);
+                                A(m, k),
+                                T(m, k),
+                                C(p, n),
+                                C(m, n));
                         }
                         RUNTIME_data_flush( sequence, A(m, k) );
                         RUNTIME_data_flush( sequence, T(m, k) );
@@ -278,14 +259,14 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamLower, tempDpm, tempkmin, A->nb,
-                            A(p, k), ldap,
-                            D(p, k), lddp );
+                            A(p, k),
+                            D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(p, k), lddp );
+                            D(p, k) );
 #endif
                     }
 
@@ -298,9 +279,9 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zunmqr(
                             &options, side, trans,
                             temppm, tempnn, tempkmin, ib, T->nb,
-                            D(p, k), lddp,
-                            T(p, k), T->mb,
-                            C(p, n), ldcp);
+                            D(p, k),
+                            T(p, k),
+                            C(p, n));
                     }
                     RUNTIME_data_flush( sequence, D(p, k) );
                     RUNTIME_data_flush( sequence, T(p, k) );
@@ -327,11 +308,9 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         n = p+RD;
 
                         tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-                        ldan = BLKLDD(A, n);
 
                         for (m = 0; m < C->mt; m++) {
                             tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                            ldcm   = BLKLDD(C, m);
 
                             node = C->get_rankof( C, m, n );
                             RUNTIME_data_migrate( sequence, C(m, p), node );
@@ -341,10 +320,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmqrt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkn, tempmm, ib, T->nb,
-                                A (n, k), ldan,
-                                T2(n, k), T->mb,
-                                C (m, p), ldcm,
-                                C (m, n), ldcm);
+                                A (n, k),
+                                T2(n, k),
+                                C (m, p),
+                                C (m, n));
                         }
                         RUNTIME_data_flush( sequence, A (n, k) );
                         RUNTIME_data_flush( sequence, T2(n, k) );
@@ -355,11 +334,9 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                     for (n = chameleon_min(p+BS, C->nt)-1; n > p; n--) {
 
                         tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-                        ldan = BLKLDD(A, n);
 
                         for (m = 0; m < C->mt; m++) {
                             tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                            ldcm = BLKLDD(C, m);
 
                             node = C->get_rankof( C, m, n );
                             RUNTIME_data_migrate( sequence, C(m, p), node );
@@ -369,10 +346,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmqrt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkn, 0, ib, T->nb,
-                                A(n, k), ldan,
-                                T(n, k), T->mb,
-                                C(m, p), ldcm,
-                                C(m, n), ldcm);
+                                A(n, k),
+                                T(n, k),
+                                C(m, p),
+                                C(m, n));
                         }
                         RUNTIME_data_flush( sequence, A(n, k) );
                         RUNTIME_data_flush( sequence, T(n, k) );
@@ -380,8 +357,6 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
 
                     temppn   = p == C->nt-1 ? C->n - p * C->nb : C->nb;
                     tempkmin = chameleon_min( temppn, tempkn );
-                    ldap = BLKLDD(A, p);
-                    lddp = BLKLDD(D, p);
 
                     if ( genD ) {
                         int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
@@ -389,20 +364,19 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamLower, tempDpm, tempkmin, A->nb,
-                            A(p, k), ldap,
-                            D(p, k), lddp );
+                            A(p, k),
+                            D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(p, k), lddp );
+                            D(p, k) );
 #endif
                     }
 
                     for (m = 0; m < C->mt; m++) {
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                        ldcm = BLKLDD(C, m);
 
                         RUNTIME_data_migrate( sequence, C(m, p),
                                               C->get_rankof( C, m, p ) );
@@ -410,9 +384,9 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zunmqr(
                             &options, side, trans,
                             tempmm, temppn, tempkmin, ib, T->nb,
-                            D(p, k), lddp,
-                            T(p, k), T->mb,
-                            C(m, p), ldcm);
+                            D(p, k),
+                            T(p, k),
+                            C(m, p));
                     }
                     RUNTIME_data_flush( sequence, D(p, k) );
                     RUNTIME_data_flush( sequence, T(p, k) );
@@ -433,8 +407,6 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                     temppn   = p == C->nt - 1 ? C->n - p * C->nb : C->nb;
                     tempkmin = chameleon_min( temppn, tempkn );
 
-                    ldap = BLKLDD(A, p);
-                    lddp = BLKLDD(D, p);
 
                     if ( genD ) {
                         int tempDpm = p == D->mt-1 ? D->m-p*D->mb : D->mb;
@@ -442,36 +414,33 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                         INSERT_TASK_zlacpy(
                             &options,
                             ChamLower, tempDpm, tempkmin, A->nb,
-                            A(p, k), ldap,
-                            D(p, k), lddp );
+                            A(p, k),
+                            D(p, k) );
 #if defined(CHAMELEON_USE_CUDA)
                         INSERT_TASK_zlaset(
                             &options,
                             ChamUpper, tempDpm, tempkmin,
                             0., 1.,
-                            D(p, k), lddp );
+                            D(p, k) );
 #endif
                     }
 
                     for (m = 0; m < C->mt; m++) {
-                        ldcm = BLKLDD(C, m);
                         tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
                         INSERT_TASK_zunmqr(
                             &options, side, trans,
                             tempmm, temppn, tempkmin, ib, T->nb,
-                            D(p, k), lddp,
-                            T(p, k), T->mb,
-                            C(m, p), ldcm);
+                            D(p, k),
+                            T(p, k),
+                            C(m, p));
                     }
                     RUNTIME_data_flush( sequence, D(p, k) );
                     RUNTIME_data_flush( sequence, T(p, k) );
 
                     for (n = p+1; n < chameleon_min(p+BS,  C->nt); n++) {
                         tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
-                        ldan = BLKLDD(A, n);
                         for (m = 0; m < C->mt; m++) {
                             tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                            ldcm = BLKLDD(C, m);
 
                             node = C->get_rankof( C, m, n );
                             RUNTIME_data_migrate( sequence, C(m, p), node );
@@ -481,10 +450,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmqrt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkn, 0, ib, T->nb,
-                                A(n, k), ldan,
-                                T(n, k), T->mb,
-                                C(m, p), ldcm,
-                                C(m, n), ldcm);
+                                A(n, k),
+                                T(n, k),
+                                C(m, p),
+                                C(m, n));
                         }
                         RUNTIME_data_flush( sequence, A(n, k) );
                         RUNTIME_data_flush( sequence, T(n, k) );
@@ -494,11 +463,9 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                     for (p = k; p+RD < C->nt; p += 2*RD) {
                         n = p + RD;
-                        tempnn = n == C->mt-1 ? C->m-n*C->mb : C->mb;
-                        ldan = BLKLDD(A, n);
+                        tempnn = n == C->nt-1 ? C->n-n*C->nb : C->nb;
 
                         for (m = 0; m < C->mt; m++) {
                             tempmm = m == C->mt-1 ? C->m-m*C->mb : C->mb;
-                            ldcm   = BLKLDD(C, m);
 
                             node = C->get_rankof( C, m, n );
                             RUNTIME_data_migrate( sequence, C(m, p), node );
@@ -508,10 +475,10 @@ void chameleon_pzunmqrrh( int genD, int BS, cham_side_t side, cham_trans_t trans
                             INSERT_TASK_ztpmqrt(
                                 &options, side, trans,
                                 tempmm, tempnn, tempkn, tempmm, ib, T->nb,
-                                A (n, k), ldan,
-                                T2(n, k), T->mb,
-                                C (m, p), ldcm,
-                                C (m, n), ldcm);
+                                A (n, k),
+                                T2(n, k),
+                                C (m, p),
+                                C (m, n));
                         }
                         RUNTIME_data_flush( sequence, A (n, k) );
                         RUNTIME_data_flush( sequence, T2(n, k) );
diff --git a/control/descriptor.c b/control/descriptor.c
index 2f739427740ad0abdd80887f86677dfe155b533d..52584a44646b9fd1305876708626cc7423b8d9f4 100644
--- a/control/descriptor.c
+++ b/control/descriptor.c
@@ -65,9 +65,50 @@ int chameleon_desc_mat_free( CHAM_desc_t *desc )
         desc->mat = NULL;
     }
 
+    /* free(NULL) is a no-op; clear the pointer so that a second call
+     * cannot free the tile array twice */
+    free( desc->tiles );
+    desc->tiles = NULL;
+
     return CHAMELEON_SUCCESS;
 }
 
+/**
+ *  Internal function to allocate desc->tiles and fill in one CHAM_tile_t per
+ *  tile of the lmt-by-lnt grid. Tiles are stored column by column, which is the
+ *  order expected by chameleon_desc_gettile(). On allocation failure an error
+ *  is reported and desc->tiles is left NULL.
+ */
+void chameleon_desc_init_tiles( CHAM_desc_t *desc )
+{
+    CHAM_tile_t *tile;
+    size_t nbtiles;
+    int ii, jj;
+
+    /* Do the product in size_t to avoid an int overflow on large tile grids */
+    nbtiles = (size_t)desc->lmt * (size_t)desc->lnt;
+
+    desc->tiles = malloc( nbtiles * sizeof(CHAM_tile_t) );
+    if ( desc->tiles == NULL ) {
+        chameleon_error( "chameleon_desc_init_tiles", "malloc() failed" );
+        return;
+    }
+
+    tile = desc->tiles;
+    for( jj=0; jj<desc->lnt; jj++ ) {
+        for( ii=0; ii<desc->lmt; ii++, tile++ ) {
+            int rank = desc->get_rankof( desc, ii, jj );
+            tile->format = CHAMELEON_TILE_FULLRANK;
+            /* The last row/column of tiles may be smaller than mb/nb */
+            tile->m   = ii == desc->lmt-1 ? desc->lm - ii * desc->mb : desc->mb;
+            tile->n   = jj == desc->lnt-1 ? desc->ln - jj * desc->nb : desc->nb;
+            /* Only tiles owned by the local rank get a data pointer */
+            tile->mat = (rank == desc->myrank) ? desc->get_blkaddr( desc, ii, jj ) : NULL;
+            tile->ld  = desc->get_blkldd( desc, ii );
+        }
+    }
+}
+
 /**
  *  Internal function to return MPI rank of element A(m,n) with m,n = block indices
  */
@@ -174,6 +215,7 @@ int chameleon_desc_init( CHAM_desc_t *desc, void *mat,
     }
 
     // If one of the function get_* is NULL, we switch back to the default, like in chameleon_desc_init()
+    desc->get_blktile = chameleon_desc_gettile;
     desc->get_blkaddr = get_blkaddr ? get_blkaddr : chameleon_getaddr_ccrb;
     desc->get_blkldd  = get_blkldd  ? get_blkldd  : chameleon_getblkldd_ccrb;
     desc->get_rankof  = get_rankof  ? get_rankof  : chameleon_getrankof_2d;
@@ -282,6 +324,8 @@ int chameleon_desc_init( CHAM_desc_t *desc, void *mat,
     desc->A12 = (size_t)(            desc->llm%mb)*(size_t)(desc->lln - desc->lln%nb) + desc->A21;
     desc->A22 = (size_t)(desc->llm - desc->llm%mb)*(size_t)(            desc->lln%nb) + desc->A12;
 
+    chameleon_desc_init_tiles( desc );
+
     /* Create runtime specific structure like registering data */
     RUNTIME_desc_create( desc );
 
diff --git a/control/descriptor.h b/control/descriptor.h
index 6eff677110b0e4269a5366cc7ffcf24269050547..b4d8832d0e376a34c3384d71ab868b97f1b27bfc 100644
--- a/control/descriptor.h
+++ b/control/descriptor.h
@@ -131,6 +131,21 @@ inline static void *chameleon_getaddr_null(const CHAM_desc_t *A, int m, int n)
     return NULL;
 }
 
+/**
+ *  Internal function to return the tile descriptor of block (m,n) with
+ *  m,n = block indices, shifted by the block offset (A->i/A->mb, A->j/A->nb)
+ */
+inline static CHAM_tile_t *chameleon_desc_gettile(const CHAM_desc_t *A, int m, int n)
+{
+    size_t row = m + A->i / A->mb;
+    size_t col = n + A->j / A->nb;
+
+    assert( A->tiles != NULL );
+
+    /* Tiles are indexed column by column with a leading dimension of lmt */
+    return A->tiles + ( A->lmt * col + row );
+}
+
 /**
  *  Internal function to return address of element A(m,n) with m,n = matrix indices
  */
diff --git a/coreblas/compute/CMakeLists.txt b/coreblas/compute/CMakeLists.txt
index 283659eed45ee323aed7c10ff5479c257c442dc9..e85b36fa5ed57637225dedfb7f47c0c3518dff19 100644
--- a/coreblas/compute/CMakeLists.txt
+++ b/coreblas/compute/CMakeLists.txt
@@ -98,6 +98,7 @@ set(ZSRC
     core_zttqrt.c
     core_zunmlq.c
     core_zunmqr.c
+    core_ztile.c
     )
 
 precisions_rules_py(COREBLAS_SRCS_GENERATED "${ZSRC}"
diff --git a/coreblas/compute/core_ztile.c b/coreblas/compute/core_ztile.c
new file mode 100644
index 0000000000000000000000000000000000000000..def292605dddc3090255e8f1dd559f044ddd3bfc
--- /dev/null
+++ b/coreblas/compute/core_ztile.c
@@ -0,0 +1,838 @@
+/**
+ *
+ * @file core_ztile.c
+ *
+ * @copyright 2009-2014 The University of Tennessee and The University of
+ *                      Tennessee Research Foundation. All rights reserved.
+ * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ * @brief Chameleon CPU kernel interface from CHAM_tile_t layout to the real one.
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2015-11-03
+ * @precisions normal z -> c d s
+ *
+ */
+#include "coreblas.h"
+#include "coreblas/coreblas_ztile.h"
+
+void
+TCORE_dzasum( cham_store_t       storev,
+              cham_uplo_t        uplo,
+              int                M,
+              int                N,
+              const CHAM_tile_t *A,
+              double *           work )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_dzasum on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_dzasum( storev, uplo, M, N, A->mat, A->ld, work ); /* storev, uplo, M, N, work pass through unchanged */
+}
+
+int
+TCORE_zaxpy( int                   M,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             int                   incA,
+             CHAM_tile_t *         B,
+             int                   incB )
+{ /* Asserts A and B have the FULLRANK format bit, then calls CORE_zaxpy on their mat buffers. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zaxpy( M, alpha, A->mat, incA, B->mat, incB ); /* A->ld / B->ld are not used; incA / incB come from the caller */
+}
+
+int
+TCORE_zgeadd( cham_trans_t          trans,
+              int                   M,
+              int                   N,
+              CHAMELEON_Complex64_t alpha,
+              const CHAM_tile_t *   A,
+              CHAMELEON_Complex64_t beta,
+              CHAM_tile_t *         B )
+{ /* Asserts A and B have the FULLRANK format bit, then calls CORE_zgeadd with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zgeadd( trans, M, N, alpha, A->mat, A->ld, beta, B->mat, B->ld ); /* CORE_zgeadd's int result is returned as is */
+}
+
+int
+TCORE_zgelqt( int                    M,
+              int                    N,
+              int                    IB,
+              CHAM_tile_t *          A,
+              CHAM_tile_t *          T,
+              CHAMELEON_Complex64_t *TAU,
+              CHAMELEON_Complex64_t *WORK )
+{ /* Asserts A and T have the FULLRANK format bit, then calls CORE_zgelqt with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( T->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zgelqt( M, N, IB, A->mat, A->ld, T->mat, T->ld, TAU, WORK ); /* TAU and WORK are raw buffers, forwarded unchanged */
+}
+
+void
+TCORE_zgemm( cham_trans_t          transA,
+             cham_trans_t          transB,
+             int                   M,
+             int                   N,
+             int                   K,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             const CHAM_tile_t *   B,
+             CHAMELEON_Complex64_t beta,
+             CHAM_tile_t *         C )
+{ /* Asserts A, B and C have the FULLRANK format bit, then calls CORE_zgemm with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    assert( C->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zgemm( /* scalar arguments keep the order they have in this wrapper's signature */
+        transA, transB, M, N, K, alpha, A->mat, A->ld, B->mat, B->ld, beta, C->mat, C->ld );
+}
+
+int
+TCORE_zgeqrt( int                    M,
+              int                    N,
+              int                    IB,
+              CHAM_tile_t *          A,
+              CHAM_tile_t *          T,
+              CHAMELEON_Complex64_t *TAU,
+              CHAMELEON_Complex64_t *WORK )
+{ /* Asserts A and T have the FULLRANK format bit, then calls CORE_zgeqrt with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( T->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zgeqrt( M, N, IB, A->mat, A->ld, T->mat, T->ld, TAU, WORK ); /* TAU and WORK are raw buffers, forwarded unchanged */
+}
+
+int
+TCORE_zgessm( int M, int N, int K, int IB, const int *IPIV, const CHAM_tile_t *L, CHAM_tile_t *A )
+{ /* Asserts L and A have the FULLRANK format bit, then calls CORE_zgessm with each tile as (mat, ld). */
+    assert( L->format & CHAMELEON_TILE_FULLRANK );
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zgessm( M, N, K, IB, IPIV, L->mat, L->ld, A->mat, A->ld ); /* IPIV is a raw int array, forwarded unchanged */
+}
+
+int
+TCORE_zgessq( cham_store_t storev, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *sclssq )
+{ /* Asserts A and sclssq have the FULLRANK format bit, then calls CORE_zgessq. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( sclssq->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zgessq( storev, M, N, A->mat, A->ld, sclssq->mat ); /* sclssq is passed as its buffer only; sclssq->ld is not used */
+}
+
+int
+TCORE_zgetrf( int M, int N, CHAM_tile_t *A, int *IPIV, int *INFO )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zgetrf on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zgetrf( M, N, A->mat, A->ld, IPIV, INFO ); /* IPIV and INFO are forwarded unchanged */
+}
+
+int
+TCORE_zgetrf_incpiv( int M, int N, int IB, CHAM_tile_t *A, int *IPIV, int *INFO )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zgetrf_incpiv on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zgetrf_incpiv( M, N, IB, A->mat, A->ld, IPIV, INFO ); /* IPIV and INFO are forwarded unchanged */
+}
+
+int
+TCORE_zgetrf_nopiv( int M, int N, int IB, CHAM_tile_t *A, int *INFO )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zgetrf_nopiv on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zgetrf_nopiv( M, N, IB, A->mat, A->ld, INFO ); /* INFO is forwarded unchanged */
+}
+
+void
+TCORE_zhe2ge( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B )
+{ /* Asserts A and B have the FULLRANK format bit, then calls CORE_zhe2ge with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zhe2ge( uplo, M, N, A->mat, A->ld, B->mat, B->ld ); /* A is the const input tile, B the non-const one */
+}
+
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+void
+TCORE_zhemm( cham_side_t           side,
+             cham_uplo_t           uplo,
+             int                   M,
+             int                   N,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             const CHAM_tile_t *   B,
+             CHAMELEON_Complex64_t beta,
+             CHAM_tile_t *         C )
+{ /* Asserts A, B and C have the FULLRANK format bit, then calls CORE_zhemm with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    assert( C->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zhemm( side, uplo, M, N, alpha, A->mat, A->ld, B->mat, B->ld, beta, C->mat, C->ld ); /* only C is non-const */
+}
+
+void
+TCORE_zherk( cham_uplo_t        uplo,
+             cham_trans_t       trans,
+             int                N,
+             int                K,
+             double             alpha,
+             const CHAM_tile_t *A,
+             double             beta,
+             CHAM_tile_t *      C )
+{ /* Asserts A and C have the FULLRANK format bit, then calls CORE_zherk with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( C->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zherk( uplo, trans, N, K, alpha, A->mat, A->ld, beta, C->mat, C->ld ); /* alpha and beta are plain doubles here */
+}
+
+void
+TCORE_zher2k( cham_uplo_t           uplo,
+              cham_trans_t          trans,
+              int                   N,
+              int                   K,
+              CHAMELEON_Complex64_t alpha,
+              const CHAM_tile_t *   A,
+              const CHAM_tile_t *   B,
+              double                beta,
+              CHAM_tile_t *         C )
+{ /* Asserts A, B and C have the FULLRANK format bit, then calls CORE_zher2k with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    assert( C->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zher2k( uplo, trans, N, K, alpha, A->mat, A->ld, B->mat, B->ld, beta, C->mat, C->ld ); /* alpha is complex, beta is a double */
+}
+#endif
+
+int
+TCORE_zherfb( cham_uplo_t            uplo,
+              int                    N,
+              int                    K,
+              int                    IB,
+              int                    NB,
+              const CHAM_tile_t *    A,
+              const CHAM_tile_t *    T,
+              CHAM_tile_t *          C,
+              CHAMELEON_Complex64_t *WORK,
+              int                    ldwork )
+{ /* Asserts A, T and C have the FULLRANK format bit, then calls CORE_zherfb with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( T->format & CHAMELEON_TILE_FULLRANK );
+    assert( C->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zherfb( /* WORK / ldwork are a raw workspace, forwarded unchanged */
+        uplo, N, K, IB, NB, A->mat, A->ld, T->mat, T->ld, C->mat, C->ld, WORK, ldwork );
+}
+
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+int
+TCORE_zhessq( cham_store_t       storev,
+              cham_uplo_t        uplo,
+              int                N,
+              const CHAM_tile_t *A,
+              CHAM_tile_t *      sclssq )
+{ /* Asserts A and sclssq have the FULLRANK format bit, then calls CORE_zhessq. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( sclssq->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zhessq( storev, uplo, N, A->mat, A->ld, sclssq->mat ); /* sclssq is passed as its buffer only; sclssq->ld is not used */
+}
+#endif
+
+void
+TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B )
+{ /* Asserts A and B have the FULLRANK format bit, then calls CORE_zlacpy with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zlacpy( uplo, M, N, A->mat, A->ld, B->mat, B->ld ); /* A is the const input tile, B the non-const one */
+}
+
+void
+TCORE_zlange( cham_normtype_t    norm,
+              int                M,
+              int                N,
+              const CHAM_tile_t *A,
+              double *           work,
+              double *           normA )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zlange on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zlange( norm, M, N, A->mat, A->ld, work, normA ); /* work and normA are raw double pointers, forwarded unchanged */
+}
+
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+void
+TCORE_zlanhe( cham_normtype_t    norm,
+              cham_uplo_t        uplo,
+              int                N,
+              const CHAM_tile_t *A,
+              double *           work,
+              double *           normA )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zlanhe on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zlanhe( norm, uplo, N, A->mat, A->ld, work, normA ); /* work and normA are raw double pointers, forwarded unchanged */
+}
+#endif
+
+void
+TCORE_zlansy( cham_normtype_t    norm,
+              cham_uplo_t        uplo,
+              int                N,
+              const CHAM_tile_t *A,
+              double *           work,
+              double *           normA )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zlansy on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zlansy( norm, uplo, N, A->mat, A->ld, work, normA ); /* work and normA are raw double pointers, forwarded unchanged */
+}
+
+void
+TCORE_zlantr( cham_normtype_t    norm,
+              cham_uplo_t        uplo,
+              cham_diag_t        diag,
+              int                M,
+              int                N,
+              const CHAM_tile_t *A,
+              double *           work,
+              double *           normA )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zlantr on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zlantr( norm, uplo, diag, M, N, A->mat, A->ld, work, normA ); /* work and normA are raw double pointers, forwarded unchanged */
+}
+
+int
+TCORE_zlascal( cham_uplo_t uplo, int m, int n, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zlascal on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zlascal( uplo, m, n, alpha, A->mat, A->ld ); /* CORE_zlascal's int result is returned as is */
+}
+
+void
+TCORE_zlaset( cham_uplo_t           uplo,
+              int                   n1,
+              int                   n2,
+              CHAMELEON_Complex64_t alpha,
+              CHAMELEON_Complex64_t beta,
+              CHAM_tile_t *         A )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zlaset on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zlaset( uplo, n1, n2, alpha, beta, A->mat, A->ld ); /* uplo, n1, n2, alpha, beta pass through unchanged */
+}
+
+void
+TCORE_zlaset2( cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zlaset2 on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zlaset2( uplo, n1, n2, alpha, A->mat, A->ld ); /* uplo, n1, n2, alpha pass through unchanged */
+}
+
+int
+TCORE_zlatro( cham_uplo_t        uplo,
+              cham_trans_t       trans,
+              int                M,
+              int                N,
+              const CHAM_tile_t *A,
+              CHAM_tile_t *      B )
+{ /* Asserts A and B have the FULLRANK format bit, then calls CORE_zlatro with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zlatro( uplo, trans, M, N, A->mat, A->ld, B->mat, B->ld ); /* CORE_zlatro's int result is returned as is */
+}
+
+void
+TCORE_zlauum( cham_uplo_t uplo, int N, CHAM_tile_t *A )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zlauum on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zlauum( uplo, N, A->mat, A->ld ); /* uplo and N pass through unchanged */
+}
+
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+void
+TCORE_zplghe( double                 bump,
+              int                    m,
+              int                    n,
+              CHAM_tile_t *          tileA,
+              int                    bigM,
+              int                    m0,
+              int                    n0,
+              unsigned long long int seed )
+{ /* Asserts tileA has the FULLRANK format bit, then calls CORE_zplghe on tileA->mat / tileA->ld. */
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zplghe( bump, m, n, tileA->mat, tileA->ld, bigM, m0, n0, seed ); /* bump, bigM, m0, n0, seed pass through unchanged */
+}
+#endif
+
+void
+TCORE_zplgsy( CHAMELEON_Complex64_t  bump,
+              int                    m,
+              int                    n,
+              CHAM_tile_t *          tileA,
+              int                    bigM,
+              int                    m0,
+              int                    n0,
+              unsigned long long int seed )
+{ /* Asserts tileA has the FULLRANK format bit, then calls CORE_zplgsy on tileA->mat / tileA->ld. */
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zplgsy( bump, m, n, tileA->mat, tileA->ld, bigM, m0, n0, seed ); /* bump, bigM, m0, n0, seed pass through unchanged */
+}
+
+void
+TCORE_zplrnt( int                    m,
+              int                    n,
+              CHAM_tile_t *          tileA,
+              int                    bigM,
+              int                    m0,
+              int                    n0,
+              unsigned long long int seed )
+{ /* Asserts tileA has the FULLRANK format bit, then calls CORE_zplrnt on tileA->mat / tileA->ld. */
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zplrnt( m, n, tileA->mat, tileA->ld, bigM, m0, n0, seed ); /* bigM, m0, n0, seed pass through unchanged */
+}
+
+void
+TCORE_zpotrf( cham_uplo_t uplo, int n, CHAM_tile_t *A, int *INFO )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zpotrf on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zpotrf( uplo, n, A->mat, A->ld, INFO ); /* INFO is forwarded unchanged */
+}
+
+int
+TCORE_zssssm( int                M1,
+              int                N1,
+              int                M2,
+              int                N2,
+              int                K,
+              int                IB,
+              CHAM_tile_t *      A1,
+              CHAM_tile_t *      A2,
+              const CHAM_tile_t *L1,
+              const CHAM_tile_t *L2,
+              const int *        IPIV )
+{ /* Asserts A1, A2, L1 and L2 have the FULLRANK format bit, then calls CORE_zssssm with each tile as (mat, ld). */
+    assert( A1->format & CHAMELEON_TILE_FULLRANK );
+    assert( A2->format & CHAMELEON_TILE_FULLRANK );
+    assert( L1->format & CHAMELEON_TILE_FULLRANK );
+    assert( L2->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zssssm( M1, /* scalar arguments keep the order they have in this wrapper's signature */
+                        N1,
+                        M2,
+                        N2,
+                        K,
+                        IB,
+                        A1->mat,
+                        A1->ld,
+                        A2->mat,
+                        A2->ld,
+                        L1->mat,
+                        L1->ld,
+                        L2->mat,
+                        L2->ld,
+                        IPIV ); /* raw int array, forwarded unchanged */
+}
+
+void
+TCORE_zsymm( cham_side_t           side,
+             cham_uplo_t           uplo,
+             int                   M,
+             int                   N,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             const CHAM_tile_t *   B,
+             CHAMELEON_Complex64_t beta,
+             CHAM_tile_t *         C )
+{ /* Asserts A, B and C have the FULLRANK format bit, then calls CORE_zsymm with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    assert( C->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zsymm( side, uplo, M, N, alpha, A->mat, A->ld, B->mat, B->ld, beta, C->mat, C->ld ); /* only C is non-const */
+}
+
+void
+TCORE_zsyrk( cham_uplo_t           uplo,
+             cham_trans_t          trans,
+             int                   N,
+             int                   K,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             CHAMELEON_Complex64_t beta,
+             CHAM_tile_t *         C )
+{ /* Asserts A and C have the FULLRANK format bit, then calls CORE_zsyrk with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( C->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zsyrk( uplo, trans, N, K, alpha, A->mat, A->ld, beta, C->mat, C->ld ); /* alpha and beta are both complex here */
+}
+
+void
+TCORE_zsyr2k( cham_uplo_t           uplo,
+              cham_trans_t          trans,
+              int                   N,
+              int                   K,
+              CHAMELEON_Complex64_t alpha,
+              const CHAM_tile_t *   A,
+              const CHAM_tile_t *   B,
+              CHAMELEON_Complex64_t beta,
+              CHAM_tile_t *         C )
+{ /* Asserts A, B and C have the FULLRANK format bit, then calls CORE_zsyr2k with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    assert( C->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zsyr2k( uplo, trans, N, K, alpha, A->mat, A->ld, B->mat, B->ld, beta, C->mat, C->ld ); /* alpha and beta are both complex here */
+}
+
+int
+TCORE_zsyssq( cham_store_t       storev,
+              cham_uplo_t        uplo,
+              int                N,
+              const CHAM_tile_t *A,
+              CHAM_tile_t *      sclssq )
+{ /* Asserts A and sclssq have the FULLRANK format bit, then calls CORE_zsyssq. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( sclssq->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zsyssq( storev, uplo, N, A->mat, A->ld, sclssq->mat ); /* sclssq is passed as its buffer only; sclssq->ld is not used */
+}
+
+#if defined( PRECISION_z ) || defined( PRECISION_c )
+int
+TCORE_zsytf2_nopiv( cham_uplo_t uplo, int n, CHAM_tile_t *A )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_zsytf2_nopiv on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zsytf2_nopiv( uplo, n, A->mat, A->ld ); /* CORE_zsytf2_nopiv's int result is returned as is */
+}
+#endif
+
+int
+TCORE_ztplqt( int                    M,
+              int                    N,
+              int                    L,
+              int                    IB,
+              CHAM_tile_t *          A,
+              CHAM_tile_t *          B,
+              CHAM_tile_t *          T,
+              CHAMELEON_Complex64_t *WORK )
+{ /* Asserts A, B and T have the FULLRANK format bit, then calls CORE_ztplqt with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    assert( T->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_ztplqt( M, N, L, IB, A->mat, A->ld, B->mat, B->ld, T->mat, T->ld, WORK ); /* WORK is a raw buffer, forwarded unchanged */
+}
+
+int
+TCORE_ztpmlqt( cham_side_t            side,
+               cham_trans_t           trans,
+               int                    M,
+               int                    N,
+               int                    K,
+               int                    L,
+               int                    IB,
+               const CHAM_tile_t *    V,
+               const CHAM_tile_t *    T,
+               CHAM_tile_t *          A,
+               CHAM_tile_t *          B,
+               CHAMELEON_Complex64_t *WORK )
+{ /* Asserts V, T, A and B have the FULLRANK format bit, then calls CORE_ztpmlqt with each tile as (mat, ld). */
+    assert( V->format & CHAMELEON_TILE_FULLRANK );
+    assert( T->format & CHAMELEON_TILE_FULLRANK );
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_ztpmlqt( side, /* scalar arguments keep the order they have in this wrapper's signature */
+                         trans,
+                         M,
+                         N,
+                         K,
+                         L,
+                         IB,
+                         V->mat,
+                         V->ld,
+                         T->mat,
+                         T->ld,
+                         A->mat,
+                         A->ld,
+                         B->mat,
+                         B->ld,
+                         WORK ); /* raw buffer, forwarded unchanged */
+}
+
+int
+TCORE_ztpmqrt( cham_side_t            side,
+               cham_trans_t           trans,
+               int                    M,
+               int                    N,
+               int                    K,
+               int                    L,
+               int                    IB,
+               const CHAM_tile_t *    V,
+               const CHAM_tile_t *    T,
+               CHAM_tile_t *          A,
+               CHAM_tile_t *          B,
+               CHAMELEON_Complex64_t *WORK )
+{ /* Asserts V, T, A and B have the FULLRANK format bit, then calls CORE_ztpmqrt with each tile as (mat, ld). */
+    assert( V->format & CHAMELEON_TILE_FULLRANK );
+    assert( T->format & CHAMELEON_TILE_FULLRANK );
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_ztpmqrt( side, /* scalar arguments keep the order they have in this wrapper's signature */
+                         trans,
+                         M,
+                         N,
+                         K,
+                         L,
+                         IB,
+                         V->mat,
+                         V->ld,
+                         T->mat,
+                         T->ld,
+                         A->mat,
+                         A->ld,
+                         B->mat,
+                         B->ld,
+                         WORK ); /* raw buffer, forwarded unchanged */
+}
+
+int
+TCORE_ztpqrt( int                    M,
+              int                    N,
+              int                    L,
+              int                    IB,
+              CHAM_tile_t *          A,
+              CHAM_tile_t *          B,
+              CHAM_tile_t *          T,
+              CHAMELEON_Complex64_t *WORK )
+{ /* Asserts A, B and T have the FULLRANK format bit, then calls CORE_ztpqrt with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    assert( T->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_ztpqrt( M, N, L, IB, A->mat, A->ld, B->mat, B->ld, T->mat, T->ld, WORK ); /* WORK is a raw buffer, forwarded unchanged */
+}
+
+int
+TCORE_ztradd( cham_uplo_t           uplo,
+              cham_trans_t          trans,
+              int                   M,
+              int                   N,
+              CHAMELEON_Complex64_t alpha,
+              const CHAM_tile_t *   A,
+              CHAMELEON_Complex64_t beta,
+              CHAM_tile_t *         B )
+{ /* Asserts A and B have the FULLRANK format bit, then calls CORE_ztradd with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_ztradd( uplo, trans, M, N, alpha, A->mat, A->ld, beta, B->mat, B->ld ); /* CORE_ztradd's int result is returned as is */
+}
+
+void
+TCORE_ztrasm( cham_store_t       storev,
+              cham_uplo_t        uplo,
+              cham_diag_t        diag,
+              int                M,
+              int                N,
+              const CHAM_tile_t *A,
+              double *           work )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_ztrasm on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_ztrasm( storev, uplo, diag, M, N, A->mat, A->ld, work ); /* work is a raw double pointer, forwarded unchanged */
+}
+
+void
+TCORE_ztrmm( cham_side_t           side,
+             cham_uplo_t           uplo,
+             cham_trans_t          transA,
+             cham_diag_t           diag,
+             int                   M,
+             int                   N,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             CHAM_tile_t *         B )
+{ /* Asserts A and B have the FULLRANK format bit, then calls CORE_ztrmm with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    CORE_ztrmm( side, uplo, transA, diag, M, N, alpha, A->mat, A->ld, B->mat, B->ld ); /* A is const, B is the non-const tile */
+}
+
+void
+TCORE_ztrsm( cham_side_t           side,
+             cham_uplo_t           uplo,
+             cham_trans_t          transA,
+             cham_diag_t           diag,
+             int                   M,
+             int                   N,
+             CHAMELEON_Complex64_t alpha,
+             const CHAM_tile_t *   A,
+             CHAM_tile_t *         B )
+{ /* Asserts A and B have the FULLRANK format bit, then calls CORE_ztrsm with each tile as (mat, ld). */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( B->format & CHAMELEON_TILE_FULLRANK );
+    CORE_ztrsm( side, uplo, transA, diag, M, N, alpha, A->mat, A->ld, B->mat, B->ld ); /* A is const, B is the non-const tile */
+}
+
+int
+TCORE_ztrssq( cham_uplo_t        uplo,
+              cham_diag_t        diag,
+              int                M,
+              int                N,
+              const CHAM_tile_t *A,
+              CHAM_tile_t *      sclssq )
+{ /* Asserts A and sclssq have the FULLRANK format bit, then calls CORE_ztrssq. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( sclssq->format & CHAMELEON_TILE_FULLRANK );
+    double *W = sclssq->mat; /* sclssq's buffer viewed as an array of doubles */
+    return CORE_ztrssq( uplo, diag, M, N, A->mat, A->ld, W, W + 1 ); /* W[0] and W[1] go as two separate pointers; presumably scale and sumsq - confirm against CORE_ztrssq */
+}
+
+void
+TCORE_ztrtri( cham_uplo_t uplo, cham_diag_t diag, int N, CHAM_tile_t *A, int *info )
+{ /* Asserts A has the FULLRANK format bit, then calls CORE_ztrtri on A->mat / A->ld. */
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    CORE_ztrtri( uplo, diag, N, A->mat, A->ld, info ); /* info is forwarded unchanged */
+}
+
+int
+TCORE_ztsmlq_hetra1( cham_side_t            side,
+                     cham_trans_t           trans,
+                     int                    m1,
+                     int                    n1,
+                     int                    m2,
+                     int                    n2,
+                     int                    k,
+                     int                    ib,
+                     CHAM_tile_t *          A1,
+                     CHAM_tile_t *          A2,
+                     const CHAM_tile_t *    V,
+                     const CHAM_tile_t *    T,
+                     CHAMELEON_Complex64_t *WORK,
+                     int                    ldwork )
+{ /* Asserts A1, A2, V and T have the FULLRANK format bit, then calls CORE_ztsmlq_hetra1 with each tile as (mat, ld). */
+    assert( A1->format & CHAMELEON_TILE_FULLRANK );
+    assert( A2->format & CHAMELEON_TILE_FULLRANK );
+    assert( V->format & CHAMELEON_TILE_FULLRANK );
+    assert( T->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_ztsmlq_hetra1( side, /* scalar arguments keep the order they have in this wrapper's signature */
+                               trans,
+                               m1,
+                               n1,
+                               m2,
+                               n2,
+                               k,
+                               ib,
+                               A1->mat,
+                               A1->ld,
+                               A2->mat,
+                               A2->ld,
+                               V->mat,
+                               V->ld,
+                               T->mat,
+                               T->ld,
+                               WORK, /* raw workspace and its ldwork, forwarded unchanged */
+                               ldwork );
+}
+
+int
+TCORE_ztsmqr_hetra1( cham_side_t            side,
+                     cham_trans_t           trans,
+                     int                    m1,
+                     int                    n1,
+                     int                    m2,
+                     int                    n2,
+                     int                    k,
+                     int                    ib,
+                     CHAM_tile_t *          A1,
+                     CHAM_tile_t *          A2,
+                     const CHAM_tile_t *    V,
+                     const CHAM_tile_t *    T,
+                     CHAMELEON_Complex64_t *WORK,
+                     int                    ldwork )
+{ /* Asserts A1, A2, V and T have the FULLRANK format bit, then calls CORE_ztsmqr_hetra1 with each tile as (mat, ld). */
+    assert( A1->format & CHAMELEON_TILE_FULLRANK );
+    assert( A2->format & CHAMELEON_TILE_FULLRANK );
+    assert( V->format & CHAMELEON_TILE_FULLRANK );
+    assert( T->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_ztsmqr_hetra1( side, /* scalar arguments keep the order they have in this wrapper's signature */
+                               trans,
+                               m1,
+                               n1,
+                               m2,
+                               n2,
+                               k,
+                               ib,
+                               A1->mat,
+                               A1->ld,
+                               A2->mat,
+                               A2->ld,
+                               V->mat,
+                               V->ld,
+                               T->mat,
+                               T->ld,
+                               WORK, /* raw workspace and its ldwork, forwarded unchanged */
+                               ldwork );
+}
+
+int
+TCORE_ztstrf( int                    M,
+              int                    N,
+              int                    IB,
+              int                    NB,
+              CHAM_tile_t *          U,
+              CHAM_tile_t *          A,
+              CHAM_tile_t *          L,
+              int *                  IPIV,
+              CHAMELEON_Complex64_t *WORK,
+              int                    LDWORK,
+              int *                  INFO )
+{ /* Asserts U, A and L have the FULLRANK format bit, then calls CORE_ztstrf with each tile as (mat, ld). */
+    assert( U->format & CHAMELEON_TILE_FULLRANK );
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    assert( L->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_ztstrf( /* IPIV, WORK, LDWORK and INFO are forwarded unchanged */
+        M, N, IB, NB, U->mat, U->ld, A->mat, A->ld, L->mat, L->ld, IPIV, WORK, LDWORK, INFO );
+}
+
+int
+TCORE_zunmlq( cham_side_t            side,
+              cham_trans_t           trans,
+              int                    M,
+              int                    N,
+              int                    IB, /* NOTE(review): IB precedes K here, whereas TCORE_zunmqr declares K before IB - confirm against CORE_zunmlq */
+              int                    K,
+              const CHAM_tile_t *    V,
+              const CHAM_tile_t *    T,
+              CHAM_tile_t *          C,
+              CHAMELEON_Complex64_t *WORK,
+              int                    LDWORK )
+{ /* Asserts V, T and C have the FULLRANK format bit, then calls CORE_zunmlq with each tile as (mat, ld). */
+    assert( V->format & CHAMELEON_TILE_FULLRANK );
+    assert( T->format & CHAMELEON_TILE_FULLRANK );
+    assert( C->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zunmlq( /* arguments are forwarded positionally, in the order declared above */
+        side, trans, M, N, IB, K, V->mat, V->ld, T->mat, T->ld, C->mat, C->ld, WORK, LDWORK );
+}
+
+int
+TCORE_zunmqr( cham_side_t            side,
+              cham_trans_t           trans,
+              int                    M,
+              int                    N,
+              int                    K,
+              int                    IB,
+              const CHAM_tile_t *    V,
+              const CHAM_tile_t *    T,
+              CHAM_tile_t *          C,
+              CHAMELEON_Complex64_t *WORK,
+              int                    LDWORK )
+{ /* Asserts V, T and C have the FULLRANK format bit, then calls CORE_zunmqr with each tile as (mat, ld). */
+    assert( V->format & CHAMELEON_TILE_FULLRANK );
+    assert( T->format & CHAMELEON_TILE_FULLRANK );
+    assert( C->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zunmqr( /* arguments are forwarded positionally, in the order declared above */
+        side, trans, M, N, K, IB, V->mat, V->ld, T->mat, T->ld, C->mat, C->ld, WORK, LDWORK );
+}
+
+int
+TCORE_zgram( cham_uplo_t        uplo,
+             int                M,
+             int                N,
+             int                Mt,
+             int                Nt,
+             const CHAM_tile_t *Di,
+             const CHAM_tile_t *Dj,
+             const CHAM_tile_t *D,
+             CHAM_tile_t *      A )
+{ /* Asserts Di, Dj, D and A have the FULLRANK format bit, then calls CORE_zgram. */
+    assert( Di->format & CHAMELEON_TILE_FULLRANK );
+    assert( Dj->format & CHAMELEON_TILE_FULLRANK );
+    assert( D->format & CHAMELEON_TILE_FULLRANK );
+    assert( A->format & CHAMELEON_TILE_FULLRANK );
+    return CORE_zgram( /* Di, Dj and A go as (mat, ld); D goes as its buffer only, D->ld is not used */
+        uplo, M, N, Mt, Nt, Di->mat, Di->ld, Dj->mat, Dj->ld, D->mat, A->mat, A->ld );
+}
diff --git a/coreblas/include/CMakeLists.txt b/coreblas/include/CMakeLists.txt
index 9403541a4874e5e84ff365c73a150cc843f2f6da..3d77e2b03817ccb0bd7833d70757fc4bf70692e7 100644
--- a/coreblas/include/CMakeLists.txt
+++ b/coreblas/include/CMakeLists.txt
@@ -31,6 +31,7 @@ set(COREBLAS_HDRS_GENERATED "")
 set(ZHDR
   coreblas/coreblas_z.h
   coreblas/coreblas_zc.h
+  coreblas/coreblas_ztile.h
 )
 precisions_rules_py(
   COREBLAS_HDRS_GENERATED "${ZHDR}"
diff --git a/coreblas/include/coreblas/coreblas_ztile.h b/coreblas/include/coreblas/coreblas_ztile.h
new file mode 100644
index 0000000000000000000000000000000000000000..f26e3cbeee1251739b94f5cca6f756e8dd78e8be
--- /dev/null
+++ b/coreblas/include/coreblas/coreblas_ztile.h
@@ -0,0 +1,81 @@
+/**
+ *
+ * @file coreblas_ztile.h
+ *
+ * @copyright 2019-2019 Bordeaux INP, CNRS (LaBRI UMR 5800 ), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ * @brief Chameleon CPU kernel CHAM_tile_t interface
+ *
+ * @version 1.0.0
+ * @author Mathieu Faverge
+ * @date 2019-08-01
+ * @precisions normal z -> c d s
+ *
+ */
+#ifndef _coreblas_ztile_h_
+#define _coreblas_ztile_h_
+
+void TCORE_dzasum( cham_store_t storev, cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, double *work );
+int  TCORE_zaxpy( int M, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, int incA, CHAM_tile_t *B, int incB );
+int  TCORE_zgeadd( cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, CHAMELEON_Complex64_t beta, CHAM_tile_t *B );
+int  TCORE_zgelqt( int M, int N, int IB, CHAM_tile_t *A, CHAM_tile_t *T, CHAMELEON_Complex64_t *TAU, CHAMELEON_Complex64_t *WORK );
+void TCORE_zgemm( cham_trans_t transA, cham_trans_t transB, int M, int N, int K, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, const CHAM_tile_t *B, CHAMELEON_Complex64_t beta, CHAM_tile_t *C );
+int  TCORE_zgeqrt( int M, int N, int IB, CHAM_tile_t *A, CHAM_tile_t *T, CHAMELEON_Complex64_t *TAU, CHAMELEON_Complex64_t *WORK );
+int  TCORE_zgessm( int M, int N, int K, int IB, const int *IPIV, const CHAM_tile_t *L, CHAM_tile_t *A );
+int  TCORE_zgessq( cham_store_t storev, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *sclssq );
+int  TCORE_zgetrf( int M, int N, CHAM_tile_t *A, int *IPIV, int *INFO );
+int  TCORE_zgetrf_incpiv( int M, int N, int IB, CHAM_tile_t *A, int *IPIV, int *INFO );
+int  TCORE_zgetrf_nopiv( int M, int N, int IB, CHAM_tile_t *A, int *INFO );
+void TCORE_zhe2ge( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B );
+#if defined(PRECISION_z ) || defined(PRECISION_c) /* zhemm/zherk/zher2k wrappers are declared for the z and c precisions only */
+void TCORE_zhemm( cham_side_t side, cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, const CHAM_tile_t *B, CHAMELEON_Complex64_t beta, CHAM_tile_t *C );
+void TCORE_zherk( cham_uplo_t uplo, cham_trans_t trans, int N, int K, double alpha, const CHAM_tile_t *A, double beta, CHAM_tile_t *C );
+void TCORE_zher2k( cham_uplo_t uplo, cham_trans_t trans, int N, int K, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, const CHAM_tile_t *B, double beta, CHAM_tile_t *C );
+#endif /* defined(PRECISION_z) || defined(PRECISION_c) */
+int  TCORE_zherfb( cham_uplo_t uplo, int N, int K, int IB, int NB, const CHAM_tile_t *A, const CHAM_tile_t *T, CHAM_tile_t *C, CHAMELEON_Complex64_t *WORK, int ldwork );
+#if defined(PRECISION_z ) || defined(PRECISION_c)
+int  TCORE_zhessq( cham_store_t storev, cham_uplo_t uplo, int N, const CHAM_tile_t *A, CHAM_tile_t *sclssq );
+#endif /* defined(PRECISION_z) || defined(PRECISION_c) */
+void TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B );
+void TCORE_zlange( cham_normtype_t norm, int M, int N, const CHAM_tile_t *A, double *work, double *normA );
+#if defined(PRECISION_z ) || defined(PRECISION_c)
+void TCORE_zlanhe( cham_normtype_t norm, cham_uplo_t uplo, int N, const CHAM_tile_t *A, double *work, double *normA );
+#endif /* defined(PRECISION_z) || defined(PRECISION_c) */
+void TCORE_zlansy( cham_normtype_t norm, cham_uplo_t uplo, int N, const CHAM_tile_t *A, double *work, double *normA );
+void TCORE_zlantr( cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag, int M, int N, const CHAM_tile_t *A, double *work, double *normA );
+int  TCORE_zlascal( cham_uplo_t uplo, int m, int n, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A );
+void TCORE_zlaset( cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta, CHAM_tile_t *A );
+void TCORE_zlaset2( cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A );
+int  TCORE_zlatro( cham_uplo_t uplo, cham_trans_t trans, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B );
+void TCORE_zlauum( cham_uplo_t uplo, int N, CHAM_tile_t *A );
+#if defined(PRECISION_z ) || defined(PRECISION_c)
+void TCORE_zplghe( double bump, int m, int n, CHAM_tile_t *tileA, int bigM, int m0, int n0, unsigned long long int seed );
+#endif /* defined(PRECISION_z) || defined(PRECISION_c) */
+void TCORE_zplgsy( CHAMELEON_Complex64_t bump, int m, int n, CHAM_tile_t *tileA, int bigM, int m0, int n0, unsigned long long int seed );
+void TCORE_zplrnt( int m, int n, CHAM_tile_t *tileA, int bigM, int m0, int n0, unsigned long long int seed );
+void TCORE_zpotrf( cham_uplo_t uplo, int n, CHAM_tile_t *A, int *INFO );
+int  TCORE_zssssm( int M1, int N1, int M2, int N2, int K, int IB, CHAM_tile_t *A1, CHAM_tile_t *A2, const CHAM_tile_t *L1, const CHAM_tile_t *L2, const int *IPIV );
+void TCORE_zsymm( cham_side_t side, cham_uplo_t uplo, int M, int N, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, const CHAM_tile_t *B, CHAMELEON_Complex64_t beta, CHAM_tile_t *C );
+void TCORE_zsyrk( cham_uplo_t uplo, cham_trans_t trans, int N, int K, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, CHAMELEON_Complex64_t beta, CHAM_tile_t *C );
+void TCORE_zsyr2k( cham_uplo_t uplo, cham_trans_t trans, int N, int K, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, const CHAM_tile_t *B, CHAMELEON_Complex64_t beta, CHAM_tile_t *C );
+int  TCORE_zsyssq( cham_store_t storev, cham_uplo_t uplo, int N, const CHAM_tile_t *A, CHAM_tile_t *sclssq );
+int  TCORE_zsytf2_nopiv( cham_uplo_t uplo, int n, CHAM_tile_t *A );
+int  TCORE_ztplqt( int M, int N, int L, int IB, CHAM_tile_t *A, CHAM_tile_t *B, CHAM_tile_t *T, CHAMELEON_Complex64_t *WORK );
+int  TCORE_ztpmlqt( cham_side_t side, cham_trans_t trans, int M, int N, int K, int L, int IB, const CHAM_tile_t *V, const CHAM_tile_t *T, CHAM_tile_t *A, CHAM_tile_t *B, CHAMELEON_Complex64_t *WORK );
+int  TCORE_ztpmqrt( cham_side_t side, cham_trans_t trans, int M, int N, int K, int L, int IB, const CHAM_tile_t *V, const CHAM_tile_t *T, CHAM_tile_t *A, CHAM_tile_t *B, CHAMELEON_Complex64_t *WORK );
+int  TCORE_ztpqrt( int M, int N, int L, int IB, CHAM_tile_t *A, CHAM_tile_t *B, CHAM_tile_t *T, CHAMELEON_Complex64_t *WORK );
+int  TCORE_ztradd( cham_uplo_t uplo, cham_trans_t trans, int M, int N, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, CHAMELEON_Complex64_t beta, CHAM_tile_t *B );
+void TCORE_ztrasm( cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N, const CHAM_tile_t *A, double *work );
+void TCORE_ztrmm( cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, int M, int N, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, CHAM_tile_t *B );
+void TCORE_ztrsm( cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag, int M, int N, CHAMELEON_Complex64_t alpha, const CHAM_tile_t *A, CHAM_tile_t *B );
+int  TCORE_ztrssq( cham_uplo_t uplo, cham_diag_t diag, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *sclssq );
+void TCORE_ztrtri( cham_uplo_t uplo, cham_diag_t diag, int N, CHAM_tile_t *A, int *info );
+int  TCORE_ztsmlq_hetra1( cham_side_t side, cham_trans_t trans, int m1, int n1, int m2, int n2, int k, int ib, CHAM_tile_t *A1, CHAM_tile_t *A2, const CHAM_tile_t *V, const CHAM_tile_t *T, CHAMELEON_Complex64_t *WORK, int ldwork );
+int  TCORE_ztsmqr_hetra1( cham_side_t side, cham_trans_t trans, int m1, int n1, int m2, int n2, int k, int ib, CHAM_tile_t *A1, CHAM_tile_t *A2, const CHAM_tile_t *V, const CHAM_tile_t *T, CHAMELEON_Complex64_t *WORK, int ldwork );
+int  TCORE_ztstrf( int M, int N, int IB, int NB, CHAM_tile_t *U, CHAM_tile_t *A, CHAM_tile_t *L, int *IPIV, CHAMELEON_Complex64_t *WORK, int LDWORK, int *INFO );
+int  TCORE_zunmlq( cham_side_t side, cham_trans_t trans, int M, int N, int IB, int K, const CHAM_tile_t *V, const CHAM_tile_t *T, CHAM_tile_t *C, CHAMELEON_Complex64_t *WORK, int LDWORK );
+int  TCORE_zunmqr( cham_side_t side, cham_trans_t trans, int M, int N, int K, int IB, const CHAM_tile_t *V, const CHAM_tile_t *T, CHAM_tile_t *C, CHAMELEON_Complex64_t *WORK, int LDWORK );
+int TCORE_zgram( cham_uplo_t uplo, int M, int N, int Mt, int Nt, const CHAM_tile_t *Di, const CHAM_tile_t *Dj, const CHAM_tile_t *D, CHAM_tile_t *A );
+
+#endif /* _coreblas_ztile_h_ */
diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h
index 74758056ea7d90e62f71ad73a0fd6cb872ff88f5..7451dfac6e75293e772cf7e90593ad765831f67d 100644
--- a/include/chameleon/struct.h
+++ b/include/chameleon/struct.h
@@ -28,6 +28,16 @@
 
 BEGIN_C_DECLS
 
+#define CHAMELEON_TILE_FULLRANK (1 << 0) /* CHAM_tile_t.format bit; the TCORE_* wrappers assert it before reading mat/ld */
+#define CHAMELEON_TILE_DESC     (1 << 1) /* CHAM_tile_t.format bit; NOTE(review): semantics not visible here, presumably a nested descriptor - confirm */
+#define CHAMELEON_TILE_HMAT     (1 << 2) /* CHAM_tile_t.format bit; NOTE(review): semantics not visible here, presumably a hierarchical matrix - confirm */
+
+typedef struct chameleon_tile_s { /* per-tile descriptor handed to the TCORE_* kernels in place of a raw pointer + leading dimension */
+    int8_t format; /* bitwise combination of the CHAMELEON_TILE_* flags; tested with '&' */
+    int    m, n, ld; /* ld is forwarded as the leading dimension of mat; m, n presumably the tile row/column counts - TODO confirm */
+    void  *mat; /* tile payload; forwarded as the matrix buffer to CORE_* kernels once FULLRANK has been asserted */
+} CHAM_tile_t;
+
 /**
  *  Tile matrix descriptor
  *
@@ -49,17 +59,21 @@ BEGIN_C_DECLS
 struct chameleon_desc_s;
 typedef struct chameleon_desc_s CHAM_desc_t;
 
-typedef void* (*blkaddr_fct_t)  ( const CHAM_desc_t*, int, int );
-typedef int   (*blkldd_fct_t)   ( const CHAM_desc_t*, int );
-typedef int   (*blkrankof_fct_t)( const CHAM_desc_t*, int, int );
+typedef void*        (*blkaddr_fct_t)  ( const CHAM_desc_t*, int, int );
+typedef int          (*blkldd_fct_t)   ( const CHAM_desc_t*, int );
+typedef int          (*blkrankof_fct_t)( const CHAM_desc_t*, int, int );
+typedef CHAM_tile_t* (*blktile_fct_t)  ( const CHAM_desc_t*, int, int );
 
 struct chameleon_desc_s {
+    // function to get chameleon tiles descriptor (CHAM_tile_t)
+    blktile_fct_t  get_blktile;
     // function to get chameleon tiles address
     blkaddr_fct_t   get_blkaddr;
     // function to get chameleon tiles leading dimension
     blkldd_fct_t    get_blkldd;
     // function to get chameleon tiles MPI rank
     blkrankof_fct_t get_rankof;
+    CHAM_tile_t *tiles; // pointer to the array of tiles descriptors
     void *mat;        // pointer to the beginning of the matrix
     size_t A21;       // pointer to the beginning of the matrix A21
     size_t A12;       // pointer to the beginning of the matrix A12
diff --git a/include/chameleon/tasks.h b/include/chameleon/tasks.h
index e0b1a56a89595ea45063aa600715f31afaaad29d..70b2d7b1330bbb2a20a9825b81089c8b01766a7f 100644
--- a/include/chameleon/tasks.h
+++ b/include/chameleon/tasks.h
@@ -93,7 +93,7 @@ typedef enum chameleon_tasktype_e {
 
 typedef int (*cham_unary_operator_t)( const CHAM_desc_t *desc,
                                       cham_uplo_t uplo, int m, int n,
-                                      void *data, void *op_args );
+                                      CHAM_tile_t *data, void *op_args );
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
                       cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index d61e99b81996ac14f34607e6f9f96278080433ec..a5dbef97567bee951f8a6324966fc1289d292e7e 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -32,144 +32,144 @@
  */
 void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int M, int N,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_zaxpy( const RUNTIME_option_t *options,
                         int M, CHAMELEON_Complex64_t alpha,
                         const CHAM_desc_t *A, int Am, int An, int incA,
                         const CHAM_desc_t *B, int Bm, int Bn, int incB );
 void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An,
                          void *user_data, void* user_build_callback );
 void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
                          cham_trans_t trans, int m, int n, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb );
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_zgelqt( const RUNTIME_option_t *options,
                          int m, int n, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt );
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *T, int Tm, int Tn );
 void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
                         cham_trans_t transA, cham_trans_t transB,
                         int m, int n, int k, int nb,
-                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                        const CHAM_desc_t *B, int Bm, int Bn,
+                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn );
 void INSERT_TASK_zgeqrt( const RUNTIME_option_t *options,
                          int m, int n, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt );
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *T, int Tm, int Tn );
 void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
                          int m, int n, int k, int ib, int nb,
                          int *IPIV,
-                         const CHAM_desc_t *L, int Lm, int Ln, int ldl,
-                         const CHAM_desc_t *D, int Dm, int Dn, int ldd,
-                         const CHAM_desc_t *A, int Am, int An, int lda );
+                         const CHAM_desc_t *L, int Lm, int Ln,
+                         const CHAM_desc_t *D, int Dm, int Dn,
+                         const CHAM_desc_t *A, int Am, int An );
 void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
                          cham_store_t storev, int m, int n,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
 void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
                          int m, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An,
                          int *IPIV,
                          cham_bool_t check_info, int iinfo );
 void INSERT_TASK_zgetrf_incpiv( const RUNTIME_option_t *options,
                                 int m, int n, int ib, int nb,
-                                const CHAM_desc_t *A, int Am, int An, int lda,
-                                const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                                const CHAM_desc_t *A, int Am, int An,
+                                const CHAM_desc_t *L, int Lm, int Ln,
                                 int *IPIV,
                                 cham_bool_t check_info, int iinfo );
 void INSERT_TASK_zgetrf_nopiv( const RUNTIME_option_t *options,
                                int m, int n, int ib, int nb,
-                               const CHAM_desc_t *A, int Am, int An, int lda, int iinfo );
+                               const CHAM_desc_t *A, int Am, int An, int iinfo );
 void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options,
                          cham_uplo_t uplo,
                          int m, int n, int mb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb );
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_zhemm( const RUNTIME_option_t *options,
                         cham_side_t side, cham_uplo_t uplo,
                         int m, int n, int nb,
-                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                        const CHAM_desc_t *B, int Bm, int Bn,
+                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn );
 void INSERT_TASK_zher2k( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans,
                          int n, int k, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int LDB,
-                         double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         double beta, const CHAM_desc_t *C, int Cm, int Cn );
 void INSERT_TASK_zherfb( const RUNTIME_option_t *options,
                          cham_uplo_t uplo,
                          int n, int k, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                         const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *T, int Tm, int Tn,
+                         const CHAM_desc_t *C, int Cm, int Cn );
 void INSERT_TASK_zherk( const RUNTIME_option_t *options,
                         cham_uplo_t uplo, cham_trans_t trans,
                         int n, int k, int nb,
-                        double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                        double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+                        double alpha, const CHAM_desc_t *A, int Am, int An,
+                        double beta, const CHAM_desc_t *C, int Cm, int Cn );
 void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int n,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
 void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int m, int n, int mb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb );
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int m, int n, int mb,
-                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
-                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb );
+                          int displA, const CHAM_desc_t *A, int Am, int An,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_zlange( const RUNTIME_option_t *options,
                          cham_normtype_t norm, int M, int N, int NB,
-                         const CHAM_desc_t *A, int Am, int An, int LDA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_zlange_max( const RUNTIME_option_t *options,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_zlanhe( const RUNTIME_option_t *options,
                          cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
-                         const CHAM_desc_t *A, int Am, int An, int LDA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_zlansy( const RUNTIME_option_t *options,
                          cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
-                         const CHAM_desc_t *A, int Am, int An, int LDA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_zlantr( const RUNTIME_option_t *options,
                          cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                          int M, int N, int NB,
-                         const CHAM_desc_t *A, int Am, int An, int LDA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
                           cham_uplo_t uplo,
                           int m, int n, int nb,
                           CHAMELEON_Complex64_t alpha,
-                          const CHAM_desc_t *A, int Am, int An, int lda );
+                          const CHAM_desc_t *A, int Am, int An );
 void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int n1, int n2,
                          CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
-                         const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea );
+                         const CHAM_desc_t *tileA, int tileAm, int tileAn );
 void INSERT_TASK_zlaset2( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha,
-                          const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea );
+                          const CHAM_desc_t *tileA, int tileAm, int tileAn );
 void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int mb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb );
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda );
+                         const CHAM_desc_t *A, int Am, int An );
 void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
-                         double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         double bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
                          int bigM, int m0, int n0, unsigned long long int seed );
 void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
-                         CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
                          int bigM, int m0, int n0, unsigned long long int seed );
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
-                         int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         int m, int n, const CHAM_desc_t *A, int Am, int An,
                          int bigM, int m0, int n0, unsigned long long int seed );
 void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
                          cham_store_t storev, int M, int N,
@@ -179,125 +179,125 @@ void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, int N,
                           const CHAM_desc_t *RESULT, int RESULTm, int RESULTn );
 void INSERT_TASK_zpotrf( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An,
                          int iinfo );
 void INSERT_TASK_zssssm( const RUNTIME_option_t *options,
                          int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                         const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                         const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                         const CHAM_desc_t *L1, int L1m, int L1n, int ldl1,
-                         const CHAM_desc_t *L2, int L2m, int L2n, int ldl2,
+                         const CHAM_desc_t *A1, int A1m, int A1n,
+                         const CHAM_desc_t *A2, int A2m, int A2n,
+                         const CHAM_desc_t *L1, int L1m, int L1n,
+                         const CHAM_desc_t *L2, int L2m, int L2n,
                          const int *IPIV );
 void INSERT_TASK_zsymm( const RUNTIME_option_t *options,
                         cham_side_t side, cham_uplo_t uplo,
                         int m, int n, int nb,
-                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                        const CHAM_desc_t *B, int Bm, int Bn,
+                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn );
 void INSERT_TASK_zsyr2k( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans,
                          int n, int k, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int LDB,
-                         CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn );
 void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
                         cham_uplo_t uplo, cham_trans_t trans,
                         int n, int k, int nb,
-                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn );
 void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int n,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
 void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options,
                                cham_uplo_t uplo, int n, int nb,
-                               const CHAM_desc_t *A, int Am, int An, int lda,
+                               const CHAM_desc_t *A, int Am, int An,
                                int iinfo );
 void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
                          int m, int n, int l, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt );
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         const CHAM_desc_t *T, int Tm, int Tn );
 void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
                           cham_side_t side, cham_trans_t trans,
                           int M, int N, int K, int L, int ib, int nb,
-                          const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                          const CHAM_desc_t *A, int Am, int An, int lda,
-                          const CHAM_desc_t *B, int Bm, int Bn, int ldb );
+                          const CHAM_desc_t *V, int Vm, int Vn,
+                          const CHAM_desc_t *T, int Tm, int Tn,
+                          const CHAM_desc_t *A, int Am, int An,
+                          const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
                           cham_side_t side, cham_trans_t trans,
                           int m, int n, int k, int l, int ib, int nb,
-                          const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                          const CHAM_desc_t *A, int Am, int An, int lda,
-                          const CHAM_desc_t *B, int Bm, int Bn, int ldb );
+                          const CHAM_desc_t *V, int Vm, int Vn,
+                          const CHAM_desc_t *T, int Tm, int Tn,
+                          const CHAM_desc_t *A, int Am, int An,
+                          const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
                          int m, int n, int l, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt );
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         const CHAM_desc_t *T, int Tm, int Tn );
 void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb );
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_ztrasm( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
                         cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
                         int m, int n, int nb,
-                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *B, int Bm, int Bn, int ldb );
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                        const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
                         cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
                         int m, int n, int nb,
-                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *B, int Bm, int Bn, int ldb );
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                        const CHAM_desc_t *B, int Bm, int Bn );
 void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_diag_t diag,
                          int m, int n,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
 void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_diag_t diag, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An,
 
                          int iinfo );
 void INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options,
                                 cham_side_t side, cham_trans_t trans,
                                 int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                                const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                                const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                                const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                                const CHAM_desc_t *T, int Tm, int Tn, int ldt );
+                                const CHAM_desc_t *A1, int A1m, int A1n,
+                                const CHAM_desc_t *A2, int A2m, int A2n,
+                                const CHAM_desc_t *V, int Vm, int Vn,
+                                const CHAM_desc_t *T, int Tm, int Tn );
 void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options,
                                 cham_side_t side, cham_trans_t trans,
                                 int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                                const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                                const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                                const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                                const CHAM_desc_t *T, int Tm, int Tn, int ldt );
+                                const CHAM_desc_t *A1, int A1m, int A1n,
+                                const CHAM_desc_t *A2, int A2m, int A2n,
+                                const CHAM_desc_t *V, int Vm, int Vn,
+                                const CHAM_desc_t *T, int Tm, int Tn );
 void INSERT_TASK_ztstrf( const RUNTIME_option_t *options,
                          int m, int n, int ib, int nb,
-                         const CHAM_desc_t *U, int Um, int Un, int ldu,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                         const CHAM_desc_t *U, int Um, int Un,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *L, int Lm, int Ln,
                          int *IPIV,
                          cham_bool_t check_info, int iinfo );
 void INSERT_TASK_zunmlq( const RUNTIME_option_t *options,
                          cham_side_t side, cham_trans_t trans,
                          int m, int n, int ib,  int nb, int k,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                         const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *T, int Tm, int Tn,
+                         const CHAM_desc_t *C, int Cm, int Cn );
 void INSERT_TASK_zunmqr( const RUNTIME_option_t *options,
                          cham_side_t side, cham_trans_t trans,
                          int m, int n, int k, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                         const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *T, int Tm, int Tn,
+                         const CHAM_desc_t *C, int Cm, int Cn );
 
 /**
  * Keep these insert_task for retro-compatibility
@@ -305,117 +305,117 @@ void INSERT_TASK_zunmqr( const RUNTIME_option_t *options,
 static inline void
 INSERT_TASK_ztslqt( const RUNTIME_option_t *options,
                     int m, int n, int ib, int nb,
-                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                    const CHAM_desc_t *A1, int A1m, int A1n,
+                    const CHAM_desc_t *A2, int A2m, int A2n,
+                    const CHAM_desc_t *T, int Tm, int Tn )
 {
     INSERT_TASK_ztplqt( options, m, n, 0, ib, nb,
-                        A1, A1m, A1n, lda1,
-                        A2, A2m, A2n, lda2,
-                        T,  Tm,  Tn,  ldt );
+                        A1, A1m, A1n,
+                        A2, A2m, A2n,
+                        T,  Tm,  Tn );
 }
 
 static inline void
 INSERT_TASK_ztsqrt( const RUNTIME_option_t *options,
                     int m, int n, int ib, int nb,
-                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                    const CHAM_desc_t *A1, int A1m, int A1n,
+                    const CHAM_desc_t *A2, int A2m, int A2n,
+                    const CHAM_desc_t *T, int Tm, int Tn )
 {
     INSERT_TASK_ztpqrt( options, m, n, 0, ib, nb,
-                        A1, A1m, A1n, lda1,
-                        A2, A2m, A2n, lda2,
-                        T,  Tm,  Tn,  ldt );
+                        A1, A1m, A1n,
+                        A2, A2m, A2n,
+                        T,  Tm,  Tn );
 }
 
 static inline void
 INSERT_TASK_zttlqt( const RUNTIME_option_t *options,
                     int m, int n, int ib, int nb,
-                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                    const CHAM_desc_t *A1, int A1m, int A1n,
+                    const CHAM_desc_t *A2, int A2m, int A2n,
+                    const CHAM_desc_t *T, int Tm, int Tn )
 {
     INSERT_TASK_ztplqt( options, m, n, n, ib, nb,
-                        A1, A1m, A1n, lda1,
-                        A2, A2m, A2n, lda2,
-                        T,  Tm,  Tn,  ldt );
+                        A1, A1m, A1n,
+                        A2, A2m, A2n,
+                        T,  Tm,  Tn );
 }
 
 static inline void
 INSERT_TASK_zttqrt( const RUNTIME_option_t *options,
                     int m, int n, int ib, int nb,
-                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                    const CHAM_desc_t *A1, int A1m, int A1n,
+                    const CHAM_desc_t *A2, int A2m, int A2n,
+                    const CHAM_desc_t *T, int Tm, int Tn )
 {
     INSERT_TASK_ztpqrt( options, m, n, m, ib, nb,
-                        A1, A1m, A1n, lda1,
-                        A2, A2m, A2n, lda2,
-                        T,  Tm,  Tn,  ldt );
+                        A1, A1m, A1n,
+                        A2, A2m, A2n,
+                        T,  Tm,  Tn );
 }
 
 static inline void
 INSERT_TASK_ztsmlq( const RUNTIME_option_t *options,
                     cham_side_t side, cham_trans_t trans,
                     int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                    const CHAM_desc_t *A1, int A1m, int A1n,
+                    const CHAM_desc_t *A2, int A2m, int A2n,
+                    const CHAM_desc_t *V, int Vm, int Vn,
+                    const CHAM_desc_t *T, int Tm, int Tn )
 {
     (void)m1;
     (void)n1;
     INSERT_TASK_ztpmlqt( options, side, trans, m2, n2, k, 0, ib, nb,
-                         V, Vm, Vn, ldv, T, Tm, Tn, ldt,
-                         A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
+                         V, Vm, Vn, T, Tm, Tn,
+                         A1, A1m, A1n, A2, A2m, A2n );
 }
 
 static inline void
 INSERT_TASK_ztsmqr( const RUNTIME_option_t *options,
                     cham_side_t side, cham_trans_t trans,
                     int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                    const CHAM_desc_t *A1, int A1m, int A1n,
+                    const CHAM_desc_t *A2, int A2m, int A2n,
+                    const CHAM_desc_t *V, int Vm, int Vn,
+                    const CHAM_desc_t *T, int Tm, int Tn )
 {
     (void)m1;
     (void)n1;
     INSERT_TASK_ztpmqrt( options, side, trans, m2, n2, k, 0, ib, nb,
-                         V, Vm, Vn, ldv, T, Tm, Tn, ldt,
-                         A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
+                         V, Vm, Vn, T, Tm, Tn,
+                         A1, A1m, A1n, A2, A2m, A2n );
 }
 
 static inline void
 INSERT_TASK_zttmlq( const RUNTIME_option_t *options,
                     cham_side_t side, cham_trans_t trans,
                     int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                    const CHAM_desc_t *A1, int A1m, int A1n,
+                    const CHAM_desc_t *A2, int A2m, int A2n,
+                    const CHAM_desc_t *V, int Vm, int Vn,
+                    const CHAM_desc_t *T, int Tm, int Tn )
 {
     (void)m1;
     (void)n1;
     INSERT_TASK_ztpmlqt( options, side, trans, m2, n2, k, n2, ib, nb,
-                         V, Vm, Vn, ldv, T, Tm, Tn, ldt,
-                         A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
+                         V, Vm, Vn, T, Tm, Tn,
+                         A1, A1m, A1n, A2, A2m, A2n );
 }
 
 static inline void
 INSERT_TASK_zttmqr( const RUNTIME_option_t *options,
                     cham_side_t side, cham_trans_t trans,
                     int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                    const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                    const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                    const CHAM_desc_t *A1, int A1m, int A1n,
+                    const CHAM_desc_t *A2, int A2m, int A2n,
+                    const CHAM_desc_t *V, int Vm, int Vn,
+                    const CHAM_desc_t *T, int Tm, int Tn )
 {
     (void)m1;
     (void)n1;
     INSERT_TASK_ztpmqrt( options, side, trans, m2, n2, k, m2, ib, nb,
-                         V, Vm, Vn, ldv, T, Tm, Tn, ldt,
-                         A1, A1m, A1n, lda1, A2, A2m, A2n, lda2 );
+                         V, Vm, Vn, T, Tm, Tn,
+                         A1, A1m, A1n, A2, A2m, A2n );
 }
 
 /**
@@ -424,9 +424,9 @@ INSERT_TASK_zttmqr( const RUNTIME_option_t *options,
 void INSERT_TASK_zgram( const RUNTIME_option_t *options,
                         cham_uplo_t uplo,
                         int m, int n, int mt, int nt,
-                        const CHAM_desc_t *Di, int Dim, int Din, int lddi,
-                        const CHAM_desc_t *Dj, int Djm, int Djn, int lddj,
+                        const CHAM_desc_t *Di, int Dim, int Din,
+                        const CHAM_desc_t *Dj, int Djm, int Djn,
                         const CHAM_desc_t *D, int Dm, int Dn,
-                        CHAM_desc_t *A, int Am, int An, int lda);
+                        CHAM_desc_t *A, int Am, int An);
 
 #endif /* _chameleon_tasks_z_h_ */
diff --git a/runtime/openmp/codelets/codelet_dzasum.c b/runtime/openmp/codelets/codelet_dzasum.c
index 1ce65879b2c4077e42a93d0785e69367bb69ffec..e5d37a1b9dbc7ddbe7efe562898dd8d18bbd8459 100644
--- a/runtime/openmp/codelets/codelet_dzasum.c
+++ b/runtime/openmp/codelets/codelet_dzasum.c
@@ -2,8 +2,6 @@
  *
  * @file openmp/codelet_dzasum.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
@@ -12,27 +10,26 @@
  * @brief Chameleon dzasum OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
- * @author Florent Pruvost
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
-#include "coreblas/coreblas_z.h"
 #include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_dzasum(const RUNTIME_option_t *options,
+void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
                        cham_store_t storev, cham_uplo_t uplo, int M, int N,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An );
-    double *ptrB = RTBLKADDR( B, double, Bm, Bn );
-#pragma omp task firstprivate(storev, uplo, M, N, lda, ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0])
-    CORE_dzasum(storev, uplo, M, N, ptrA, lda, ptrB);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+
+#pragma omp task firstprivate( storev, uplo, M, N, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+    TCORE_dzasum( storev, uplo, M, N, tileA, tileB->mat );
 }
 
 
diff --git a/runtime/openmp/codelets/codelet_map.c b/runtime/openmp/codelets/codelet_map.c
index 4feac8e523e34ee038df7834b1fadb4f3d0b1088..9bceac18976cfa0d5a841343bbbf79f08029e963 100644
--- a/runtime/openmp/codelets/codelet_map.c
+++ b/runtime/openmp/codelets/codelet_map.c
@@ -10,8 +10,9 @@
  * @brief Chameleon map OpenMP codelet
  *
  * @version 0.9.2
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @date 2018-11-21
+ * @date 2019-11-19
  *
  */
 #include "chameleon_openmp.h"
@@ -20,11 +21,11 @@ void INSERT_TASK_map( const RUNTIME_option_t *options,
                       cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
                       cham_unary_operator_t op_fct, void *op_args )
 {
-    char *ptrA = RTBLKADDR( A, char, Am, An );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
-#pragma omp task depend(inout: ptrA[0])
+#pragma omp task depend( inout: tileA[0] )
     {
-        op_fct( A, uplo, Am, An, ptrA, op_args );
+        op_fct( A, uplo, Am, An, tileA, op_args );
     }
 
 }
diff --git a/runtime/openmp/codelets/codelet_zaxpy.c b/runtime/openmp/codelets/codelet_zaxpy.c
index 2d23d3ed893d9ff80c201560a712ebe466e735de..1c0006d8217b28e22d02938760ce8914a59eba45 100644
--- a/runtime/openmp/codelets/codelet_zaxpy.c
+++ b/runtime/openmp/codelets/codelet_zaxpy.c
@@ -2,34 +2,32 @@
  *
  * @file openmp/codelet_zaxpy.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zaxpy StarPU codelet
+ * @brief Chameleon zaxpy OpenMP codelet
  *
  * @version 0.9.2
- * @author Florent Pruvost
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zaxpy(const RUNTIME_option_t *options,
+void INSERT_TASK_zaxpy( const RUNTIME_option_t *options,
                       int M, CHAMELEON_Complex64_t alpha,
                       const CHAM_desc_t *A, int Am, int An, int incA,
-                      const CHAM_desc_t *B, int Bm, int Bn, int incB)
+                      const CHAM_desc_t *B, int Bm, int Bn, int incB )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-#pragma omp task firstprivate(M, alpha, incA, incB, ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0])
-    CORE_zaxpy(M, alpha, ptrA, incA, ptrB, incB);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+#pragma omp task firstprivate( M, alpha, incA, incB, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+    TCORE_zaxpy( M, alpha, tileA, incA, tileB, incB );
 }
 
diff --git a/runtime/openmp/codelets/codelet_zbuild.c b/runtime/openmp/codelets/codelet_zbuild.c
index 094d9c2750ca04b3f5e5be566c841ca18fbba3f5..98170904d04e9b92320afa0b3ff72206bc7ac638 100644
--- a/runtime/openmp/codelets/codelet_zbuild.c
+++ b/runtime/openmp/codelets/codelet_zbuild.c
@@ -2,35 +2,26 @@
  *
  * @file openmp/codelet_zbuild.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zbuild StarPU codelet
+ * @brief Chameleon zbuild OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Piotr Luszczek
- * @author Pierre Lemarinier
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
- * @author Guillaume Sylvand
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *A, int Am, int An,
                         void *user_data, void* user_build_callback )
 {
   int row_min, row_max, col_min, col_max;
@@ -38,10 +29,10 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
   row_max = Am == A->mt-1 ? A->m-1 : row_min+A->mb-1 ;
   col_min = An*A->nb ;
   col_max = An == A->nt-1 ? A->n-1 : col_min+A->nb-1 ;
-  CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-  void (*callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ;
+  CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+  void ( *callback )( int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data ) ;
   callback = user_build_callback;
 
-#pragma omp task firstprivate(row_min, row_max, col_min, col_max, ptrA, lda, user_data) depend(inout:ptrA[0])
-  callback(row_min, row_max, col_min, col_max, ptrA, lda, user_data);
+#pragma omp task firstprivate( row_min, row_max, col_min, col_max, tileA, user_data ) depend( inout:tileA[0] )
+  callback( row_min, row_max, col_min, col_max, tileA->mat, tileA->ld, user_data );
 }
diff --git a/runtime/openmp/codelets/codelet_zgeadd.c b/runtime/openmp/codelets/codelet_zgeadd.c
index e1bb12f26d3db3b005c7e1f96e022aa60532b4f5..60e8af6fb3adff9d698ac6eac0dbbf2c30038e42 100644
--- a/runtime/openmp/codelets/codelet_zgeadd.c
+++ b/runtime/openmp/codelets/codelet_zgeadd.c
@@ -2,90 +2,31 @@
  *
  * @file openmp/codelet_zgeadd.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zgeadd StarPU codelet
+ * @brief Chameleon zgeadd OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- ******************************************************************************
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- * @brief Adds two general matrices together as in PBLAS pzgeadd.
- *
- *       B <- alpha * op(A)  + beta * B,
- *
- * where op(X) = X, X', or conj(X')
- *
- *******************************************************************************
- *
- * @param[in] trans
- *          Specifies whether the matrix A is non-transposed, transposed, or
- *          conjugate transposed
- *          = ChamNoTrans:   op(A) = A
- *          = ChamTrans:     op(A) = A'
- *          = ChamConjTrans: op(A) = conj(A')
- *
- * @param[in] M
- *          Number of rows of the matrices op(A) and B.
- *
- * @param[in] N
- *          Number of columns of the matrices op(A) and B.
- *
- * @param[in] alpha
- *          Scalar factor of A.
- *
- * @param[in] A
- *          Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M
- *          otherwise.
- *
- * @param[in] LDA
- *          Leading dimension of the array A. LDA >= max(1,k), with k=M, if
- *          trans = ChamNoTrans, and k=N otherwise.
- *
- * @param[in] beta
- *          Scalar factor of B.
- *
- * @param[in,out] B
- *          Matrix of size LDB-by-N.
- *          On exit, B = alpha * op(A) + beta * B
- *
- * @param[in] LDB
- *          Leading dimension of the array B. LDB >= max(1,M)
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
                          cham_trans_t trans, int m, int n, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-#pragma omp task firstprivate(trans, m, n, alpha, beta, lda, ldb, ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0])
-    CORE_zgeadd(trans, m, n, alpha, ptrA, lda, beta, ptrB, ldb);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+#pragma omp task firstprivate( trans, m, n, alpha, beta, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+    TCORE_zgeadd( trans, m, n, alpha, tileA, beta, tileB );
 }
diff --git a/runtime/openmp/codelets/codelet_zgelqt.c b/runtime/openmp/codelets/codelet_zgelqt.c
index d6baed7cff39804d4bc2f9a73ae38a5782e3edc4..b6004abfa9d69c7cbf3d6347190b64a728cb086b 100644
--- a/runtime/openmp/codelets/codelet_zgelqt.c
+++ b/runtime/openmp/codelets/codelet_zgelqt.c
@@ -2,108 +2,39 @@
  *
  * @file openmp/codelet_zgelqt.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zgelqt StarPU codelet
+ * @brief Chameleon zgelqt OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- *  CORE_zgelqt - computes a LQ factorization of a complex M-by-N tile A: A = L * Q.
- *
- *  The tile Q is represented as a product of elementary reflectors
- *
- *    Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).
- *
- *  Each H(i) has the form
- *
- *    H(i) = I - tau * v * v'
- *
- *  where tau is a complex scalar, and v is a complex vector with
- *  v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
- *  A(i,i+1:n), and tau in TAU(i).
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, the elements on and below the diagonal of the array
- *         contain the M-by-min(M,N) lower trapezoidal tile L (L is
- *         lower triangular if M <= N); the elements above the diagonal,
- *         with the array TAU, represent the unitary tile Q as a
- *         product of elementary reflectors (see Further Details).
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- * @param[out] T
- *         The IB-by-N triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] LDT
- *         The leading dimension of the array T. LDT >= IB.
- *
- * @param[out] TAU
- *         The scalar factors of the elementary reflectors (see Further
- *         Details).
- *
- * @param[out] WORK
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
+void INSERT_TASK_zgelqt( const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
     int ws_size = options->ws_wsize;
 
-#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(out:ptrT[0])
+#pragma omp task firstprivate( ws_size, m, n, ib, tileA, tileT ) depend( inout:tileA[0] ) depend( out:tileT[0] )
     {
       CHAMELEON_Complex64_t TAU[ws_size];
       CHAMELEON_Complex64_t *work = TAU + chameleon_max( m, n );
 
-      CORE_zlaset( ChamUpperLower, ib, m, 0., 0., ptrT, ldt );
-      CORE_zgelqt( m, n, ib, ptrA, lda, ptrT, ldt, TAU, work );
+      TCORE_zlaset( ChamUpperLower, ib, m, 0., 0., tileT );
+      TCORE_zgelqt( m, n, ib, tileA, tileT, TAU, work );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_zgemm.c b/runtime/openmp/codelets/codelet_zgemm.c
index cc15f3d0d7f20c6b6bfce30c9ab71bb0cf72b247..a65f303de24611773203f89384b95c313969036e 100644
--- a/runtime/openmp/codelets/codelet_zgemm.c
+++ b/runtime/openmp/codelets/codelet_zgemm.c
@@ -2,51 +2,40 @@
  *
  * @file openmp/codelet_zgemm.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zgemm StarPU codelet
+ * @brief Chameleon zgemm OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
-                      cham_trans_t transA, cham_trans_t transB,
-                      int m, int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                                               const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+void
+INSERT_TASK_zgemm( const RUNTIME_option_t *options,
+                   cham_trans_t transA, cham_trans_t transB,
+                   int m, int n, int k, int nb,
+                   CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                const CHAM_desc_t *B, int Bm, int Bn,
+                   CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(transA, transB, m, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
-    CORE_zgemm(transA, transB,
-        m, n, k,
-        alpha, ptrA, lda,
-        ptrB, ldb,
-        beta, ptrC, ldc);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
+
+#pragma omp task firstprivate( transA, transB, m, n, k, alpha, tileA, tileB, beta, tileC ) depend( in:tileA[0], tileB[0] ) depend( inout:tileC[0] )
+    TCORE_zgemm( transA, transB,
+                m, n, k,
+                alpha, tileA,
+                tileB,
+                beta, tileC );
 }
diff --git a/runtime/openmp/codelets/codelet_zgeqrt.c b/runtime/openmp/codelets/codelet_zgeqrt.c
index 2f2ed8c495643fb244a6d9a7066e22e687632d66..1fdaa8683ae09e90e4495fe5f76de9e4c34b8934 100644
--- a/runtime/openmp/codelets/codelet_zgeqrt.c
+++ b/runtime/openmp/codelets/codelet_zgeqrt.c
@@ -2,109 +2,39 @@
  *
  * @file openmp/codelet_zgeqrt.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zgeqrt StarPU codelet
+ * @brief Chameleon zgeqrt OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- *  CORE_zgeqrt computes a QR factorization of a complex M-by-N tile A:
- *  A = Q * R.
- *
- *  The tile Q is represented as a product of elementary reflectors
- *
- *    Q = H(1) H(2) . . . H(k), where k = min(M,N).
- *
- *  Each H(i) has the form
- *
- *    H(i) = I - tau * v * v'
- *
- *  where tau is a complex scalar, and v is a complex vector with
- *  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
- *  and tau in TAU(i).
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, the elements on and above the diagonal of the array
- *         contain the min(M,N)-by-N upper trapezoidal tile R (R is
- *         upper triangular if M >= N); the elements below the diagonal,
- *         with the array TAU, represent the unitary tile Q as a
- *         product of elementary reflectors (see Further Details).
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- * @param[out] T
- *         The IB-by-N triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] LDT
- *         The leading dimension of the array T. LDT >= IB.
- *
- * @param[out] TAU
- *         The scalar factors of the elementary reflectors (see Further
- *         Details).
- *
- * @param[out] WORK
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
+void INSERT_TASK_zgeqrt( const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
     int ws_size = options->ws_wsize;
 
-#pragma omp task firstprivate(ws_size, m, n, ib, ptrA, lda, ptrT, ldt) depend(inout:ptrA[0]) depend(out:ptrT[0])
+#pragma omp task firstprivate( ws_size, m, n, ib, tileA, tileT ) depend( inout:tileA[0] ) depend( out:tileT[0] )
     {
       CHAMELEON_Complex64_t TAU[ws_size];
-      CHAMELEON_Complex64_t *work = TAU + chameleon_max(m, n);
+      CHAMELEON_Complex64_t *work = TAU + chameleon_max( m, n );
 
-      CORE_zlaset( ChamUpperLower, ib, n, 0., 0., ptrT, ldt );
-      CORE_zgeqrt( m, n, ib, ptrA, lda, ptrT, ldt, TAU, work );
+      TCORE_zlaset( ChamUpperLower, ib, n, 0., 0., tileT );
+      TCORE_zgeqrt( m, n, ib, tileA, tileT, TAU, work );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_zgessm.c b/runtime/openmp/codelets/codelet_zgessm.c
index 29969b08e0fc9449ca84304b6a511bae98221d09..c9bfcd2f33c7cc40ca07793c61b6dcf7a1db2bc5 100644
--- a/runtime/openmp/codelets/codelet_zgessm.c
+++ b/runtime/openmp/codelets/codelet_zgessm.c
@@ -2,86 +2,33 @@
  *
  * @file openmp/codelet_zgessm.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zgessm StarPU codelet
+ * @brief Chameleon zgessm OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- *  CORE_zgessm applies the factors L computed by CORE_zgetrf_incpiv to
- *  a complex M-by-N tile A.
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] K
- *         The number of columns of the tile L. K >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in] IPIV
- *         The pivot indices array of size K as returned by
- *         CORE_zgetrf_incpiv.
- *
- * @param[in] L
- *         The M-by-K lower triangular tile.
- *
- * @param[in] LDL
- *         The leading dimension of the array L.  LDL >= max(1,M).
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, updated by the application of L.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- *
- */
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
+void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
                        int m, int n, int k, int ib, int nb,
                        int *IPIV,
-                       const CHAM_desc_t *L, int Lm, int Ln, int ldl,
-                       const CHAM_desc_t *D, int Dm, int Dn, int ldd,
-                       const CHAM_desc_t *A, int Am, int An, int lda)
+                       const CHAM_desc_t *L, int Lm, int Ln,
+                       const CHAM_desc_t *D, int Dm, int Dn,
+                       const CHAM_desc_t *A, int Am, int An )
 {
-    CHAMELEON_Complex64_t *ptrD = RTBLKADDR(D, CHAMELEON_Complex64_t, Dm, Dn);
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda) depend(in:ptrD[0]) depend(inout:ptrA[0])
-    CORE_zgessm(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda);
+    CHAM_tile_t *tileD = D->get_blktile( D, Dm, Dn );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+#pragma omp task firstprivate( m, n, k, ib, IPIV, tileD, tileA ) depend( in:tileD[0] ) depend( inout:tileA[0] )
+    TCORE_zgessm( m, n, k, ib, IPIV, tileD, tileA );
 }
diff --git a/runtime/openmp/codelets/codelet_zgessq.c b/runtime/openmp/codelets/codelet_zgessq.c
index 42453eaacfde2ddc798a70059df8d417c7bd1edb..ccf2ee3435b318a0dd1f48e011366bf3717b71a2 100644
--- a/runtime/openmp/codelets/codelet_zgessq.c
+++ b/runtime/openmp/codelets/codelet_zgessq.c
@@ -2,35 +2,31 @@
  *
  * @file openmp/codelet_zgessq.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zgessq StarPU codelet
+ * @brief Chameleon zgessq OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
- * @author Mathieu Faverge
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
                         cham_store_t storev, int m, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *A, int Am, int An,
                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    double *ptrScaleSum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
-#pragma omp task firstprivate(storev, m, n, ptrA, lda, ptrScaleSum) depend(in:ptrA[0]) depend(inout:ptrScaleSum[0])
-    CORE_zgessq( storev, m, n, ptrA, lda, ptrScaleSum );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileScaleSum = SCALESUMSQ->get_blktile( SCALESUMSQ, SCALESUMSQm, SCALESUMSQn );
+#pragma omp task firstprivate( storev, m, n, tileA, tileScaleSum ) depend( in:tileA[0] ) depend( inout:tileScaleSum[0] )
+    TCORE_zgessq( storev, m, n, tileA, tileScaleSum );
 }
diff --git a/runtime/openmp/codelets/codelet_zgetrf.c b/runtime/openmp/codelets/codelet_zgetrf.c
index 8a197d18b2bd58c3507aa0c7fffc3578c242092f..4ed49a30cca24c06036fd58707cd37c1a3b7b1f9 100644
--- a/runtime/openmp/codelets/codelet_zgetrf.c
+++ b/runtime/openmp/codelets/codelet_zgetrf.c
@@ -2,38 +2,32 @@
  *
  * @file openmp/codelet_zgetrf.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zgetrf StarPU codelet
+ * @brief Chameleon zgetrf OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zgetrf(const RUNTIME_option_t *options,
+void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
                        int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *A, int Am, int An,
                        int *IPIV,
-                       cham_bool_t check_info, int iinfo)
+                       cham_bool_t check_info, int iinfo )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
     int info = 0;
-#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(out:IPIV[0]) depend(inout:ptrA[0])
-    CORE_zgetrf( m, n, ptrA, lda, IPIV, &info );
+#pragma omp task firstprivate( m, n, tileA, IPIV, info ) depend( out:IPIV[0] ) depend( inout:tileA[0] )
+    TCORE_zgetrf( m, n, tileA, IPIV, &info );
 }
diff --git a/runtime/openmp/codelets/codelet_zgetrf_incpiv.c b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c
index 00edb5847a067f7cf8c2366450cdd740d398bdbf..8dcf085390cafcff2f644a75c30f17268b662223 100644
--- a/runtime/openmp/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c
@@ -2,95 +2,33 @@
  *
  * @file openmp/codelet_zgetrf_incpiv.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zgetrf_incpiv StarPU codelet
+ * @brief Chameleon zgetrf_incpiv OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- *  CORE_zgetrf_incpiv computes an LU factorization of a general M-by-N tile A
- *  using partial pivoting with row interchanges.
- *
- *  The factorization has the form
- *
- *    A = P * L * U
- *
- *  where P is a permutation matrix, L is lower triangular with unit
- *  diagonal elements (lower trapezoidal if m > n), and U is upper
- *  triangular (upper trapezoidal if m < n).
- *
- *  This is the right-looking Level 2.5 BLAS version of the algorithm.
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile to be factored.
- *         On exit, the factors L and U from the factorization
- *         A = P*L*U; the unit diagonal elements of L are not stored.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- * @param[out] IPIV
- *         The pivot indices; for 1 <= i <= min(M,N), row i of the
- *         tile was interchanged with row IPIV(i).
- *
- * @param[out] INFO
- *         See returned value.
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
- *              has been completed, but the factor U is exactly
- *              singular, and division by zero will occur if it is used
- *              to solve a system of equations.
- *
- */
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
+void INSERT_TASK_zgetrf_incpiv( const RUNTIME_option_t *options,
                               int m, int n, int ib, int nb,
-                              const CHAM_desc_t *A, int Am, int An, int lda,
-                              const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                              const CHAM_desc_t *A, int Am, int An,
+                              const CHAM_desc_t *L, int Lm, int Ln,
                               int *IPIV,
-                              cham_bool_t check_info, int iinfo)
+                              cham_bool_t check_info, int iinfo )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
     int info = 0;
-#pragma omp task firstprivate(m, n, ib, ptrA, lda, IPIV, info) depend(out:IPIV[0]) depend(inout:ptrA[0])
-    CORE_zgetrf_incpiv(m, n, ib, ptrA, lda, IPIV, &info);
+#pragma omp task firstprivate( m, n, ib, tileA, IPIV, info ) depend( out:IPIV[0] ) depend( inout:tileA[0] )
+    TCORE_zgetrf_incpiv( m, n, ib, tileA, IPIV, &info );
 }
diff --git a/runtime/openmp/codelets/codelet_zgetrf_nopiv.c b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c
index 86a520800969781dfad57bfc892575b8cc8de36c..53446efac813836c5c954046ceea02e9ab2356f0 100644
--- a/runtime/openmp/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c
@@ -2,83 +2,31 @@
  *
  * @file openmp/codelet_zgetrf_nopiv.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zgetrf_nopiv StarPU codelet
+ * @brief Chameleon zgetrf_nopiv OpenMP codelet
  *
  * @version 0.9.2
- * @author Omar Zenati
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- *  CORE_zgetrf_nopiv computes an LU factorization of a general diagonal
- *  dominant M-by-N matrix A witout pivoting.
- *
- *  The factorization has the form
- *     A = L * U
- *  where L is lower triangular with unit
- *  diagonal elements (lower trapezoidal if m > n), and U is upper
- *  triangular (upper trapezoidal if m < n).
- *
- *  This is the right-looking Level 3 BLAS version of the algorithm.
- *  WARNING: Your matrix need to be diagonal dominant if you want to call this
- *  routine safely.
- *
- *******************************************************************************
- *
- *  @param[in] M
- *          The number of rows of the matrix A.  M >= 0.
- *
- *  @param[in] N
- *          The number of columns of the matrix A.  N >= 0.
- *
- *  @param[in] IB
- *          The block size to switch between blocked and unblocked code.
- *
- *  @param[in,out] A
- *          On entry, the M-by-N matrix to be factored.
- *          On exit, the factors L and U from the factorization
- *          A = P*L*U; the unit diagonal elements of L are not stored.
- *
- *  @param[in] LDA
- *          The leading dimension of the array A.  LDA >= max(1,M).
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
- *              has been completed, but the factor U is exactly
- *              singular, and division by zero will occur if it is used
- *              to solve a system of equations.
- *
- */
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
+void INSERT_TASK_zgetrf_nopiv( const RUNTIME_option_t *options,
                               int m, int n, int ib, int nb,
-                              const CHAM_desc_t *A, int Am, int An, int lda,
-                              int iinfo)
+                              const CHAM_desc_t *A, int Am, int An,
+                              int iinfo )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
     int info = 0;
-#pragma omp task firstprivate(m, n, ib, ptrA, lda, info) depend(inout:ptrA[0])
-    CORE_zgetrf_nopiv(m, n, ib, ptrA, lda, &info);
+#pragma omp task firstprivate( m, n, ib, tileA, info ) depend( inout:tileA[0] )
+    TCORE_zgetrf_nopiv( m, n, ib, tileA, &info );
 }
diff --git a/runtime/openmp/codelets/codelet_zgram.c b/runtime/openmp/codelets/codelet_zgram.c
index 1250573332538d53bf201c4b82fb7e57ec2aa1c3..af0363df7a1818789390d1e70f9bc477656e892e 100644
--- a/runtime/openmp/codelets/codelet_zgram.c
+++ b/runtime/openmp/codelets/codelet_zgram.c
@@ -10,34 +10,30 @@
  * @brief Chameleon zgram OpenMP codelet
  *
  * @version 0.9.2
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @author Florent Pruvost
- * @date 2019-04-10
+ * @date 2019-11-19
  * @precisions normal z -> s d c z
  *
  */
-
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_zgram( const RUNTIME_option_t *options,
                         cham_uplo_t uplo,
                         int m, int n, int mt, int nt,
-                        const CHAM_desc_t *Di, int Dim, int Din, int lddi,
-                        const CHAM_desc_t *Dj, int Djm, int Djn, int lddj,
-                        const CHAM_desc_t *D, int Dm, int Dn,
-                        CHAM_desc_t *A, int Am, int An, int lda)
+                        const CHAM_desc_t *Di, int Dim, int Din,
+                        const CHAM_desc_t *Dj, int Djm, int Djn,
+                        const CHAM_desc_t *D,  int Dm,  int Dn,
+                              CHAM_desc_t *A,  int Am,  int An )
 {
-    double *ptrDi = RTBLKADDR(Di, double, Dim, Din);
-    double *ptrDj = RTBLKADDR(Dj, double, Djm, Djn);
-    double *ptrD  = RTBLKADDR(D,  double, Dm, Dn);
-    double *ptrA  = RTBLKADDR(A,  double, Am, An);
+    CHAM_tile_t *tileDi = Di->get_blktile( Di, Dim, Din );
+    CHAM_tile_t *tileDj = Dj->get_blktile( Dj, Djm, Djn );
+    CHAM_tile_t *tileD  = D->get_blktile( D, Dm, Dn );
+    CHAM_tile_t *tileA  = A->get_blktile( A, Am, An );
 
-#pragma omp task firstprivate(uplo, m, n, mt, nt, ptrDi, lddi, ptrDj, lddj, ptrD, ptrA, lda) depend(in:ptrDi[0], ptrDj[0], ptrD[0]) depend(inout:ptrA[0])
-    CORE_zgram( uplo,
-                m, n, mt, nt,
-                ptrDi, lddi,
-                ptrDj, lddj,
-                ptrD,
-                ptrA, lda);
+#pragma omp task firstprivate( uplo, m, n, mt, nt, tileDi, tileDj, tileD, tileA ) depend( in:tileDi[0], tileDj[0], tileD[0] ) depend( inout:tileA[0] )
+    TCORE_zgram( uplo, m, n, mt, nt,
+                 tileDi, tileDj, tileD, tileA );
 }
diff --git a/runtime/openmp/codelets/codelet_zhe2ge.c b/runtime/openmp/codelets/codelet_zhe2ge.c
index f0730231c80955dc20f77ca5335e1b44fc17c484..df85d582d252ce765e80271e94e22c1609b32e74 100644
--- a/runtime/openmp/codelets/codelet_zhe2ge.c
+++ b/runtime/openmp/codelets/codelet_zhe2ge.c
@@ -2,38 +2,32 @@
  *
  * @file openmp/codelet_zhe2ge.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zhe2ge StarPU codelet
+ * @brief Chameleon zhe2ge OpenMP codelet
  *
  * @version 0.9.2
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
+void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options,
                        cham_uplo_t uplo,
                        int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An );
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn );
-#pragma omp task firstprivate(uplo, m, n, ptrA, lda, ptrB, ldb) depend(in: ptrA[0]) depend(inout:ptrB[0])
-    CORE_zhe2ge(uplo, m, n, ptrA, lda, ptrB, ldb);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+#pragma omp task firstprivate( uplo, m, n, tileA, tileB ) depend( in: tileA[0] ) depend( inout:tileB[0] )
+    TCORE_zhe2ge( uplo, m, n, tileA, tileB );
 }
diff --git a/runtime/openmp/codelets/codelet_zhemm.c b/runtime/openmp/codelets/codelet_zhemm.c
index 91cafbb923023f0dd460356a9df79cf905e22e91..f472ceac36ef7eae036940e4813a6a09de184660 100644
--- a/runtime/openmp/codelets/codelet_zhemm.c
+++ b/runtime/openmp/codelets/codelet_zhemm.c
@@ -2,51 +2,38 @@
  *
  * @file openmp/codelet_zhemm.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zhemm StarPU codelet
+ * @brief Chameleon zhemm OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
+void INSERT_TASK_zhemm( const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
-    CORE_zhemm(side, uplo,
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
+#pragma omp task firstprivate( side, uplo, m, n, alpha, tileA, tileB, beta, tileC ) depend( in:tileA[0], tileB[0] ) depend( inout:tileC[0] )
+    TCORE_zhemm( side, uplo,
         m, n,
-        alpha, ptrA, lda,
-        ptrB, ldb,
-        beta, ptrC, ldc);
+        alpha, tileA,
+        tileB,
+        beta, tileC );
 }
diff --git a/runtime/openmp/codelets/codelet_zher2k.c b/runtime/openmp/codelets/codelet_zher2k.c
index 0ad4992a4108bd78ebe5faf0b213ef1daf37ccf0..1c67a996ce1a34ca9610ff465792cf4a963c847e 100644
--- a/runtime/openmp/codelets/codelet_zher2k.c
+++ b/runtime/openmp/codelets/codelet_zher2k.c
@@ -2,48 +2,35 @@
  *
  * @file openmp/codelet_zher2k.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zher2k StarPU codelet
+ * @brief Chameleon zher2k OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
+void INSERT_TASK_zher2k( const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_trans_t trans,
                        int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                       double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn,
+                       double beta, const CHAM_desc_t *C, int Cm, int Cn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(uplo, trans, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
-    CORE_zher2k(uplo, trans,
-                n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
+#pragma omp task firstprivate( uplo, trans, n, k, alpha, tileA, tileB, beta, tileC ) depend( in:tileA[0], tileB[0] ) depend( inout:tileC[0] )
+    TCORE_zher2k( uplo, trans,
+                n, k, alpha, tileA, tileB, beta, tileC );
 }
diff --git a/runtime/openmp/codelets/codelet_zherfb.c b/runtime/openmp/codelets/codelet_zherfb.c
index bb6f5a4dca133be3d0eabe668f8230b36f202030..6dfa2fe3a15b38dca26c1a3b19e81a26c900b5ec 100644
--- a/runtime/openmp/codelets/codelet_zherfb.c
+++ b/runtime/openmp/codelets/codelet_zherfb.c
@@ -2,43 +2,38 @@
  *
  * @file openmp/codelet_zherfb.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zherfb StarPU codelet
+ * @brief Chameleon zherfb OpenMP codelet
  *
  * @version 0.9.2
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
+void INSERT_TASK_zherfb( const RUNTIME_option_t *options,
                        cham_uplo_t uplo,
                        int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn,
+                       const CHAM_desc_t *C, int Cm, int Cn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
-    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0])
+#pragma omp task firstprivate( ws_size, uplo, n, k, ib, nb, tileA, tileT, tileC ) depend( in:tileA[0], tileT[0] ) depend( inout:tileC[0] )
     {
       CHAMELEON_Complex64_t work[ws_size];
-      CORE_zherfb(uplo, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb);
+      TCORE_zherfb( uplo, n, k, ib, nb, tileA, tileT, tileC, work, nb );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_zherk.c b/runtime/openmp/codelets/codelet_zherk.c
index d9742c2677e114e8b619ea73b9fb951c30264669..d05ef433174d13f65951e9ab971108a2e5f2d833 100644
--- a/runtime/openmp/codelets/codelet_zherk.c
+++ b/runtime/openmp/codelets/codelet_zherk.c
@@ -2,48 +2,35 @@
  *
  * @file openmp/codelet_zherk.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zherk StarPU codelet
+ * @brief Chameleon zherk OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_zherk(const RUNTIME_option_t *options,
+void INSERT_TASK_zherk( const RUNTIME_option_t *options,
                       cham_uplo_t uplo, cham_trans_t trans,
                       int n, int k, int nb,
-                      double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      double alpha, const CHAM_desc_t *A, int Am, int An,
+                      double beta, const CHAM_desc_t *C, int Cm, int Cn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(uplo, trans, n, k, alpha, ptrA, lda, beta, ptrC, ldc) depend(in:ptrA[0]) depend(inout:ptrC[0])
-    CORE_zherk(uplo, trans,
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
+#pragma omp task firstprivate( uplo, trans, n, k, alpha, tileA, beta, tileC ) depend( in:tileA[0] ) depend( inout:tileC[0] )
+    TCORE_zherk( uplo, trans,
         n, k,
-        alpha, ptrA, lda,
-        beta, ptrC, ldc);
+        alpha, tileA,
+        beta, tileC );
 }
diff --git a/runtime/openmp/codelets/codelet_zhessq.c b/runtime/openmp/codelets/codelet_zhessq.c
index 85232d001a1b29286796d1a2e17e77eb095f310c..7f1c8ae11bb226749794a67176955accafe899de 100644
--- a/runtime/openmp/codelets/codelet_zhessq.c
+++ b/runtime/openmp/codelets/codelet_zhessq.c
@@ -2,21 +2,17 @@
  *
  * @file openmp/codelet_zhessq.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zhessq StarPU codelet
+ * @brief Chameleon zhessq OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
- * @author Mathieu Faverge
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c
  *
  */
@@ -25,10 +21,10 @@
 
 void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int n,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     INSERT_TASK_zsyssq( options, storev, uplo, n,
-                        A, Am, An, lda,
+                        A, Am, An,
                         SCALESUMSQ, SCALESUMSQm, SCALESUMSQn );
 }
diff --git a/runtime/openmp/codelets/codelet_zlacpy.c b/runtime/openmp/codelets/codelet_zlacpy.c
index e483d86d10ac5c2671f55addb415499065a754f8..a2bdc0947ceb984e3e12b7987a23c442931ec568 100644
--- a/runtime/openmp/codelets/codelet_zlacpy.c
+++ b/runtime/openmp/codelets/codelet_zlacpy.c
@@ -2,54 +2,50 @@
  *
  * @file openmp/codelet_zlacpy.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zlacpy StarPU codelet
+ * @brief Chameleon zlacpy OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Julien Langou
- * @author Henricus Bouwmeester
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int m, int n, int nb,
-                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
-                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                          int displA, const CHAM_desc_t *A, int Am, int An,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A + displA, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B + displB, CHAMELEON_Complex64_t, Bm, Bn);
-#pragma omp task firstprivate(uplo, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
-    CORE_zlacpy(uplo, m, n, ptrA, lda, ptrB, ldb);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+
+#pragma omp task firstprivate( uplo, m, n, displA, tileA, displB, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+    {
+        CHAMELEON_Complex64_t *A = tileA->mat;
+        CHAMELEON_Complex64_t *B = tileB->mat;
+
+        CORE_zlacpy( uplo, m, n, A + displA, tileA->ld, B + displB, tileB->ld );
+    }
 }
 
 void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int m, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                         0, A, Am, An, lda,
-                         0, B, Bm, Bn, ldb );
+                         0, A, Am, An,
+                         0, B, Bm, Bn );
 }
diff --git a/runtime/openmp/codelets/codelet_zlag2c.c b/runtime/openmp/codelets/codelet_zlag2c.c
index cc3b003c0b795a3a0d77792a8d022c1a1c462b84..1edde747d708f3102289bae61c77b46bc84c6408 100644
--- a/runtime/openmp/codelets/codelet_zlag2c.c
+++ b/runtime/openmp/codelets/codelet_zlag2c.c
@@ -2,53 +2,42 @@
  *
  * @file openmp/codelet_zlag2c.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zlag2c StarPU codelet
+ * @brief Chameleon zlag2c OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions mixed zc -> ds
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
 void INSERT_TASK_zlag2c( const RUNTIME_option_t *options,
                          int m, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex32_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn);
-#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
-    CORE_zlag2c( m, n, ptrA, lda, ptrB, ldb);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+#pragma omp task firstprivate( m, n, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+    TCORE_zlag2c( m, n, tileA, tileB );
 }
 
 void INSERT_TASK_clag2z( const RUNTIME_option_t *options,
                          int m, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex32_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
-    CORE_clag2z( m, n, ptrA, lda, ptrB, ldb);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+#pragma omp task firstprivate( m, n, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+    TCORE_clag2z( m, n, tileA, tileB );
 }
diff --git a/runtime/openmp/codelets/codelet_zlange.c b/runtime/openmp/codelets/codelet_zlange.c
index 5ed3ec11cf89f80cf51a44de8ccbb8856406d77a..b9ec27f1052e8e37d56762de1491e1b4d8fe4ae0 100644
--- a/runtime/openmp/codelets/codelet_zlange.c
+++ b/runtime/openmp/codelets/codelet_zlange.c
@@ -2,55 +2,55 @@
  *
  * @file openmp/codelet_zlange.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zlange StarPU codelet
+ * @brief Chameleon zlange OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
- * @author Julien Langou
- * @author Henricus Bouwmeester
- * @author Mathieu Faverge
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zlange(const RUNTIME_option_t *options,
-                       cham_normtype_t norm, int M, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_zlange( const RUNTIME_option_t *options,
+                         cham_normtype_t norm, int M, int N, int NB,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    double *ptrB = RTBLKADDR(B, double, Bm, Bn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, M, N, ptrA, LDA, ptrB, options) depend(in:ptrA[0]) depend(inout:ptrB[0])
+#pragma omp task firstprivate( ws_size, norm, M, N, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
     {
-      double work[ws_size];
-      CORE_zlange( norm, M, N, ptrA, LDA, work, ptrB);
+        double work[ws_size];
+        TCORE_zlange( norm, M, N, tileA, work, tileB->mat );
     }
 }
 
-void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
+void INSERT_TASK_zlange_max( const RUNTIME_option_t *options,
                            const CHAM_desc_t *A, int Am, int An,
-                           const CHAM_desc_t *B, int Bm, int Bn)
+                           const CHAM_desc_t *B, int Bm, int Bn )
 {
-    double *ptrA = RTBLKADDR(A, double, Am, An);
-    double *ptrB = RTBLKADDR(B, double, Bm, Bn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
-#pragma omp task firstprivate(ptrA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0])
+#pragma omp task firstprivate( tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
     {
-        if ( *ptrA > *ptrB )
-            *ptrB = *ptrA;
+        double *A, *B;
+
+        A = tileA->mat;
+        B = tileB->mat;
+
+        if ( A[0] > B[0] ) {
+            B[0] = A[0];
+        }
     }
 }
diff --git a/runtime/openmp/codelets/codelet_zlanhe.c b/runtime/openmp/codelets/codelet_zlanhe.c
index 5f9e0a77179414645cecf4de662bebb9a3eec789..9b13a208ffb89d38ce148902acd7e41f9916952b 100644
--- a/runtime/openmp/codelets/codelet_zlanhe.c
+++ b/runtime/openmp/codelets/codelet_zlanhe.c
@@ -2,41 +2,36 @@
  *
  * @file openmp/codelet_zlanhe.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zlanhe StarPU codelet
+ * @brief Chameleon zlanhe OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
- * @author Julien Langou
- * @author Henricus Bouwmeester
- * @author Mathieu Faverge
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
-                       cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_zlanhe( const RUNTIME_option_t *options,
+                         cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    double *normA = RTBLKADDR(B, double, Bm, Bn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, norm, uplo, N, ptrA, LDA, normA) depend(in:ptrA[0]) depend(inout:normA[0])
+
+#pragma omp task firstprivate( ws_size, norm, uplo, N, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
     {
-      double work[ws_size];
-      CORE_zlanhe( norm, uplo, N, ptrA, LDA, work, normA);
+        double work[ws_size];
+        TCORE_zlanhe( norm, uplo, N, tileA, work, tileB->mat );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_zlansy.c b/runtime/openmp/codelets/codelet_zlansy.c
index 147b59da4c3634f5ebf66657975ba988c4e98d3f..9046ca44c76753e1cb8320db039697f805ca2574 100644
--- a/runtime/openmp/codelets/codelet_zlansy.c
+++ b/runtime/openmp/codelets/codelet_zlansy.c
@@ -2,41 +2,36 @@
  *
  * @file openmp/codelet_zlansy.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zlansy StarPU codelet
+ * @brief Chameleon zlansy OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
- * @author Julien Langou
- * @author Henricus Bouwmeester
- * @author Mathieu Faverge
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zlansy(const RUNTIME_option_t *options,
-                       cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_zlansy( const RUNTIME_option_t *options,
+                         cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    double *normA = RTBLKADDR(B, double, Bm, Bn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, norm, uplo, N, ptrA, LDA, normA) depend(in:ptrA[0]) depend(inout:normA[0])
+
+#pragma omp task firstprivate( ws_size, norm, uplo, N, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
     {
-      double work[ws_size];
-      CORE_zlansy( norm, uplo, N, ptrA, LDA, work, normA);
+        double work[ws_size];
+        TCORE_zlansy( norm, uplo, N, tileA, work, tileB->mat );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_zlantr.c b/runtime/openmp/codelets/codelet_zlantr.c
index 994d2fb26f966b92621a1f91b7a041c09a211c8f..0006b23035387bdace48a1446400e1c8b65e8f8c 100644
--- a/runtime/openmp/codelets/codelet_zlantr.c
+++ b/runtime/openmp/codelets/codelet_zlantr.c
@@ -2,40 +2,36 @@
  *
  * @file openmp/codelet_zlantr.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zlantr StarPU codelet
+ * @brief Chameleon zlantr OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
- * @author Mathieu Faverge
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zlantr(const RUNTIME_option_t *options,
-                       cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
-                       int M, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_zlantr( const RUNTIME_option_t *options,
+                         cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
+                         int M, int N, int NB,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    double *ptrB = RTBLKADDR(B, double, Bm, Bn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
     int ws_wsize = options->ws_wsize;
-#pragma omp task firstprivate(ws_wsize, norm, uplo, diag, M, N, ptrA, LDA, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0])
+#pragma omp task firstprivate( ws_wsize, norm, uplo, diag, M, N, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
     {
-      double work[ws_wsize];
-      CORE_zlantr(norm, uplo, diag, M, N, ptrA, LDA, work, ptrB);
+        double work[ws_wsize];
+        TCORE_zlantr( norm, uplo, diag, M, N, tileA, work, tileB->mat );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_zlascal.c b/runtime/openmp/codelets/codelet_zlascal.c
index 84944c9093f5d21768788da53b9149e3970ea497..71bb938c73a38df3063793c7df70ae7b5ee96cf7 100644
--- a/runtime/openmp/codelets/codelet_zlascal.c
+++ b/runtime/openmp/codelets/codelet_zlascal.c
@@ -2,67 +2,31 @@
  *
  * @file openmp/codelet_zlascal.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zlascal StarPU codelet
+ * @brief Chameleon zlascal OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Dalal Sukkari
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- *  CORE_zlascal adds to matrices together.
- *
- *       A <- alpha * A
- *
- *******************************************************************************
- *
- * @param[in] M
- *          Number of rows of the matrices A and B.
- *
- * @param[in] N
- *          Number of columns of the matrices A and B.
- *
- * @param[in] alpha
- *          Scalar factor of A.
- *
- * @param[in] A
- *          Matrix of size LDA-by-N.
- *
- * @param[in] LDA
- *          Leading dimension of the array A. LDA >= max(1,M)
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
-
-void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
-                        cham_uplo_t uplo,
-                        int m, int n, int nb,
-                        CHAMELEON_Complex64_t alpha,
-                        const CHAM_desc_t *A, int Am, int An, int lda)
+void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo,
+                         int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha,
+                         const CHAM_desc_t *A, int Am, int An )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, m, n, alpha, ptrA, lda) depend(inout:ptrA[0])
-    CORE_zlascal(uplo, m, n, alpha, ptrA, lda);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+#pragma omp task firstprivate( uplo, m, n, alpha, tileA ) depend( inout:tileA[0] )
+    TCORE_zlascal( uplo, m, n, alpha, tileA );
 }
diff --git a/runtime/openmp/codelets/codelet_zlaset.c b/runtime/openmp/codelets/codelet_zlaset.c
index 8884f4cbf8a37e30afbbd78097ac188267cea79e..5e083ffb12d5db6a9fc9a2f602a4f58a86027ad2 100644
--- a/runtime/openmp/codelets/codelet_zlaset.c
+++ b/runtime/openmp/codelets/codelet_zlaset.c
@@ -2,73 +2,30 @@
  *
  * @file openmp/codelet_zlaset.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zlaset StarPU codelet
+ * @brief Chameleon zlaset OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- *  CORE_zlaset - Sets the elements of the matrix A on the diagonal
- *  to beta and on the off-diagonals to alpha
- *
- *******************************************************************************
- *
- * @param[in] uplo
- *          Specifies which elements of the matrix are to be set
- *          = ChamUpper: Upper part of A is set;
- *          = ChamLower: Lower part of A is set;
- *          = ChamUpperLower: ALL elements of A are set.
- *
- * @param[in] M
- *          The number of rows of the matrix A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the matrix A.  N >= 0.
- *
- * @param[in] alpha
- *         The constant to which the off-diagonal elements are to be set.
- *
- * @param[in] beta
- *         The constant to which the diagonal elements are to be set.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, A has been set accordingly.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- */
-void INSERT_TASK_zlaset(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, int M, int N,
-                       CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
-                       const CHAM_desc_t *A, int Am, int An, int LDA)
+void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int M, int N,
+                         CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
+                         const CHAM_desc_t *A, int Am, int An )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, M, N, alpha, beta, ptrA, LDA) depend(inout:ptrA[0])
-    CORE_zlaset(uplo, M, N, alpha, beta, ptrA, LDA);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+#pragma omp task firstprivate( uplo, M, N, alpha, beta, tileA ) depend( inout:tileA[0] )
+    TCORE_zlaset( uplo, M, N, alpha, beta, tileA );
 }
diff --git a/runtime/openmp/codelets/codelet_zlaset2.c b/runtime/openmp/codelets/codelet_zlaset2.c
index 87fb57d9e275372ae9cd86ae62617f466c2ebf65..0203e4b79b73d2e2a0e53a7cc8f5c43bb33bb15c 100644
--- a/runtime/openmp/codelets/codelet_zlaset2.c
+++ b/runtime/openmp/codelets/codelet_zlaset2.c
@@ -2,71 +2,30 @@
  *
  * @file openmp/codelet_zlaset2.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zlaset2 StarPU codelet
+ * @brief Chameleon zlaset2 OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- *  CORE_zlaset2 - Sets the elements of the matrix A to alpha.
- *  Not LAPACK compliant! Read below.
- *
- *******************************************************************************
- *
- * @param[in] uplo
- *          Specifies which elements of the matrix are to be set
- *          = ChamUpper: STRICT Upper part of A is set to alpha;
- *          = ChamLower: STRICT Lower part of A is set to alpha;
- *          = ChamUpperLower: ALL elements of A are set to alpha.
- *          Not LAPACK Compliant.
- *
- * @param[in] M
- *          The number of rows of the matrix A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the matrix A.  N >= 0.
- *
- * @param[in] alpha
- *         The constant to which the elements are to be set.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, A has been set to alpha accordingly.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- */
-void INSERT_TASK_zlaset2(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, int M, int N,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int LDA)
+void INSERT_TASK_zlaset2( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo, int M, int N,
+                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An )
 {
 
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, M, N, alpha, ptrA, LDA) depend(inout:ptrA[0])
-    CORE_zlaset2(uplo, M, N, alpha, ptrA, LDA);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+#pragma omp task firstprivate( uplo, M, N, alpha, tileA ) depend( inout:tileA[0] )
+    TCORE_zlaset2( uplo, M, N, alpha, tileA );
 }
diff --git a/runtime/openmp/codelets/codelet_zlatro.c b/runtime/openmp/codelets/codelet_zlatro.c
index 86c0c4b884773083383c5fb9ce7a5615c7540dda..408a5a7fdec6072d0a747d707fdc56f3d97e77be 100644
--- a/runtime/openmp/codelets/codelet_zlatro.c
+++ b/runtime/openmp/codelets/codelet_zlatro.c
@@ -2,45 +2,32 @@
  *
  * @file openmp/codelet_zlatro.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zlatro StarPU codelet
+ * @brief Chameleon zlatro OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Julien Langou
- * @author Henricus Bouwmeester
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
 void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans,
                          int m, int n, int mb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-#pragma omp task firstprivate(uplo, trans, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
-    CORE_zlatro(uplo, trans, m, n, ptrA, lda, ptrB, ldb);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+#pragma omp task firstprivate( uplo, trans, m, n, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+    TCORE_zlatro( uplo, trans, m, n, tileA, tileB );
 }
diff --git a/runtime/openmp/codelets/codelet_zlauum.c b/runtime/openmp/codelets/codelet_zlauum.c
index 3d729f974fadd7aca74a3be1a6790469b0a5bb1c..48f1704d1024f4ae1b7b3114931639c7cbd8eb47 100644
--- a/runtime/openmp/codelets/codelet_zlauum.c
+++ b/runtime/openmp/codelets/codelet_zlauum.c
@@ -2,42 +2,29 @@
  *
  * @file openmp/codelet_zlauum.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zlauum StarPU codelet
+ * @brief Chameleon zlauum OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Julien Langou
- * @author Henricus Bouwmeester
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_zlauum(const RUNTIME_option_t *options,
+void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda)
+                       const CHAM_desc_t *A, int Am, int An )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0])
-    CORE_zlauum(uplo, n, ptrA, lda);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+#pragma omp task firstprivate( uplo, n, tileA ) depend( inout:tileA[0] )
+    TCORE_zlauum( uplo, n, tileA );
 }
diff --git a/runtime/openmp/codelets/codelet_zplghe.c b/runtime/openmp/codelets/codelet_zplghe.c
index f7721e74ca45d8c706b2984c55df581e80c81896..148360e139f4e4a41fb34e01324ebf6aa23445e4 100644
--- a/runtime/openmp/codelets/codelet_zplghe.c
+++ b/runtime/openmp/codelets/codelet_zplghe.c
@@ -2,37 +2,29 @@
  *
  * @file openmp/codelet_zplghe.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zplghe StarPU codelet
+ * @brief Chameleon zplghe OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Piotr Luszczek
- * @author Pierre Lemarinier
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
-                         double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         double bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
                          int bigM, int m0, int n0, unsigned long long int seed )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0])
-    CORE_zplghe( bump, m, n, ptrA, lda, bigM, m0, n0, seed );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+#pragma omp task firstprivate( bump, m, n, tileA, bigM, m0, n0, seed ) depend( inout:tileA[0] )
+    TCORE_zplghe( bump, m, n, tileA, bigM, m0, n0, seed );
 }
diff --git a/runtime/openmp/codelets/codelet_zplgsy.c b/runtime/openmp/codelets/codelet_zplgsy.c
index d41878c377a5b2b4e9a8326195a2e3e27412cf22..f33f432205e1d2a8122eb62c9302163859049379 100644
--- a/runtime/openmp/codelets/codelet_zplgsy.c
+++ b/runtime/openmp/codelets/codelet_zplgsy.c
@@ -2,37 +2,29 @@
  *
  * @file openmp/codelet_zplgsy.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zplgsy StarPU codelet
+ * @brief Chameleon zplgsy OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Piotr Luszczek
- * @author Pierre Lemarinier
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
-                         CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
                          int bigM, int m0, int n0, unsigned long long int seed )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0])
-    CORE_zplgsy( bump, m, n, ptrA, lda, bigM, m0, n0, seed );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+#pragma omp task firstprivate( bump, m, n, tileA, bigM, m0, n0, seed ) depend( inout:tileA[0] )
+    TCORE_zplgsy( bump, m, n, tileA, bigM, m0, n0, seed );
 }
diff --git a/runtime/openmp/codelets/codelet_zplrnt.c b/runtime/openmp/codelets/codelet_zplrnt.c
index 9827cc74c1cd01889c466e0a3a5aa7aa94c4fde3..4251214c6143fed6b62ac4f9cf3c3bc49d5268e2 100644
--- a/runtime/openmp/codelets/codelet_zplrnt.c
+++ b/runtime/openmp/codelets/codelet_zplrnt.c
@@ -2,37 +2,29 @@
  *
  * @file openmp/codelet_zplrnt.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zplrnt StarPU codelet
+ * @brief Chameleon zplrnt OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Piotr Luszczek
- * @author Pierre Lemarinier
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
-                         int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         int m, int n, const CHAM_desc_t *A, int Am, int An,
                          int bigM, int m0, int n0, unsigned long long int seed )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0])
-    CORE_zplrnt( m, n, ptrA, lda, bigM, m0, n0, seed );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+#pragma omp task firstprivate( m, n, tileA, bigM, m0, n0, seed ) depend( inout:tileA[0] )
+    TCORE_zplrnt( m, n, tileA, bigM, m0, n0, seed );
 }
diff --git a/runtime/openmp/codelets/codelet_zplssq.c b/runtime/openmp/codelets/codelet_zplssq.c
index ad59a3eb6955f386297753e19bc79a9a12f32bc0..260a1f29bde9bb3a68d5b8976b2d9a4fe8c6210f 100644
--- a/runtime/openmp/codelets/codelet_zplssq.c
+++ b/runtime/openmp/codelets/codelet_zplssq.c
@@ -2,45 +2,47 @@
  *
  * @file openmp/codelet_zplssq.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zplssq StarPU codelet
+ * @brief Chameleon zplssq OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
- * @author Mathieu Faverge
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include <math.h>
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
                          cham_store_t storev, int M, int N,
                          const CHAM_desc_t *IN,  int INm,  int INn,
                          const CHAM_desc_t *OUT, int OUTm, int OUTn )
 {
-    double *sclssq_in  = RTBLKADDR(IN,  double, INm,  INn );
-    double *sclssq_out = RTBLKADDR(OUT, double, OUTm, OUTn);
-#pragma omp task firstprivate(storev, M, N) depend(in: sclssq_in[0]) depend(inout: sclssq_out[0])
-    CORE_zplssq(storev, M, N, sclssq_in, sclssq_out);
+    CHAM_tile_t *tileIN  = IN->get_blktile( IN, INm, INn );
+    CHAM_tile_t *tileOUT = OUT->get_blktile( OUT, OUTm, OUTn );
+
+    assert( tileIN->format  & CHAMELEON_TILE_FULLRANK );
+    assert( tileOUT->format & CHAMELEON_TILE_FULLRANK );
+
+#pragma omp task firstprivate( storev, M, N ) depend( in: tileIN[0] ) depend( inout: tileOUT[0] )
+    CORE_zplssq( storev, M, N, tileIN->mat, tileOUT->mat );
 }
 
 void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, int N,
                           const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
 {
-    double *res = RTBLKADDR(RESULT, double, RESULTm, RESULTn);
+    CHAM_tile_t *tileRESULT = RESULT->get_blktile( RESULT, RESULTm, RESULTn );
+
+    assert( tileRESULT->format & CHAMELEON_TILE_FULLRANK );
 
-#pragma omp task firstprivate(N) depend(inout: res[0])
-    CORE_zplssq2(N, res);
+#pragma omp task firstprivate( N ) depend( inout: tileRESULT[0] )
+    CORE_zplssq2( N, tileRESULT->mat );
 }
diff --git a/runtime/openmp/codelets/codelet_zpotrf.c b/runtime/openmp/codelets/codelet_zpotrf.c
index 72ac47a75b6c56bc04ff96b3f51438e5e6e52179..aea59eab66db7d93f2719682936a730bb5a11505 100644
--- a/runtime/openmp/codelets/codelet_zpotrf.c
+++ b/runtime/openmp/codelets/codelet_zpotrf.c
@@ -2,46 +2,33 @@
  *
  * @file openmp/codelet_zpotrf.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zpotrf StarPU codelet
+ * @brief Chameleon zpotrf OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
 #include "coreblas.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_zpotrf(const RUNTIME_option_t *options,
+void INSERT_TASK_zpotrf( const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       int iinfo)
+                       const CHAM_desc_t *A, int Am, int An,
+                       int iinfo )
 {
-    (void)nb;
+    ( void )nb;
     int info = 0;
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, n, lda, info, ptrA) depend(inout:ptrA[0])
-    CORE_zpotrf(uplo, n, ptrA, lda, &info);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+#pragma omp task firstprivate( uplo, n, info, tileA ) depend( inout:tileA[0] )
+    TCORE_zpotrf( uplo, n, tileA, &info );
 }
diff --git a/runtime/openmp/codelets/codelet_zssssm.c b/runtime/openmp/codelets/codelet_zssssm.c
index c1fd4b896400c43abebc5da2bfc150b0db376a88..a0e23614ed8dd39373d40e8c1540d22ab60d69ee 100644
--- a/runtime/openmp/codelets/codelet_zssssm.c
+++ b/runtime/openmp/codelets/codelet_zssssm.c
@@ -2,114 +2,38 @@
  *
  * @file openmp/codelet_zssssm.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zssssm StarPU codelet
+ * @brief Chameleon zssssm OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
-
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- *  CORE_zssssm applies the LU factorization update from a complex
- *  matrix formed by a lower triangular IB-by-K tile L1 on top of a
- *  M2-by-K tile L2 to a second complex matrix formed by a M1-by-N1
- *  tile A1 on top of a M2-by-N2 tile A2 (N1 == N2).
- *
- *  This is the right-looking Level 2.5 BLAS version of the algorithm.
- *
- *******************************************************************************
- *
- * @param[in] M1
- *         The number of rows of the tile A1.  M1 >= 0.
- *
- * @param[in] N1
- *         The number of columns of the tile A1.  N1 >= 0.
- *
- * @param[in] M2
- *         The number of rows of the tile A2 and of the tile L2.
- *         M2 >= 0.
- *
- * @param[in] N2
- *         The number of columns of the tile A2.  N2 >= 0.
- *
- * @param[in] K
- *         The number of columns of the tiles L1 and L2.  K >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A1
- *         On entry, the M1-by-N1 tile A1.
- *         On exit, A1 is updated by the application of L (L1 L2).
- *
- * @param[in] LDA1
- *         The leading dimension of the array A1.  LDA1 >= max(1,M1).
- *
- * @param[in,out] A2
- *         On entry, the M2-by-N2 tile A2.
- *         On exit, A2 is updated by the application of L (L1 L2).
- *
- * @param[in] LDA2
- *         The leading dimension of the array A2.  LDA2 >= max(1,M2).
- *
- * @param[in] L1
- *         The IB-by-K lower triangular tile as returned by
- *         CORE_ztstrf.
- *
- * @param[in] LDL1
- *         The leading dimension of the array L1.  LDL1 >= max(1,IB).
- *
- * @param[in] L2
- *         The M2-by-K tile as returned by CORE_ztstrf.
- *
- * @param[in] LDL2
- *         The leading dimension of the array L2.  LDL2 >= max(1,M2).
- *
- * @param[in] IPIV
- *         The pivot indices array of size K as returned by
- *         CORE_ztstrf.
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- *
- */
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
+void INSERT_TASK_zssssm( const RUNTIME_option_t *options,
                        int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                       const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                       const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                       const CHAM_desc_t *L1, int L1m, int L1n, int ldl1,
-                       const CHAM_desc_t *L2, int L2m, int L2n, int ldl2,
-                       const int *IPIV)
+                       const CHAM_desc_t *A1, int A1m, int A1n,
+                       const CHAM_desc_t *A2, int A2m, int A2n,
+                       const CHAM_desc_t *L1, int L1m, int L1n,
+                       const CHAM_desc_t *L2, int L2m, int L2n,
+                       const int *IPIV )
 {
-    CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n);
-    CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
-    CHAMELEON_Complex64_t *ptrL1 = RTBLKADDR(L1, CHAMELEON_Complex64_t, L1m, L1n);
-    CHAMELEON_Complex64_t *ptrL2 = RTBLKADDR(L2, CHAMELEON_Complex64_t, L2m, L2n);
+    CHAM_tile_t *tileA1 = A1->get_blktile( A1, A1m, A1n );
+    CHAM_tile_t *tileA2 = A2->get_blktile( A2, A2m, A2n );
+    CHAM_tile_t *tileL1 = L1->get_blktile( L1, L1m, L1n );
+    CHAM_tile_t *tileL2 = L2->get_blktile( L2, L2m, L2n );
 
-#pragma omp task firstprivate(m1, n1, m2, n2, k, ib, ptrA1, ptrA2, ptrL1, ptrL2, lda1, lda2, ldl1, ldl2, IPIV) \
-    depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrL1[0], ptrL2[0])
-    CORE_zssssm(m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrL1, ldl1, ptrL2, ldl2, IPIV);
+#pragma omp task firstprivate( m1, n1, m2, n2, k, ib, tileA1, tileA2, tileL1, tileL2, IPIV ) \
+    depend( inout:tileA1[0], tileA2[0] ) depend( in:tileL1[0], tileL2[0] )
+    TCORE_zssssm( m1, n1, m2, n2, k, ib, tileA1, tileA2, tileL1, tileL2, IPIV );
 }
diff --git a/runtime/openmp/codelets/codelet_zsymm.c b/runtime/openmp/codelets/codelet_zsymm.c
index 85bb6c0fe491cbc9e14b655cdc64e4a396295d4e..60bbbfbd1914f5e1212c8765778f0c9bd7498c50 100644
--- a/runtime/openmp/codelets/codelet_zsymm.c
+++ b/runtime/openmp/codelets/codelet_zsymm.c
@@ -2,49 +2,38 @@
  *
  * @file openmp/codelet_zsymm.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zsymm StarPU codelet
+ * @brief Chameleon zsymm OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
-
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
+#include "coreblas/coreblas_ztile.h"
+
+void INSERT_TASK_zsymm( const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
-    CORE_zsymm(side, uplo,
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
+#pragma omp task firstprivate( side, uplo, m, n, alpha, tileA, tileB, beta, tileC ) depend( in:tileA[0], tileB[0] ) depend( inout:tileC[0] )
+    TCORE_zsymm( side, uplo,
         m, n,
-        alpha, ptrA, lda,
-        ptrB, ldb,
-        beta, ptrC, ldc);
+        alpha, tileA,
+        tileB,
+        beta, tileC );
 }
diff --git a/runtime/openmp/codelets/codelet_zsyr2k.c b/runtime/openmp/codelets/codelet_zsyr2k.c
index bedb0ef46e2d4bf7203ba46df83e1da70ab95e02..73d09b143ba46b8bd15ab7d1ebf2e145821bc628 100644
--- a/runtime/openmp/codelets/codelet_zsyr2k.c
+++ b/runtime/openmp/codelets/codelet_zsyr2k.c
@@ -2,49 +2,36 @@
  *
  * @file openmp/codelet_zsyr2k.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zsyr2k StarPU codelet
+ * @brief Chameleon zsyr2k OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
+void INSERT_TASK_zsyr2k( const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_trans_t trans,
                        int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn,
+                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn )
 {
-    (void)nb;
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(uplo, trans, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
-    CORE_zsyr2k(uplo, trans,
-                 n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc);
+    ( void )nb;
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
+#pragma omp task firstprivate( uplo, trans, n, k, alpha, tileA, tileB, beta, tileC ) depend( in:tileA[0], tileB[0] ) depend( inout:tileC[0] )
+    TCORE_zsyr2k( uplo, trans,
+                 n, k, alpha, tileA, tileB, beta, tileC );
 }
diff --git a/runtime/openmp/codelets/codelet_zsyrk.c b/runtime/openmp/codelets/codelet_zsyrk.c
index 797aa4b089b05621c210d9928b23b9500f7371f0..89d674a7fea68c4ad994fedc9348947e75e86d62 100644
--- a/runtime/openmp/codelets/codelet_zsyrk.c
+++ b/runtime/openmp/codelets/codelet_zsyrk.c
@@ -2,49 +2,36 @@
  *
  * @file openmp/codelet_zsyrk.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zsyrk StarPU codelet
+ * @brief Chameleon zsyrk OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
+void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
                       cham_uplo_t uplo, cham_trans_t trans,
                       int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn )
 {
-    (void)nb;
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(uplo, trans, n, k, alpha, ptrA, lda, beta, ptrC, ldc) depend(in:ptrA[0]) depend(inout:ptrC[0])
-    CORE_zsyrk(uplo, trans,
+    ( void )nb;
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
+#pragma omp task firstprivate( uplo, trans, n, k, alpha, tileA, beta, tileC ) depend( in:tileA[0] ) depend( inout:tileC[0] )
+    TCORE_zsyrk( uplo, trans,
         n, k,
-        alpha, ptrA, lda,
-        beta, ptrC, ldc);
+        alpha, tileA,
+        beta, tileC );
 }
diff --git a/runtime/openmp/codelets/codelet_zsyssq.c b/runtime/openmp/codelets/codelet_zsyssq.c
index 32a7dc9d52775570d9575755b8d5c05d53945b70..e767eba613cd5394594b997ac295598e698b2831 100644
--- a/runtime/openmp/codelets/codelet_zsyssq.c
+++ b/runtime/openmp/codelets/codelet_zsyssq.c
@@ -2,33 +2,31 @@
  *
  * @file openmp/codelet_zsyssq.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zsyssq StarPU codelet
+ * @brief Chameleon zsyssq OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
                         cham_store_t storev, cham_uplo_t uplo, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *A, int Am, int An,
                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    double *ptrSCALESUMSQ = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
-#pragma omp task firstprivate(storev, uplo, n, ptrA, lda, ptrSCALESUMSQ) depend(in:ptrA[0]) depend(inout:ptrSCALESUMSQ[0])
-    CORE_zsyssq( storev, uplo, n, ptrA, lda, ptrSCALESUMSQ );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileSCALESUMSQ = SCALESUMSQ->get_blktile( SCALESUMSQ, SCALESUMSQm, SCALESUMSQn );
+#pragma omp task firstprivate( storev, uplo, n, tileA, tileSCALESUMSQ ) depend( in:tileA[0] ) depend( inout:tileSCALESUMSQ[0] )
+    TCORE_zsyssq( storev, uplo, n, tileA, tileSCALESUMSQ );
 }
diff --git a/runtime/openmp/codelets/codelet_zsytrf_nopiv.c b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c
index 3fb6bb2c4f2fe51b72c4b4ea729a9649fc867fec..9f6debd57eb47570118cc317678aabbcad913f10 100644
--- a/runtime/openmp/codelets/codelet_zsytrf_nopiv.c
+++ b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c
@@ -2,36 +2,30 @@
  *
  * @file openmp/codelet_zsytrf_nopiv.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zsytrf_nopiv StarPU codelet
+ * @brief Chameleon zsytrf_nopiv OpenMP codelet
  *
  * @version 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
- * @author Florent Pruvost
- * @author Marc Sergent
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> c
  *
  */
-
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options,
-                             cham_uplo_t uplo, int n, int nb,
-                             const CHAM_desc_t *A, int Am, int An, int lda,
-                             int iinfo)
+#include "coreblas/coreblas_ztile.h"
+
+void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options,
+                               cham_uplo_t uplo, int n, int nb,
+                               const CHAM_desc_t *A, int Am, int An,
+                               int iinfo )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0])
-    CORE_zsytf2_nopiv(uplo, n, ptrA, lda);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+#pragma omp task firstprivate( uplo, n, tileA ) depend( inout:tileA[0] )
+    TCORE_zsytf2_nopiv( uplo, n, tileA );
 }
diff --git a/runtime/openmp/codelets/codelet_ztplqt.c b/runtime/openmp/codelets/codelet_ztplqt.c
index d35487b004ef23d4aa2ecc47884dece306387762..db708143e4c1d195074b1d753634179460790222 100644
--- a/runtime/openmp/codelets/codelet_ztplqt.c
+++ b/runtime/openmp/codelets/codelet_ztplqt.c
@@ -2,42 +2,41 @@
  *
  * @file openmp/codelet_ztplqt.c
  *
- * @copyright 2009-2016 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon ztplqt StarPU codelet
+ * @brief Chameleon ztplqt OpenMP codelet
  *
  * @version 0.9.2
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> s d c
  *
  */
-
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
                          int M, int N, int L, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         const CHAM_desc_t *T, int Tm, int Tn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
     int ws_size = options->ws_wsize;
 
-#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrA, lda, ptrB, ldb, ptrT, ldt) depend(inout:ptrA[0], ptrB[0]) depend(out:ptrT[0])
+#pragma omp task firstprivate( ws_size, M, N, L, ib, tileA, tileB, tileT ) depend( inout:tileA[0], tileB[0] ) depend( out:tileT[0] )
     {
-      CHAMELEON_Complex64_t work[ws_size];
+        CHAMELEON_Complex64_t work[ws_size];
 
-      CORE_zlaset( ChamUpperLower, ib, M, 0., 0., ptrT, ldt );
-      CORE_ztplqt( M, N, L, ib,
-                   ptrA, lda, ptrB, ldb, ptrT, ldt, work );
+        TCORE_zlaset( ChamUpperLower, ib, M, 0., 0., tileT );
+        TCORE_ztplqt( M, N, L, ib,
+                      tileA, tileB, tileT, work );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_ztpmlqt.c b/runtime/openmp/codelets/codelet_ztpmlqt.c
index 5a131e823f26abbc4e008a8278c39307fdbd96ff..06fbb30a4a3e5a5dcf7355b9b56d3fdfd52f4473 100644
--- a/runtime/openmp/codelets/codelet_ztpmlqt.c
+++ b/runtime/openmp/codelets/codelet_ztpmlqt.c
@@ -2,40 +2,40 @@
  *
  * @file openmp/codelet_ztpmlqt.c
  *
- * @copyright 2009-2016 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
- * @brief Chameleon ztpmlqt StarPU codelet
+ * @brief Chameleon ztpmlqt OpenMP codelet
  *
  * @version 0.9.2
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> s d c
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
                           cham_side_t side, cham_trans_t trans,
                           int M, int N, int K, int L, int ib, int nb,
-                          const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                          const CHAM_desc_t *A, int Am, int An, int lda,
-                          const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                          const CHAM_desc_t *V, int Vm, int Vn,
+                          const CHAM_desc_t *T, int Tm, int Tn,
+                          const CHAM_desc_t *A, int Am, int An,
+                          const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
-    CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
+    CHAM_tile_t *tileV = V->get_blktile( V, Vm, Vn );
     int ws_size = options->ws_wsize;
 
-#pragma omp task firstprivate(ws_size, side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0])
+#pragma omp task firstprivate( ws_size, side, trans, M, N, K, L, ib, tileV, tileT, tileA, tileB ) depend( in:tileV[0], tileT[0] ) depend( inout:tileA[0], tileB[0] )
     {
         CHAMELEON_Complex64_t work[ws_size];
-        CORE_ztpmlqt( side, trans, M, N, K, L, ib,
-                      ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work );
+        TCORE_ztpmlqt( side, trans, M, N, K, L, ib,
+                      tileV, tileT, tileA, tileB, work );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_ztpmqrt.c b/runtime/openmp/codelets/codelet_ztpmqrt.c
index a281b35cca6d3c16611f33754f47683349e66ec3..33a746216cc69c397110cbeac30b2c9181706473 100644
--- a/runtime/openmp/codelets/codelet_ztpmqrt.c
+++ b/runtime/openmp/codelets/codelet_ztpmqrt.c
@@ -2,40 +2,40 @@
  *
  * @file openmp/codelet_ztpmqrt.c
  *
- * @copyright 2009-2016 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
- * @brief Chameleon ztpmqrt StarPU codelet
+ * @brief Chameleon ztpmqrt OpenMP codelet
  *
  * @version 0.9.2
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> s d c
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
                           cham_side_t side, cham_trans_t trans,
                           int M, int N, int K, int L, int ib, int nb,
-                          const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                          const CHAM_desc_t *A, int Am, int An, int lda,
-                          const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                          const CHAM_desc_t *V, int Vm, int Vn,
+                          const CHAM_desc_t *T, int Tm, int Tn,
+                          const CHAM_desc_t *A, int Am, int An,
+                          const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
-    CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
+    CHAM_tile_t *tileV = V->get_blktile( V, Vm, Vn );
     int ws_size = options->ws_wsize;
 
-#pragma omp task firstprivate(ws_size, side, trans, M, N, K, L, ib, nb, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0])
+#pragma omp task firstprivate( ws_size, side, trans, M, N, K, L, ib, nb, tileV, tileT, tileA, tileB ) depend( in:tileV[0], tileT[0] ) depend( inout:tileA[0], tileB[0] )
     {
         CHAMELEON_Complex64_t tmp[ws_size];
-        CORE_ztpmqrt( side, trans, M, N, K, L, ib,
-                      ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, tmp );
+        TCORE_ztpmqrt( side, trans, M, N, K, L, ib,
+                      tileV, tileT, tileA, tileB, tmp );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_ztpqrt.c b/runtime/openmp/codelets/codelet_ztpqrt.c
index 13ce6a8f5ddf06db09483faf1be2f71f41ada1f2..bba9bfea39ba2d2c973521cec1c1edb21f164dde 100644
--- a/runtime/openmp/codelets/codelet_ztpqrt.c
+++ b/runtime/openmp/codelets/codelet_ztpqrt.c
@@ -2,41 +2,41 @@
  *
  * @file openmp/codelet_ztpqrt.c
  *
- * @copyright 2009-2016 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon ztpqrt StarPU codelet
+ * @brief Chameleon ztpqrt OpenMP codelet
  *
  * @version 0.9.2
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> s d c
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
                          int M, int N, int L, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         const CHAM_desc_t *T, int Tm, int Tn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
     int ws_size = options->ws_wsize;
 
-#pragma omp task firstprivate(ws_size, M, N, L, ib, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(inout:ptrA[0], ptrB[0]) depend(out:ptrT[0])
+#pragma omp task firstprivate( ws_size, M, N, L, ib, tileT, tileA, tileB ) depend( inout:tileA[0], tileB[0] ) depend( out:tileT[0] )
     {
       CHAMELEON_Complex64_t tmp[ws_size];
 
-      CORE_zlaset( ChamUpperLower, ib, N, 0., 0., ptrT, ldt );
-      CORE_ztpqrt( M, N, L, ib,
-                   ptrA, lda, ptrB, ldb, ptrT, ldt, tmp );
+      TCORE_zlaset( ChamUpperLower, ib, N, 0., 0., tileT );
+      TCORE_ztpqrt( M, N, L, ib,
+                   tileA, tileB, tileT, tmp );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_ztradd.c b/runtime/openmp/codelets/codelet_ztradd.c
index dbf8ba72f581e67a969c75c1ca434c23d21c5f5d..18dc9cc2ade2e47b9d25ffdac95f7d12bfad2e75 100644
--- a/runtime/openmp/codelets/codelet_ztradd.c
+++ b/runtime/openmp/codelets/codelet_ztradd.c
@@ -2,94 +2,32 @@
  *
  * @file openmp/codelet_ztradd.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon ztradd StarPU codelet
+ * @brief Chameleon ztradd OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
-
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- ******************************************************************************
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd.
- *
- *       B <- alpha * op(A)  + beta * B,
- *
- * where op(X) = X, X', or conj(X')
- *
- *******************************************************************************
- *
- * @param[in] uplo
- *          Specifies the shape of A and B matrices:
- *          = ChamUpperLower: A and B are general matrices.
- *          = ChamUpper: op(A) and B are upper trapezoidal matrices.
- *          = ChamLower: op(A) and B are lower trapezoidal matrices.
- *
- * @param[in] trans
- *          Specifies whether the matrix A is non-transposed, transposed, or
- *          conjugate transposed
- *          = ChamNoTrans:   op(A) = A
- *          = ChamTrans:     op(A) = A'
- *          = ChamConjTrans: op(A) = conj(A')
- *
- * @param[in] M
- *          Number of rows of the matrices op(A) and B.
- *
- * @param[in] N
- *          Number of columns of the matrices op(A) and B.
- *
- * @param[in] alpha
- *          Scalar factor of A.
- *
- * @param[in] A
- *          Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M
- *          otherwise.
- *
- * @param[in] LDA
- *          Leading dimension of the array A. LDA >= max(1,k), with k=M, if
- *          trans = ChamNoTrans, and k=N otherwise.
- *
- * @param[in] beta
- *          Scalar factor of B.
- *
- * @param[in,out] B
- *          Matrix of size LDB-by-N.
- *          On exit, B = alpha * op(A) + beta * B
- *
- * @param[in] LDB
- *          Leading dimension of the array B. LDB >= max(1,M)
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
-#pragma omp task firstprivate(uplo, trans, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
-    CORE_ztradd(uplo, trans, m, n, alpha, ptrA, lda, beta, ptrB, ldb);
+#pragma omp task firstprivate( uplo, trans, m, n, alpha, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+    TCORE_ztradd( uplo, trans, m, n, alpha, tileA, beta, tileB );
 }
diff --git a/runtime/openmp/codelets/codelet_ztrasm.c b/runtime/openmp/codelets/codelet_ztrasm.c
index 715dc89a1d91da6444062094fe1fa75120168833..d3392d90e716835424c32b2d46ae9e3deca6e6b3 100644
--- a/runtime/openmp/codelets/codelet_ztrasm.c
+++ b/runtime/openmp/codelets/codelet_ztrasm.c
@@ -2,33 +2,31 @@
  *
  * @file openmp/codelet_ztrasm.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon ztrasm StarPU codelet
+ * @brief Chameleon ztrasm OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
-
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-void INSERT_TASK_ztrasm(const RUNTIME_option_t *options,
+#include "coreblas/coreblas_ztile.h"
+
+void INSERT_TASK_ztrasm( const RUNTIME_option_t *options,
                        cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    double *ptrB = RTBLKADDR(B, double, Bm, Bn);
-#pragma omp task firstprivate(storev, uplo, diag, M, N, ptrA, lda, ptrB) depend(in:ptrA[0]) depend(inout:ptrB[0])
-    CORE_ztrasm(storev, uplo, diag, M, N, ptrA, lda, ptrB);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+#pragma omp task firstprivate( storev, uplo, diag, M, N, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+    TCORE_ztrasm( storev, uplo, diag, M, N, tileA, tileB->mat );
 }
diff --git a/runtime/openmp/codelets/codelet_ztrmm.c b/runtime/openmp/codelets/codelet_ztrmm.c
index 248865be1a32d8963fc56b531e9a77598a6ab245..c7a329359ff607417812294a2420305f09948c6c 100644
--- a/runtime/openmp/codelets/codelet_ztrmm.c
+++ b/runtime/openmp/codelets/codelet_ztrmm.c
@@ -2,47 +2,36 @@
  *
  * @file openmp/codelet_ztrmm.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon ztrmm StarPU codelet
+ * @brief Chameleon ztrmm OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Julien Langou
- * @author Henricus Bouwmeester
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
-
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
+#include "coreblas/coreblas_ztile.h"
+
+void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-#pragma omp task firstprivate(side, uplo, transA, diag, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
-    CORE_ztrmm(side, uplo,
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+#pragma omp task firstprivate( side, uplo, transA, diag, m, n, alpha, tileA, tileB ) depend( in:tileA[0] ) depend( inout:tileB[0] )
+    TCORE_ztrmm( side, uplo,
         transA, diag,
         m, n,
-        alpha, ptrA, lda,
-        ptrB, ldb);
+        alpha, tileA,
+        tileB );
 }
diff --git a/runtime/openmp/codelets/codelet_ztrsm.c b/runtime/openmp/codelets/codelet_ztrsm.c
index 062bfc51c012d35036a3511680ee84d3429f2f5a..bc02e562a42124bc515ab7f005e6bf76f6d1c7dd 100644
--- a/runtime/openmp/codelets/codelet_ztrsm.c
+++ b/runtime/openmp/codelets/codelet_ztrsm.c
@@ -2,49 +2,36 @@
  *
  * @file openmp/codelet_ztrsm.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon ztrsm StarPU codelet
+ * @brief Chameleon ztrsm OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_ztrsm(const RUNTIME_option_t *options,
+void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-#pragma omp task firstprivate(side, uplo, transA, diag, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout: ptrB[0])
-    CORE_ztrsm(side, uplo,
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+#pragma omp task firstprivate( side, uplo, transA, diag, m, n, alpha, tileA, tileB ) depend( in:tileA[0] ) depend( inout: tileB[0] )
+    TCORE_ztrsm( side, uplo,
         transA, diag,
         m, n,
-        alpha, ptrA, lda,
-        ptrB, ldb);
+        alpha, tileA,
+        tileB );
 }
diff --git a/runtime/openmp/codelets/codelet_ztrssq.c b/runtime/openmp/codelets/codelet_ztrssq.c
index 38a69fca2afba6ad56389152e6f88f30e556a7d3..de53dfe7cc55cfce797c217b628ad2b8d6109c7f 100644
--- a/runtime/openmp/codelets/codelet_ztrssq.c
+++ b/runtime/openmp/codelets/codelet_ztrssq.c
@@ -2,34 +2,32 @@
  *
  * @file openmp/codelet_ztrssq.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon ztrssq StarPU codelet
+ * @brief Chameleon ztrssq OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
                         cham_uplo_t uplo, cham_diag_t diag,
                         int m, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
+                        const CHAM_desc_t *A, int Am, int An,
+                        const CHAM_desc_t *W, int Wm, int Wn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    double *ptrSCALESUMSQ = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
-#pragma omp task firstprivate(uplo, diag, m, n, ptrA, lda, SCALESUMSQ) depend(in:ptrA[0]) depend(inout:ptrSCALESUMSQ[0])
-    CORE_ztrssq( uplo, diag, m, n, ptrA, lda, &ptrSCALESUMSQ[0], &ptrSCALESUMSQ[1]);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileW = W->get_blktile( W, Wm, Wn );
+#pragma omp task firstprivate( uplo, diag, m, n, tileA, tileW ) depend( in:tileA[0] ) depend( inout:tileW[0] )
+    TCORE_ztrssq( uplo, diag, m, n, tileA, tileW );
 }
diff --git a/runtime/openmp/codelets/codelet_ztrtri.c b/runtime/openmp/codelets/codelet_ztrtri.c
index ea113cdaeeea8b359a3f32e758c0eeb56cbceee5..8c0d9b8b60e38f1c1a78055b8ab4457a66d9ceb8 100644
--- a/runtime/openmp/codelets/codelet_ztrtri.c
+++ b/runtime/openmp/codelets/codelet_ztrtri.c
@@ -2,42 +2,31 @@
  *
  * @file openmp/codelet_ztrtri.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon ztrtri StarPU codelet
+ * @brief Chameleon ztrtri OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Julien Langou
- * @author Henricus Bouwmeester
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
-
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_ztrtri(const RUNTIME_option_t *options,
+#include "coreblas/coreblas_ztile.h"
+
+void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_diag_t diag,
                        int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       int iinfo)
+                       const CHAM_desc_t *A, int Am, int An,
+                       int iinfo )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, diag, n, ptrA, lda, iinfo) depend(inout:ptrA[0])
-    CORE_ztrtri(uplo, diag, n, ptrA, lda, &iinfo);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+#pragma omp task firstprivate( uplo, diag, n, tileA, iinfo ) depend( inout:tileA[0] )
+    TCORE_ztrtri( uplo, diag, n, tileA, &iinfo );
 }
diff --git a/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c b/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c
index 4bd4eec9306c27472776e856597e850fe5d4ac14..4c5ed64a58ca44edf3c6594c6a654b40dcde8ade 100644
--- a/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c
+++ b/runtime/openmp/codelets/codelet_ztsmlq_hetra1.c
@@ -2,50 +2,43 @@
  *
  * @file openmp/codelet_ztsmlq_hetra1.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon ztsmlq_hetra1 StarPU codelet
+ * @brief Chameleon ztsmlq_hetra1 OpenMP codelet
  *
  * @version 0.9.2
- * @author Hatem Ltaief
- * @author Mathieu Faverge
- * @author Azzam Haidar
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options,
+#include "coreblas/coreblas_ztile.h"
+
+void INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options,
                               cham_side_t side, cham_trans_t trans,
                               int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                              const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                              const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                              const CHAM_desc_t *V,  int Vm,  int Vn,  int ldv,
-                              const CHAM_desc_t *T,  int Tm,  int Tn,  int ldt)
+                              const CHAM_desc_t *A1, int A1m, int A1n,
+                              const CHAM_desc_t *A2, int A2m, int A2n,
+                              const CHAM_desc_t *V,  int Vm,  int Vn,
+                              const CHAM_desc_t *T,  int Tm,  int Tn )
 {
-    CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n);
-    CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
-    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
-    CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
+    CHAM_tile_t *tileA1 = A1->get_blktile( A1, A1m, A1n );
+    CHAM_tile_t *tileA2 = A2->get_blktile( A2, A2m, A2n );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
+    CHAM_tile_t *tileV = V->get_blktile( V, Vm, Vn );
     int ldwork = side == ChamLeft ? ib : nb;
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
+#pragma omp task firstprivate( ws_size, side, trans, m1, n1, m2, n2, k, ib, tileA1, tileA2, tileV, tileT, ldwork ) depend( inout:tileA1[0], tileA2[0] ) depend( in:tileT[0], tileV[0] )
     {
       CHAMELEON_Complex64_t work[ws_size];
-      CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k,
-                         ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
+      TCORE_ztsmlq_hetra1( side, trans, m1, n1, m2, n2, k,
+                         ib, tileA1, tileA2, tileV, tileT, work, ldwork );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c b/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c
index 1a655281b25777187c8eaa7d0997cc8dcd9b0bb4..97f84c5ad13ccc1ef9f2f724c5420eb225e58838 100644
--- a/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c
+++ b/runtime/openmp/codelets/codelet_ztsmqr_hetra1.c
@@ -2,50 +2,43 @@
  *
  * @file openmp/codelet_ztsmqr_hetra1.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon ztsmqr_hetra1 StarPU codelet
+ * @brief Chameleon ztsmqr_hetra1 OpenMP codelet
  *
  * @version 0.9.2
- * @author Hatem Ltaief
- * @author Mathieu Faverge
- * @author Azzam Haidar
  * @author Philippe Virouleau
- * @date 2018-06-15
+ * @author Mathieu Faverge
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
 
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- */
-void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options,
+#include "coreblas/coreblas_ztile.h"
+
+void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options,
                               cham_side_t side, cham_trans_t trans,
                               int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                              const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                              const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                              const CHAM_desc_t *V,  int Vm,  int Vn,  int ldv,
-                              const CHAM_desc_t *T,  int Tm,  int Tn,  int ldt)
+                              const CHAM_desc_t *A1, int A1m, int A1n,
+                              const CHAM_desc_t *A2, int A2m, int A2n,
+                              const CHAM_desc_t *V,  int Vm,  int Vn,
+                              const CHAM_desc_t *T,  int Tm,  int Tn )
 {
-    CHAMELEON_Complex64_t *ptrA1 = RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n);
-    CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
-    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
-    CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
+    CHAM_tile_t *tileA1 = A1->get_blktile( A1, A1m, A1n );
+    CHAM_tile_t *tileA2 = A2->get_blktile( A2, A2m, A2n );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
+    CHAM_tile_t *tileV = V->get_blktile( V, Vm, Vn );
     int ldwork = side == ChamLeft ? ib : nb;
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, side, trans, m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, ldwork) depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrT[0], ptrV[0])
+#pragma omp task firstprivate( ws_size, side, trans, m1, n1, m2, n2, k, ib, tileA1, tileA2, tileV, tileT, ldwork ) depend( inout:tileA1[0], tileA2[0] ) depend( in:tileT[0], tileV[0] )
     {
       CHAMELEON_Complex64_t work[ws_size];
-      CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k,
-                         ib, ptrA1, lda1, ptrA2, lda2, ptrV, ldv, ptrT, ldt, work, ldwork);
+      TCORE_ztsmqr_hetra1( side, trans, m1, n1, m2, n2, k,
+                         ib, tileA1, tileA2, tileV, tileT, work, ldwork );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_ztstrf.c b/runtime/openmp/codelets/codelet_ztstrf.c
index 2db3931ec0967bb9eca6067f665c7950afa29ccc..da5e42a9c7f57b8454b5230e40be2fb0e26d26c2 100644
--- a/runtime/openmp/codelets/codelet_ztstrf.c
+++ b/runtime/openmp/codelets/codelet_ztstrf.c
@@ -2,111 +2,39 @@
  *
  * @file openmp/codelet_ztstrf.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon ztstrf StarPU codelet
+ * @brief Chameleon ztstrf OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
-
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- *  CORE_ztstrf computes an LU factorization of a complex matrix formed
- *  by an upper triangular NB-by-N tile U on top of a M-by-N tile A
- *  using partial pivoting with row interchanges.
- *
- *  This is the right-looking Level 2.5 BLAS version of the algorithm.
- *
- *******************************************************************************
- *
- * @param[in] M
- *         The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in] NB
- *
- * @param[in,out] U
- *         On entry, the NB-by-N upper triangular tile.
- *         On exit, the new factor U from the factorization
- *
- * @param[in] LDU
- *         The leading dimension of the array U.  LDU >= max(1,NB).
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile to be factored.
- *         On exit, the factor L from the factorization
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- * @param[in,out] L
- *         On entry, the IB-by-N lower triangular tile.
- *         On exit, the interchanged rows form the tile A in case of pivoting.
- *
- * @param[in] LDL
- *         The leading dimension of the array L.  LDL >= max(1,IB).
- *
- * @param[out] IPIV
- *         The pivot indices; for 1 <= i <= min(M,N), row i of the
- *         tile U was interchanged with row IPIV(i) of the tile A.
- *
- * @param[in,out] WORK
- *
- * @param[in] LDWORK
- *         The dimension of the array WORK.
- *
- * @param[out] INFO
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
- *              has been completed, but the factor U is exactly
- *              singular, and division by zero will occur if it is used
- *              to solve a system of equations.
- *
- */
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_ztstrf(const RUNTIME_option_t *options,
+void INSERT_TASK_ztstrf( const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
-                       const CHAM_desc_t *U, int Um, int Un, int ldu,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                       const CHAM_desc_t *U, int Um, int Un,
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *L, int Lm, int Ln,
                        int *IPIV,
-                       cham_bool_t check_info, int iinfo)
+                       cham_bool_t check_info, int iinfo )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrU = RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un);
-    CHAMELEON_Complex64_t *ptrL = RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileU = U->get_blktile( U, Um, Un );
+    CHAM_tile_t *tileL = L->get_blktile( L, Lm, Ln );
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, iinfo) depend(inout:ptrA[0], ptrU[0], ptrL[0])
+#pragma omp task firstprivate( ws_size, m, n, ib, nb, tileU, tileA, tileL, IPIV, iinfo ) depend( inout:tileA[0], tileU[0], tileL[0] )
     {
       CHAMELEON_Complex64_t work[ws_size];
-      CORE_ztstrf(m, n, ib, nb, ptrU, ldu, ptrA, lda, ptrL, ldl, IPIV, work, nb, &iinfo);
+      TCORE_ztstrf( m, n, ib, nb, tileU, tileA, tileL, IPIV, work, nb, &iinfo );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_zunmlq.c b/runtime/openmp/codelets/codelet_zunmlq.c
index 92d6e71f8f34c171100880d747fb4f605e50baa3..9b62b78f49e8c8386e391b8c8853db683c05b043 100644
--- a/runtime/openmp/codelets/codelet_zunmlq.c
+++ b/runtime/openmp/codelets/codelet_zunmlq.c
@@ -2,129 +2,39 @@
  *
  * @file openmp/codelet_zunmlq.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zunmlq StarPU codelet
+ * @brief Chameleon zunmlq OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
- * @author Dulceneia Becker
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
-
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- *  CORE_zunmlq overwrites the general complex M-by-N tile C with
- *
- *                    SIDE = 'L'     SIDE = 'R'
- *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q^H * C       C * Q^H
- *
- *  where Q is a complex unitary matrix defined as the product of k
- *  elementary reflectors
- *
- *    Q = H(k) . . . H(2) H(1)
- *
- *  as returned by CORE_zgelqt. Q is of order M if SIDE = 'L' and of order N
- *  if SIDE = 'R'.
- *
- *******************************************************************************
- *
- * @param[in] side
- *         @arg ChamLeft  : apply Q or Q^H from the Left;
- *         @arg ChamRight : apply Q or Q^H from the Right.
- *
- * @param[in] trans
- *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q^H.
- *
- * @param[in] M
- *         The number of rows of the tile C.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile C.  N >= 0.
- *
- * @param[in] K
- *         The number of elementary reflectors whose product defines
- *         the matrix Q.
- *         If SIDE = ChamLeft,  M >= K >= 0;
- *         if SIDE = ChamRight, N >= K >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in] A
- *         Dimension:  (LDA,M) if SIDE = ChamLeft,
- *                     (LDA,N) if SIDE = ChamRight,
- *         The i-th row must contain the vector which defines the
- *         elementary reflector H(i), for i = 1,2,...,k, as returned by
- *         CORE_zgelqt in the first k rows of its array argument A.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,K).
- *
- * @param[in] T
- *         The IB-by-K triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] LDT
- *         The leading dimension of the array T. LDT >= IB.
- *
- * @param[in,out] C
- *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q.
- *
- * @param[in] LDC
- *         The leading dimension of the array C. LDC >= max(1,M).
- *
- * @param[in,out] WORK
- *         On exit, if INFO = 0, WORK(1) returns the optimal LDWORK.
- *
- * @param[in] LDWORK
- *         The dimension of the array WORK.
- *         If SIDE = ChamLeft,  LDWORK >= max(1,N);
- *         if SIDE = ChamRight, LDWORK >= max(1,M).
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zunmlq(const RUNTIME_option_t *options,
+void INSERT_TASK_zunmlq( const RUNTIME_option_t *options,
                        cham_side_t side, cham_trans_t trans,
                        int m, int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn,
+                       const CHAM_desc_t *C, int Cm, int Cn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
-    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0])
+#pragma omp task firstprivate( ws_size, side, trans, m, n, k, ib, nb, tileA, tileT, tileC ) depend( in:tileA[0], tileT[0] ) depend( inout:tileC[0] )
     {
       CHAMELEON_Complex64_t work[ws_size];
-      CORE_zunmlq(side, trans, m, n, k, ib,
-                  ptrA, lda, ptrT, ldt, ptrC, ldc, work, nb);
+      TCORE_zunmlq( side, trans, m, n, k, ib,
+                    tileA, tileT, tileC, work, nb );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_zunmqr.c b/runtime/openmp/codelets/codelet_zunmqr.c
index 66aa62b5dbc6e8ce2d9a74b91ce1fd5ae1d088f8..e8af9ea663eab1febd3a250080fb9305cdda1afb 100644
--- a/runtime/openmp/codelets/codelet_zunmqr.c
+++ b/runtime/openmp/codelets/codelet_zunmqr.c
@@ -2,129 +2,39 @@
  *
  * @file openmp/codelet_zunmqr.c
  *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
  * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
  *                      Univ. Bordeaux. All rights reserved.
  *
  ***
  *
- * @brief Chameleon zunmqr StarPU codelet
+ * @brief Chameleon zunmqr OpenMP codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
- * @author Hatem Ltaief
- * @author Jakub Kurzak
+ * @author Philippe Virouleau
  * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
- * @date 2018-06-15
+ * @date 2019-11-19
  * @precisions normal z -> c d s
  *
  */
-
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-/**
- *
- * @ingroup CORE_CHAMELEON_Complex64_t
- *
- *  CORE_zunmqr overwrites the general complex M-by-N tile C with
- *
- *                    SIDE = 'L'     SIDE = 'R'
- *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q^H * C       C * Q^H
- *
- *  where Q is a complex unitary matrix defined as the product of k
- *  elementary reflectors
- *
- *    Q = H(1) H(2) . . . H(k)
- *
- *  as returned by CORE_zgeqrt. Q is of order M if SIDE = 'L' and of order N
- *  if SIDE = 'R'.
- *
- *******************************************************************************
- *
- * @param[in] side
- *         @arg ChamLeft  : apply Q or Q^H from the Left;
- *         @arg ChamRight : apply Q or Q^H from the Right.
- *
- * @param[in] trans
- *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q^H.
- *
- * @param[in] M
- *         The number of rows of the tile C.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile C.  N >= 0.
- *
- * @param[in] K
- *         The number of elementary reflectors whose product defines
- *         the matrix Q.
- *         If SIDE = ChamLeft,  M >= K >= 0;
- *         if SIDE = ChamRight, N >= K >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in] A
- *         Dimension:  (LDA,K)
- *         The i-th column must contain the vector which defines the
- *         elementary reflector H(i), for i = 1,2,...,k, as returned by
- *         CORE_zgeqrt in the first k columns of its array argument A.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.
- *         If SIDE = ChamLeft,  LDA >= max(1,M);
- *         if SIDE = ChamRight, LDA >= max(1,N).
- *
- * @param[in] T
- *         The IB-by-K triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] LDT
- *         The leading dimension of the array T. LDT >= IB.
- *
- * @param[in,out] C
- *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q.
- *
- * @param[in] LDC
- *         The leading dimension of the array C. LDC >= max(1,M).
- *
- * @param[in,out] WORK
- *         On exit, if INFO = 0, WORK(1) returns the optimal LDWORK.
- *
- * @param[in] LDWORK
- *         The dimension of the array WORK.
- *         If SIDE = ChamLeft,  LDWORK >= max(1,N);
- *         if SIDE = ChamRight, LDWORK >= max(1,M).
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
+#include "coreblas/coreblas_ztile.h"
 
-void INSERT_TASK_zunmqr(const RUNTIME_option_t *options,
+void INSERT_TASK_zunmqr( const RUNTIME_option_t *options,
                        cham_side_t side, cham_trans_t trans,
                        int m, int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn,
+                       const CHAM_desc_t *C, int Cm, int Cn )
 {
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
-    CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
     int ws_size = options->ws_wsize;
-#pragma omp task firstprivate(ws_size, side, trans, m, n, k, ib, nb, ptrA, lda, ptrT, ldt, ptrC, ldc) depend(in:ptrA[0], ptrT[0]) depend(inout:ptrC[0])
+#pragma omp task firstprivate( ws_size, side, trans, m, n, k, ib, nb, tileA, tileT, tileC ) depend( in:tileA[0], tileT[0] ) depend( inout:tileC[0] )
     {
       CHAMELEON_Complex64_t tmp[ws_size];
-      CORE_zunmqr(side, trans, m, n, k, ib,
-          ptrA, lda, ptrT, ldt, ptrC, ldc, tmp, nb);
+      TCORE_zunmqr( side, trans, m, n, k, ib,
+                    tileA, tileT, tileC, tmp, nb );
     }
 }
diff --git a/runtime/openmp/control/runtime_descriptor.c b/runtime/openmp/control/runtime_descriptor.c
index 5fefecb6575d422ce91b03fbbddafe2682e8f3f9..222128c7e4816b3c10e61174fbf096ac41e195ee 100644
--- a/runtime/openmp/control/runtime_descriptor.c
+++ b/runtime/openmp/control/runtime_descriptor.c
@@ -102,5 +102,5 @@ void RUNTIME_data_migrate( const RUNTIME_sequence_t *sequence,
 
 void *RUNTIME_data_getaddr( const CHAM_desc_t *desc, int m, int n )
 {
-    return desc->get_blkaddr( desc, m, n );
+    return desc->get_blktile( desc, m, n );
 }
diff --git a/runtime/openmp/include/chameleon_openmp.h b/runtime/openmp/include/chameleon_openmp.h
index 2e93fe8f01fbf44167c694d428b9347e74a72add..c8bbbe4efd69088aeaf411853e22791b32afd30a 100644
--- a/runtime/openmp/include/chameleon_openmp.h
+++ b/runtime/openmp/include/chameleon_openmp.h
@@ -20,14 +20,7 @@
 #define _chameleon_openmp_h_
 
 #include "coreblas.h"
-
 #include "control/common.h"
 #include <omp.h>
 
-/*
- * Access to block pointer and leading dimension
- */
-#define RTBLKADDR( desc, type, m, n ) ( (type*)RUNTIME_data_getaddr( desc, m, n ) )
-
-
 #endif /* _chameleon_openmp_h_ */
diff --git a/runtime/parsec/codelets/codelet_dzasum.c b/runtime/parsec/codelets/codelet_dzasum.c
index e0faa8dd277a80e58d01c6f1e133f1664113ae44..7f256d1117ea4906a478f1fe45d60ccbc91e7000 100644
--- a/runtime/parsec/codelets/codelet_dzasum.c
+++ b/runtime/parsec/codelets/codelet_dzasum.c
@@ -44,10 +44,11 @@ CORE_dzasum_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_dzasum(const RUNTIME_option_t *options,
                        cham_store_t storev, cham_uplo_t uplo, int M, int N,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *A, int Am, int An,
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_dzasum_parsec, options->priority, "dzasum",
@@ -56,7 +57,7 @@ void INSERT_TASK_dzasum(const RUNTIME_option_t *options,
         sizeof(int),           &M,                                VALUE,
         sizeof(int),           &N,                                VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),           &lda,                              VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( B, double, Bm, Bn ),     INOUT | AFFINITY,
         PARSEC_DTD_ARG_END );
 }
diff --git a/runtime/parsec/codelets/codelet_zbuild.c b/runtime/parsec/codelets/codelet_zbuild.c
index bd85b99b06b15c24513c6dfab51b4d9990023051..6118017b2abb0ae9e9c9db4cd3e8f45d462fb31c 100644
--- a/runtime/parsec/codelets/codelet_zbuild.c
+++ b/runtime/parsec/codelets/codelet_zbuild.c
@@ -43,10 +43,11 @@ CORE_zbuild_parsec( parsec_execution_stream_t *context,
 }
 
 void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *A, int Am, int An,
                         void *user_data, void* user_build_callback )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
     int row_min, row_max, col_min, col_max;
     row_min = Am*A->mb ;
     row_max = Am == A->mt-1 ? A->m-1 : row_min+A->mb-1 ;
@@ -60,7 +61,7 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
         sizeof(int),   &col_min,                          VALUE,
         sizeof(int),   &col_max,                          VALUE,
         PASSED_BY_REF,  RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | OUTPUT | AFFINITY,
-        sizeof(int),   &lda,                              VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(void*), &user_data,                        VALUE,
         sizeof(void*), &user_build_callback,              VALUE,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/codelets/codelet_zgeadd.c b/runtime/parsec/codelets/codelet_zgeadd.c
index da7eb2de21b946bd5e57a414d77202082b433a83..e1246e8af8d8bbbbb0e3a37699fba181217e09aa 100644
--- a/runtime/parsec/codelets/codelet_zgeadd.c
+++ b/runtime/parsec/codelets/codelet_zgeadd.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zgeadd PaRSEC codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Reazul Hoque
  * @date 2015-11-04
@@ -102,10 +100,12 @@ CORE_zgeadd_parsec( parsec_execution_stream_t *context,
  */
 void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
                          cham_trans_t trans, int m, int n, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zgeadd_parsec, options->priority, "geadd",
@@ -114,10 +114,10 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
         sizeof(int),               &n,     VALUE,
         sizeof(CHAMELEON_Complex64_t), &alpha, VALUE,
         PASSED_BY_REF,              RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),               &lda,   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t), &beta,  VALUE,
         PASSED_BY_REF,              RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INOUT | AFFINITY,
-        sizeof(int),               &ldb,   VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_zgelqt.c b/runtime/parsec/codelets/codelet_zgelqt.c
index 38cc714d602728fe10033cb05744151ddb924800..966d919fe4f59f6ecefd93a199263d01f28dc8f7 100644
--- a/runtime/parsec/codelets/codelet_zgelqt.c
+++ b/runtime/parsec/codelets/codelet_zgelqt.c
@@ -21,66 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgelqt - computes a LQ factorization of a complex M-by-N tile A: A = L * Q.
- *
- *  The tile Q is represented as a product of elementary reflectors
- *
- *    Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).
- *
- *  Each H(i) has the form
- *
- *    H(i) = I - tau * v * v'
- *
- *  where tau is a complex scalar, and v is a complex vector with
- *  v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
- *  A(i,i+1:n), and tau in TAU(i).
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, the elements on and below the diagonal of the array
- *         contain the M-by-min(M,N) lower trapezoidal tile L (L is
- *         lower triangular if M <= N); the elements above the diagonal,
- *         with the array TAU, represent the unitary tile Q as a
- *         product of elementary reflectors (see Further Details).
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- * @param[out] T
- *         The IB-by-N triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] LDT
- *         The leading dimension of the array T. LDT >= IB.
- *
- * @param[out] TAU
- *         The scalar factors of the elementary reflectors (see Further
- *         Details).
- *
- * @param[out] WORK
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 static inline int
 CORE_zgelqt_parsec( parsec_execution_stream_t *context,
                     parsec_task_t             *this_task )
@@ -107,10 +47,12 @@ CORE_zgelqt_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zgelqt_parsec, options->priority, "gelqt",
@@ -118,9 +60,9 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
         sizeof(int),                        &n,     VALUE,
         sizeof(int),                        &ib,    VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-        sizeof(int),           &lda,                VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | OUTPUT,
-        sizeof(int),           &ldt,                VALUE,
+        sizeof(int), &(tileT->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t)*nb,       NULL,   SCRATCH,
         sizeof(CHAMELEON_Complex64_t)*ib*nb,    NULL,   SCRATCH,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/codelets/codelet_zgemm.c b/runtime/parsec/codelets/codelet_zgemm.c
index c3db187a65fdcde80fa408b8efac051f0d4c0c6a..267033bc73e5bb8c4c795fd6954343db3abcac86 100644
--- a/runtime/parsec/codelets/codelet_zgemm.c
+++ b/runtime/parsec/codelets/codelet_zgemm.c
@@ -59,11 +59,14 @@ CORE_zgemm_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
                        cham_trans_t transA, cham_trans_t transB,
                        int m, int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                                                const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn, int ldc )
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                const CHAM_desc_t *B, int Bm, int Bn,
+                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zgemm_parsec, options->priority, "Gemm",
@@ -74,12 +77,12 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
         sizeof(int),           &k,                                VALUE,
         sizeof(CHAMELEON_Complex64_t),           &alpha,              VALUE,
         PASSED_BY_REF,     RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),           &lda,                              VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,     RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INPUT,
-        sizeof(int),           &ldb,                              VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t),           &beta,               VALUE,
         PASSED_BY_REF,     RTBLKADDR( C, CHAMELEON_Complex64_t, Cm, Cn ), chameleon_parsec_get_arena_index( C ) | INOUT | AFFINITY,
-        sizeof(int),           &ldc,                              VALUE,
+        sizeof(int), &(tileC->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_zgeqrt.c b/runtime/parsec/codelets/codelet_zgeqrt.c
index ca6f169c943654121c7de5fd8b3e75bb5d5507fe..9a514361ba8f26f5b126bef4410af55310fdac88 100644
--- a/runtime/parsec/codelets/codelet_zgeqrt.c
+++ b/runtime/parsec/codelets/codelet_zgeqrt.c
@@ -108,10 +108,12 @@ CORE_zgeqrt_parsec ( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zgeqrt_parsec, options->priority, "geqrt",
@@ -119,9 +121,9 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
         sizeof(int),           &n,                             VALUE,
         sizeof(int),           &ib,                            VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-        sizeof(int),           &lda,                           VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | OUTPUT,
-        sizeof(int),           &ldt,                           VALUE,
+        sizeof(int), &(tileT->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t)*nb,       NULL,                         SCRATCH,
         sizeof(CHAMELEON_Complex64_t)*ib*nb,    NULL,                         SCRATCH,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/codelets/codelet_zgessm.c b/runtime/parsec/codelets/codelet_zgessm.c
index 0515ccec7c52469bb67e72eb67c6f5defff39c0c..3893d377021d5ddf4614620355b45cccf7bc9a83 100644
--- a/runtime/parsec/codelets/codelet_zgessm.c
+++ b/runtime/parsec/codelets/codelet_zgessm.c
@@ -92,11 +92,14 @@ CORE_zgessm_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
                        int m, int n, int k, int ib, int nb, int *IPIV,
-                       const CHAM_desc_t *L, int Lm, int Ln, int ldl,
-                       const CHAM_desc_t *D, int Dm, int Dn, int ldd,
-                       const CHAM_desc_t *A, int Am, int An, int lda)
+                       const CHAM_desc_t *L, int Lm, int Ln,
+                       const CHAM_desc_t *D, int Dm, int Dn,
+                       const CHAM_desc_t *A, int Am, int An)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileL = L->get_blktile( L, Lm, Ln );
+    CHAM_tile_t *tileD = D->get_blktile( D, Dm, Dn );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zgessm_parsec, options->priority, "gessm",
@@ -106,11 +109,11 @@ void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
         sizeof(int),           &ib,                               VALUE,
         sizeof(int*),          &IPIV,                             VALUE,
         PASSED_BY_REF,         RTBLKADDR( L, CHAMELEON_Complex64_t, Lm, Ln ), chameleon_parsec_get_arena_index( L ) | INPUT,
-        sizeof(int),           &ldl,                              VALUE,
+        sizeof(int), &(tileL->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( D, CHAMELEON_Complex64_t, Dm, Dn ), chameleon_parsec_get_arena_index( D ) | INPUT,
-        sizeof(int),           &ldd,                              VALUE,
+        sizeof(int), &(tileD->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-        sizeof(int),           &lda,                              VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_zgessq.c b/runtime/parsec/codelets/codelet_zgessq.c
index 4cd62d842671401caf7a0422df7b1ac1d3801947..4a74f16a27825b0f1aaae29408f4c97de9c5bb3e 100644
--- a/runtime/parsec/codelets/codelet_zgessq.c
+++ b/runtime/parsec/codelets/codelet_zgessq.c
@@ -43,10 +43,11 @@ CORE_zgessq_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
                         cham_store_t storev, int m, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *A, int Am, int An,
                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zgessq_parsec, options->priority, "gessq",
@@ -54,7 +55,7 @@ void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
         sizeof(int),          &m,            VALUE,
         sizeof(int),          &n,            VALUE,
         PASSED_BY_REF,   RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),          &lda,          VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,   RTBLKADDR( SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn ), chameleon_parsec_get_arena_index( SCALESUMSQ ) | INOUT | AFFINITY,
         PARSEC_DTD_ARG_END );
 }
diff --git a/runtime/parsec/codelets/codelet_zgetrf.c b/runtime/parsec/codelets/codelet_zgetrf.c
index 8154ca2cf70a8e06c7449fe0d809b354e9af308d..3f02a3d4549cbe94a50a62dd401a326191dcaa60 100644
--- a/runtime/parsec/codelets/codelet_zgetrf.c
+++ b/runtime/parsec/codelets/codelet_zgetrf.c
@@ -51,18 +51,19 @@ CORE_zgetrf_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zgetrf(const RUNTIME_option_t *options,
                        int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *A, int Am, int An,
                        int *IPIV,
                        cham_bool_t check_info, int iinfo)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zgetrf_parsec, options->priority, "getrf",
         sizeof(int),                 &m,                          VALUE,
         sizeof(int),                 &n,                          VALUE,
         PASSED_BY_REF,               RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-        sizeof(int),                 &lda,                        VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(int)*nb,              IPIV,                        SCRATCH,
         sizeof(cham_bool_t),         &check_info,                 VALUE,
         sizeof(int),                 &iinfo,                      VALUE,
diff --git a/runtime/parsec/codelets/codelet_zgetrf_incpiv.c b/runtime/parsec/codelets/codelet_zgetrf_incpiv.c
index d3128c04947402852262d453efd3f818d9a68eb3..d328f2e774e07f1ed2fb7366585098ce08f9ab7a 100644
--- a/runtime/parsec/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/parsec/codelets/codelet_zgetrf_incpiv.c
@@ -21,59 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgetrf_incpiv computes an LU factorization of a general M-by-N tile A
- *  using partial pivoting with row int erchanges.
- *
- *  The factorization has the form
- *
- *    A = P * L * U
- *
- *  where P is a permutation matrix, L is lower triangular with unit
- *  diagonal elements (lower trapezoidal if m > n), and U is upper
- *  triangular (upper trapezoidal if m < n).
- *
- *  This is the right-looking Level 2.5 BLAS version of the algorithm.
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile to be factored.
- *         On exit, the factors L and U from the factorization
- *         A = P*L*U; the unit diagonal elements of L are not stored.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- * @param[out] IPIV
- *         The pivot indices; for 1 <= i <= min(M,N), row i of the
- *         tile was int erchanged with row IPIV(i).
- *
- * @param[out] INFO
- *         See returned value.
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
- *              has been completed, but the factor U is exactly
- *              singular, and division by zero will occur if it is used
- *              to solve a system of equations.
- *
- */
 static inline int
 CORE_zgetrf_incpiv_parsec( parsec_execution_stream_t *context,
                            parsec_task_t             *this_task )
@@ -105,12 +52,13 @@ CORE_zgetrf_incpiv_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zgetrf_incpiv( const RUNTIME_option_t *options,
                                int m, int n, int ib, int nb,
-                               const CHAM_desc_t *A, int Am, int An, int lda,
-                               const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                               const CHAM_desc_t *A, int Am, int An,
+                               const CHAM_desc_t *L, int Lm, int Ln,
                                int *IPIV,
                                cham_bool_t check_info, int iinfo )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zgetrf_incpiv_parsec, options->priority, "getrf_inc",
@@ -118,7 +66,7 @@ void INSERT_TASK_zgetrf_incpiv( const RUNTIME_option_t *options,
         sizeof(int),                 &n,                                VALUE,
         sizeof(int),                 &ib,                               VALUE,
         PASSED_BY_REF,               RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-        sizeof(int),                 &lda,                              VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(int*),                &IPIV,                             VALUE,
         sizeof(int),                 &check_info,                       VALUE,
         sizeof(int),                 &iinfo,                            VALUE,
@@ -129,6 +77,5 @@ void INSERT_TASK_zgetrf_incpiv( const RUNTIME_option_t *options,
     (void)L;
     (void)Lm;
     (void)Ln;
-    (void)ldl;
     (void)nb;
 }
diff --git a/runtime/parsec/codelets/codelet_zgetrf_nopiv.c b/runtime/parsec/codelets/codelet_zgetrf_nopiv.c
index 2ff4e9a20db1acdc7bd10d4d1a94d5d045c53f05..867a24b9d1e866c3e43e8cb3197ba69d268b8eb4 100644
--- a/runtime/parsec/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/parsec/codelets/codelet_zgetrf_nopiv.c
@@ -21,52 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgetrf_nopiv computes an LU factorization of a general diagonal
- *  dominant M-by-N matrix A witout pivoting.
- *
- *  The factorization has the form
- *     A = L * U
- *  where L is lower triangular with unit
- *  diagonal elements (lower trapezoidal if m > n), and U is upper
- *  triangular (upper trapezoidal if m < n).
- *
- *  This is the right-looking Level 3 BLAS version of the algorithm.
- *  WARNING: Your matrix need to be diagonal dominant if you want to call this
- *  routine safely.
- *
- *******************************************************************************
- *
- *  @param[in] M
- *          The number of rows of the matrix A.  M >= 0.
- *
- *  @param[in] N
- *          The number of columns of the matrix A.  N >= 0.
- *
- *  @param[in] IB
- *          The block size to switch between blocked and unblocked code.
- *
- *  @param[in,out] A
- *          On entry, the M-by-N matrix to be factored.
- *          On exit, the factors L and U from the factorization
- *          A = P*L*U; the unit diagonal elements of L are not stored.
- *
- *  @param[in] LDA
- *          The leading dimension of the array A.  LDA >= max(1,M).
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
- *              has been completed, but the factor U is exactly
- *              singular, and division by zero will occur if it is used
- *              to solve a system of equations.
- *
- */
 static inline int
 CORE_zgetrf_nopiv_parsec( parsec_execution_stream_t *context,
                     parsec_task_t             *this_task )
@@ -96,10 +50,11 @@ CORE_zgetrf_nopiv_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
                              int m, int n, int ib, int nb,
-                             const CHAM_desc_t *A, int Am, int An, int lda,
+                             const CHAM_desc_t *A, int Am, int An,
                              int iinfo)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zgetrf_nopiv_parsec, options->priority,  "getrf_nopiv",
@@ -107,7 +62,7 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
         sizeof(int),                 &n,                          VALUE,
         sizeof(int),                 &ib,                         VALUE,
         PASSED_BY_REF,               RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-        sizeof(int),                 &lda,                        VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(int),                 &iinfo,                      VALUE,
         sizeof(RUNTIME_sequence_t*), &(options->sequence),        VALUE,
         sizeof(RUNTIME_request_t*),  &(options->request),         VALUE,
diff --git a/runtime/parsec/codelets/codelet_zgram.c b/runtime/parsec/codelets/codelet_zgram.c
index 8d0217cc2552e329b51be45577c71d9399170098..ddba740357c9d521ad2a0d3a9ae28bdf153a14ce 100644
--- a/runtime/parsec/codelets/codelet_zgram.c
+++ b/runtime/parsec/codelets/codelet_zgram.c
@@ -20,11 +20,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 static inline int
 CORE_zgegram_parsec( parsec_execution_stream_t *context,
                      parsec_task_t             *this_task )
@@ -82,12 +77,15 @@ CORE_zsygram_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zgram( const RUNTIME_option_t *options,
                         cham_uplo_t uplo,
                         int m, int n, int mt, int nt,
-                        const CHAM_desc_t *Di, int Dim, int Din, int lddi,
-                        const CHAM_desc_t *Dj, int Djm, int Djn, int lddj,
+                        const CHAM_desc_t *Di, int Dim, int Din,
+                        const CHAM_desc_t *Dj, int Djm, int Djn,
                         const CHAM_desc_t *D, int Dm, int Dn,
-                        CHAM_desc_t *A, int Am, int An, int lda)
+                        CHAM_desc_t *A, int Am, int An)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileDi = Di->get_blktile( Di, Dim, Din );
+    CHAM_tile_t *tileDj = Dj->get_blktile( Dj, Djm, Djn );
+    CHAM_tile_t *tileA  = A->get_blktile( A, Am, An );
     double *ptrDi, *ptrDj;
 
     /*
@@ -105,10 +103,10 @@ void INSERT_TASK_zgram( const RUNTIME_option_t *options,
             sizeof(int),   &mt,   VALUE,
             sizeof(int),   &nt,   VALUE,
             PASSED_BY_REF, RTBLKADDR( Di, double, Dim, Din ), chameleon_parsec_get_arena_index( Di ) | INPUT,
-            sizeof(int),   &lddi, VALUE,
+            sizeof(int), &(tileDi->ld), VALUE,
             PASSED_BY_REF, RTBLKADDR( D, double, Dm, Dn ), chameleon_parsec_get_arena_index( D ) | INPUT,
             PASSED_BY_REF, RTBLKADDR( A, double, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-            sizeof(int),   &lda,  VALUE,
+            sizeof(int), &(tileA->ld), VALUE,
             PARSEC_DTD_ARG_END );
     } else {
         parsec_dtd_taskpool_insert_task(
@@ -119,12 +117,12 @@ void INSERT_TASK_zgram( const RUNTIME_option_t *options,
             sizeof(int),   &mt,   VALUE,
             sizeof(int),   &nt,   VALUE,
             PASSED_BY_REF, RTBLKADDR( Di, double, Dim, Din ), chameleon_parsec_get_arena_index( Di ) | INPUT,
-            sizeof(int),   &lddi, VALUE,
+            sizeof(int), &(tileDi->ld), VALUE,
             PASSED_BY_REF, RTBLKADDR( Dj, double, Djm, Djn ), chameleon_parsec_get_arena_index( Dj ) | INPUT,
-            sizeof(int),   &lddj, VALUE,
+            sizeof(int), &(tileDj->ld), VALUE,
             PASSED_BY_REF, RTBLKADDR( D, double, Dm, Dn ), chameleon_parsec_get_arena_index( D ) | INPUT,
             PASSED_BY_REF, RTBLKADDR( A, double, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-            sizeof(int),   &lda,  VALUE,
+            sizeof(int), &(tileA->ld), VALUE,
             PARSEC_DTD_ARG_END );
     }
 
diff --git a/runtime/parsec/codelets/codelet_zhe2ge.c b/runtime/parsec/codelets/codelet_zhe2ge.c
index a54fb14aa91bf3b9536b4c5a1c71e60e0e92d9c5..2b8b6ad242309002d51be8031eab5ca78bdf5ddc 100644
--- a/runtime/parsec/codelets/codelet_zhe2ge.c
+++ b/runtime/parsec/codelets/codelet_zhe2ge.c
@@ -21,11 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 static inline int
 CORE_zhe2ge_parsec( parsec_execution_stream_t *context,
                     parsec_task_t             *this_task )
@@ -51,10 +46,12 @@ CORE_zhe2ge_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
                        cham_uplo_t uplo,
                        int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zhe2ge_parsec, options->priority, "he2ge",
@@ -62,9 +59,9 @@ void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
         sizeof(int),        &m,      VALUE,
         sizeof(int),        &n,      VALUE,
         PASSED_BY_REF,       RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT ,
-        sizeof(int),        &lda,    VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), OUTPUT | AFFINITY,
-        sizeof(int),        &ldb,    VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)mb;
diff --git a/runtime/parsec/codelets/codelet_zhemm.c b/runtime/parsec/codelets/codelet_zhemm.c
index a4ce4f6135f7c28f7c4977b6bdfddd7758bd7091..e3fe539cb74b460000f35ef5580a6d5383256299 100644
--- a/runtime/parsec/codelets/codelet_zhemm.c
+++ b/runtime/parsec/codelets/codelet_zhemm.c
@@ -21,11 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 static inline int
 CORE_zhemm_parsec( parsec_execution_stream_t *context,
                    parsec_task_t             *this_task )
@@ -58,11 +53,14 @@ CORE_zhemm_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zhemm_parsec, options->priority, "hemm",
@@ -72,12 +70,12 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
         sizeof(int),                       &n,       VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha,   VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),           &lda,                 VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INPUT,
-        sizeof(int),           &ldb,                 VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t),         &beta,    VALUE,
         PASSED_BY_REF,         RTBLKADDR( C, CHAMELEON_Complex64_t, Cm, Cn ), chameleon_parsec_get_arena_index( C ) | INOUT | AFFINITY,
-        sizeof(int),           &ldc,                 VALUE,
+        sizeof(int), &(tileC->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_zher2k.c b/runtime/parsec/codelets/codelet_zher2k.c
index 1d4aec6ff21b017366afe3d49caf2d92bd2f5437..10599009c9ca1b76c54f630815f30bd138bc0af8 100644
--- a/runtime/parsec/codelets/codelet_zher2k.c
+++ b/runtime/parsec/codelets/codelet_zher2k.c
@@ -21,11 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 static inline int
 CORE_zher2k_parsec( parsec_execution_stream_t *context,
                     parsec_task_t             *this_task )
@@ -58,11 +53,14 @@ CORE_zher2k_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_trans_t trans,
                        int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                       double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn,
+                       double beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zher2k_parsec, options->priority, "her2k",
@@ -72,12 +70,12 @@ void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
         sizeof(int),                        &k,        VALUE,
         sizeof(CHAMELEON_Complex64_t),          &alpha,    VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),                        &lda,      VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INPUT,
-        sizeof(int),                        &ldb,      VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         sizeof(double),                     &beta,     VALUE,
         PASSED_BY_REF,         RTBLKADDR( C, CHAMELEON_Complex64_t, Cm, Cn ), chameleon_parsec_get_arena_index( C ) | INOUT | AFFINITY,
-        sizeof(int),                        &ldc,      VALUE,
+        sizeof(int), &(tileC->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_zherfb.c b/runtime/parsec/codelets/codelet_zherfb.c
index 489e7d01695346131e187a9020fcab9f97d8bcac..0b6d3d649c1b769e6aa9f00930dfddf967bfe774 100644
--- a/runtime/parsec/codelets/codelet_zherfb.c
+++ b/runtime/parsec/codelets/codelet_zherfb.c
@@ -53,11 +53,14 @@ CORE_zherfb_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
                        cham_uplo_t uplo,
                        int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn,
+                       const CHAM_desc_t *C, int Cm, int Cn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zherfb_parsec, options->priority, "herfb",
@@ -67,11 +70,11 @@ void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
         sizeof(int),        &ib,   VALUE,
         sizeof(int),        &nb,   VALUE,
         PASSED_BY_REF,       RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INOUT,
-        sizeof(int),        &lda,  VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), INPUT,
-        sizeof(int),        &ldt,  VALUE,
+        sizeof(int), &(tileT->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), INOUT | AFFINITY,
-        sizeof(int),        &ldc,  VALUE,
+        sizeof(int), &(tileC->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t)*2*nb*nb,  NULL, SCRATCH,
         sizeof(int),        &nb,   VALUE,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/codelets/codelet_zherk.c b/runtime/parsec/codelets/codelet_zherk.c
index 4cd3f591b034f7d80934be5392841f0bb1e65578..45822a1e2080b40ec6ac787db28b38ab6fa1f66d 100644
--- a/runtime/parsec/codelets/codelet_zherk.c
+++ b/runtime/parsec/codelets/codelet_zherk.c
@@ -21,11 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 static inline int
 CORE_zherk_parsec( parsec_execution_stream_t *context,
                    parsec_task_t             *this_task )
@@ -55,10 +50,12 @@ CORE_zherk_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zherk(const RUNTIME_option_t *options,
                       cham_uplo_t uplo, cham_trans_t trans,
                       int n, int k, int nb,
-                      double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      double alpha, const CHAM_desc_t *A, int Am, int An,
+                      double beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zherk_parsec, options->priority, "herk",
@@ -68,10 +65,10 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options,
         sizeof(int),           &k,                                VALUE,
         sizeof(double),        &alpha,                            VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),           &lda,                              VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(double),        &beta,                             VALUE,
         PASSED_BY_REF,         RTBLKADDR( C, CHAMELEON_Complex64_t, Cm, Cn ), chameleon_parsec_get_arena_index( C ) | INOUT | AFFINITY,
-        sizeof(int),           &ldc,                              VALUE,
+        sizeof(int), &(tileC->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_zhessq.c b/runtime/parsec/codelets/codelet_zhessq.c
index 2229c62eb91c03e52cbc848f482b2702a40a4c0c..5cd16be91efceb988fa1a8d2faf8caad5df5963a 100644
--- a/runtime/parsec/codelets/codelet_zhessq.c
+++ b/runtime/parsec/codelets/codelet_zhessq.c
@@ -22,10 +22,10 @@
 
 void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int n,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     INSERT_TASK_zsyssq( options, storev, uplo, n,
-                        A, Am, An, lda,
+                        A, Am, An,
                         SCALESUMSQ, SCALESUMSQm, SCALESUMSQn );
 }
diff --git a/runtime/parsec/codelets/codelet_zlacpy.c b/runtime/parsec/codelets/codelet_zlacpy.c
index 4b5faaf1dee4533c747220e92df54392694e6b10..62b5ffa3682dd7969b1884f2799f3c3935fa13a8 100644
--- a/runtime/parsec/codelets/codelet_zlacpy.c
+++ b/runtime/parsec/codelets/codelet_zlacpy.c
@@ -21,11 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 static inline int
 CORE_zlacpyx_parsec( parsec_execution_stream_t *context,
                     parsec_task_t             *this_task )
@@ -51,10 +46,12 @@ CORE_zlacpyx_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int m, int n, int nb,
-                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
-                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                          int displA, const CHAM_desc_t *A, int Am, int An,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zlacpyx_parsec, options->priority, "lacpy",
@@ -63,20 +60,20 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
         sizeof(int),           &n,                         VALUE,
         sizeof(int),           &displA,                    VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),           &lda,                       VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(int),           &displB,                    VALUE,
         PASSED_BY_REF,         RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | OUTPUT | AFFINITY,
-        sizeof(int),           &ldb,                       VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         PARSEC_DTD_ARG_END );
     (void)nb;
 }
 
 void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int m, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                         0, A, Am, An, lda,
-                         0, B, Bm, Bn, ldb );
+                         0, A, Am, An,
+                         0, B, Bm, Bn );
 }
diff --git a/runtime/parsec/codelets/codelet_zlag2c.c b/runtime/parsec/codelets/codelet_zlag2c.c
index 6c65435356d66cc24aa4977c50e12a58d745ae01..535849fc88b9659cb8e99dd45e18fbde6542acc9 100644
--- a/runtime/parsec/codelets/codelet_zlag2c.c
+++ b/runtime/parsec/codelets/codelet_zlag2c.c
@@ -21,11 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 static inline int
 CORE_zlag2c_parsec( parsec_execution_stream_t *context,
                     parsec_task_t             *this_task )
@@ -48,18 +43,20 @@ CORE_zlag2c_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
                        int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
-    parsec_dtd_taskpool_insert_task(PARSEC_dtd_taskpool, CORE_zlag2c_parsec, "lag2c",
+    parsec_dtd_taskpool_insert_task(PARSEC_dtd_taskpool, CORE_zlag2c_parsec, options->priority, "lag2c",
         sizeof(int),                        &m,         VALUE,
         sizeof(int),                        &n,         VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),                        &lda,       VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( B, CHAMELEON_Complex32_t, Bm, Bn ),     OUTPUT | AFFINITY,
-        sizeof(int),                        &ldb,       VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         PARSEC_DTD_ARG_END );
 }
 
@@ -89,18 +86,20 @@ CORE_clag2z_parsec(parsec_execution_stream_t *context, parsec_task_t *this_task)
 
 void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
                        int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_clag2z_parsec, options->priority, "lag2z",
         sizeof(int),                        &m,         VALUE,
         sizeof(int),                        &n,         VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex32_t, Am, An ),     INPUT,
-        sizeof(int),                        &lda,       VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | OUTPUT | AFFINITY,
-        sizeof(int),                        &ldb,       VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         PARSEC_DTD_ARG_END );
 }
diff --git a/runtime/parsec/codelets/codelet_zlange.c b/runtime/parsec/codelets/codelet_zlange.c
index b66dfd7bccaf0f0782f9c9495e167e29b392ae64..776ea85e557c99bbf77ebcd6c5f042cf30840f1b 100644
--- a/runtime/parsec/codelets/codelet_zlange.c
+++ b/runtime/parsec/codelets/codelet_zlange.c
@@ -44,10 +44,11 @@ CORE_zlange_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zlange(const RUNTIME_option_t *options,
                        cham_normtype_t norm, int M, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
+                       const CHAM_desc_t *A, int Am, int An,
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     int szeW = chameleon_max( M, N );
 
@@ -57,7 +58,7 @@ void INSERT_TASK_zlange(const RUNTIME_option_t *options,
         sizeof(int),                   &M,             VALUE,
         sizeof(int),                   &N,             VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),                   &LDA,           VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(double)*szeW,           NULL,           SCRATCH,
         PASSED_BY_REF,         RTBLKADDR( B, double, Bm, Bn ),            OUTPUT | AFFINITY,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/codelets/codelet_zlanhe.c b/runtime/parsec/codelets/codelet_zlanhe.c
index 5621b08cc0205a059f9401c3a208476c44643954..02a9d50bc9b215a1f5ee61ddeae5ef7569373d67 100644
--- a/runtime/parsec/codelets/codelet_zlanhe.c
+++ b/runtime/parsec/codelets/codelet_zlanhe.c
@@ -44,11 +44,11 @@ CORE_zlanhe_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
                        cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
+                       const CHAM_desc_t *A, int Am, int An,
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
-
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
     int szeW = chameleon_max( 1, N );
 
     parsec_dtd_taskpool_insert_task(
@@ -57,7 +57,7 @@ void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
         sizeof(int),            &uplo,          VALUE,
         sizeof(int),                   &N,             VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),                   &LDA,           VALUE,
+        sizeof(int), &(tileA->ld),           VALUE,
         sizeof(double)*szeW,           NULL,           SCRATCH,
         PASSED_BY_REF,         RTBLKADDR( B, double, Bm, Bn ),            OUTPUT | AFFINITY,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/codelets/codelet_zlansy.c b/runtime/parsec/codelets/codelet_zlansy.c
index 85e23886b9e8a479f1c89746f35eed44a6091abc..425bcc99230d40d0f250d5cfe6b23377ff5ea03f 100644
--- a/runtime/parsec/codelets/codelet_zlansy.c
+++ b/runtime/parsec/codelets/codelet_zlansy.c
@@ -44,11 +44,11 @@ CORE_zlansy_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zlansy(const RUNTIME_option_t *options,
                        cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
+                       const CHAM_desc_t *A, int Am, int An,
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
-
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
     int szeW = chameleon_max( 1, N );
 
     parsec_dtd_taskpool_insert_task(
@@ -57,7 +57,7 @@ void INSERT_TASK_zlansy(const RUNTIME_option_t *options,
         sizeof(int),            &uplo,          VALUE,
         sizeof(int),                   &N,             VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),                   &LDA,           VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(double)*szeW,           NULL,           SCRATCH,
         PASSED_BY_REF,         RTBLKADDR( B, double, Bm, Bn ),            OUTPUT | AFFINITY,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/codelets/codelet_zlantr.c b/runtime/parsec/codelets/codelet_zlantr.c
index d4aa9b42ad2500a2e0ee1310863ee184321bd774..650f5c71b6263d50ffd219123951c62289114fd7 100644
--- a/runtime/parsec/codelets/codelet_zlantr.c
+++ b/runtime/parsec/codelets/codelet_zlantr.c
@@ -45,22 +45,22 @@ CORE_zlantr_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zlantr(const RUNTIME_option_t *options,
                        cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                        int M, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
+                       const CHAM_desc_t *A, int Am, int An,
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
-
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
     int szeW = chameleon_max( 1, N );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zlantr_parsec, options->priority, "lantr",
-        sizeof(int),            &norm,          VALUE,
-        sizeof(int),            &uplo,          VALUE,
-        sizeof(int),            &diag,          VALUE,
-        sizeof(int),                   &M,             VALUE,
-        sizeof(int),                   &N,             VALUE,
+        sizeof(int), &norm,       VALUE,
+        sizeof(int), &uplo,       VALUE,
+        sizeof(int), &diag,       VALUE,
+        sizeof(int), &M,          VALUE,
+        sizeof(int), &N,          VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),                   &LDA,           VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(double)*szeW,           NULL,           SCRATCH,
         PASSED_BY_REF,         RTBLKADDR( B, double, Bm, Bn ),            OUTPUT | AFFINITY,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/codelets/codelet_zlascal.c b/runtime/parsec/codelets/codelet_zlascal.c
index 09f40b18f3a5562c44d09d81ebfd7bc98466eb4b..32456e8c798dda40d2e01682cffb51a643af4184 100644
--- a/runtime/parsec/codelets/codelet_zlascal.c
+++ b/runtime/parsec/codelets/codelet_zlascal.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlascal PaRSEC codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge
@@ -27,11 +25,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 static inline int
 CORE_zlascal_parsec( parsec_execution_stream_t *context,
                      parsec_task_t             *this_task )
@@ -56,9 +49,10 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
                         cham_uplo_t uplo,
                         int m, int n, int nb,
                         CHAMELEON_Complex64_t alpha,
-                        const CHAM_desc_t *A, int Am, int An, int lda)
+                        const CHAM_desc_t *A, int Am, int An)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zlascal_parsec, options->priority, "lascal",
@@ -67,7 +61,7 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
         sizeof(int),               &n,     VALUE,
         sizeof(CHAMELEON_Complex64_t), &alpha, VALUE,
         PASSED_BY_REF,              RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INOUT | AFFINITY,
-        sizeof(int),               &lda,   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_zlaset.c b/runtime/parsec/codelets/codelet_zlaset.c
index f9e0b9566da0cb0852228f0d3603e314987e4f11..8190891aadac71cf7bc2f226f27a435238b8e84e 100644
--- a/runtime/parsec/codelets/codelet_zlaset.c
+++ b/runtime/parsec/codelets/codelet_zlaset.c
@@ -21,41 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zlaset - Sets the elements of the matrix A on the diagonal
- *  to beta and on the off-diagonals to alpha
- *
- *******************************************************************************
- *
- * @param[in] uplo
- *          Specifies which elements of the matrix are to be set
- *          = ChamUpper: Upper part of A is set;
- *          = ChamLower: Lower part of A is set;
- *          = ChamUpperLower: ALL elements of A are set.
- *
- * @param[in] M
- *          The number of rows of the matrix A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the matrix A.  N >= 0.
- *
- * @param[in] alpha
- *         The constant to which the off-diagonal elements are to be set.
- *
- * @param[in] beta
- *         The constant to which the diagonal elements are to be set.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, A has been set accordingly.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- */
 static inline int
 CORE_zlaset_parsec( parsec_execution_stream_t *context,
                     parsec_task_t             *this_task )
@@ -80,9 +45,10 @@ CORE_zlaset_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zlaset(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int M, int N,
                        CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
-                       const CHAM_desc_t *A, int Am, int An, int LDA)
+                       const CHAM_desc_t *A, int Am, int An)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zlaset_parsec, options->priority, "laset",
@@ -92,6 +58,6 @@ void INSERT_TASK_zlaset(const RUNTIME_option_t *options,
         sizeof(CHAMELEON_Complex64_t),       &alpha,       VALUE,
         sizeof(CHAMELEON_Complex64_t),       &beta,        VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | OUTPUT | AFFINITY,
-        sizeof(int),                     &LDA,         VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PARSEC_DTD_ARG_END );
 }
diff --git a/runtime/parsec/codelets/codelet_zlaset2.c b/runtime/parsec/codelets/codelet_zlaset2.c
index 731e6d4717a1d9fbb693d3c0667ef8725879748d..46067a65fa1267c0bcd23b0f3d685d16d5af0008 100644
--- a/runtime/parsec/codelets/codelet_zlaset2.c
+++ b/runtime/parsec/codelets/codelet_zlaset2.c
@@ -21,39 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zlaset2 - Sets the elements of the matrix A to alpha.
- *  Not LAPACK compliant! Read below.
- *
- *******************************************************************************
- *
- * @param[in] uplo
- *          Specifies which elements of the matrix are to be set
- *          = ChamUpper: STRICT Upper part of A is set to alpha;
- *          = ChamLower: STRICT Lower part of A is set to alpha;
- *          = ChamUpperLower: ALL elements of A are set to alpha.
- *          Not LAPACK Compliant.
- *
- * @param[in] M
- *          The number of rows of the matrix A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the matrix A.  N >= 0.
- *
- * @param[in] alpha
- *         The constant to which the elements are to be set.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, A has been set to alpha accordingly.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- */
 static inline int
 CORE_zlaset2_parsec( parsec_execution_stream_t *context,
                      parsec_task_t             *this_task )
@@ -76,9 +43,10 @@ CORE_zlaset2_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zlaset2(const RUNTIME_option_t *options,
                         cham_uplo_t uplo, int M, int N,
-                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int LDA)
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zlaset2_parsec, options->priority, "laset2",
@@ -87,6 +55,6 @@ void INSERT_TASK_zlaset2(const RUNTIME_option_t *options,
         sizeof(int),                       &N,         VALUE,
-        sizeof(int),                &alpha,     VALUE,
+        sizeof(CHAMELEON_Complex64_t), &alpha,  VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | OUTPUT | AFFINITY,
-        sizeof(int),                       &LDA,       VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PARSEC_DTD_ARG_END );
 }
diff --git a/runtime/parsec/codelets/codelet_zlatro.c b/runtime/parsec/codelets/codelet_zlatro.c
index af9c8cf3dffd8e857d641e7e567cdb27ff265080..9451ea48a67b2e87d4fef19993e64785b3ad0322 100644
--- a/runtime/parsec/codelets/codelet_zlatro.c
+++ b/runtime/parsec/codelets/codelet_zlatro.c
@@ -44,16 +44,15 @@ CORE_zlatro_parsec( parsec_execution_stream_t *context,
     return PARSEC_HOOK_RETURN_DONE;
 }
 
-/**
- *
- */
 void INSERT_TASK_zlatro(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_trans_t trans,
                        int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zlatro_parsec, options->priority, "latro",
@@ -62,9 +61,9 @@ void INSERT_TASK_zlatro(const RUNTIME_option_t *options,
         sizeof(int),        &m,     VALUE,
         sizeof(int),        &n,     VALUE,
         PASSED_BY_REF,       RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
-        sizeof(int),        &lda,   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), OUTPUT | AFFINITY,
-        sizeof(int),        &ldb,   VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)mb;
diff --git a/runtime/parsec/codelets/codelet_zlauum.c b/runtime/parsec/codelets/codelet_zlauum.c
index 497862ea15f5309ef64dec552bcc5b8e85aafca3..45098a1ff024cea672b48a7063361842a63c3652 100644
--- a/runtime/parsec/codelets/codelet_zlauum.c
+++ b/runtime/parsec/codelets/codelet_zlauum.c
@@ -21,11 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 static inline int
 CORE_zlauum_parsec( parsec_execution_stream_t *context,
                     parsec_task_t             *this_task )
@@ -46,16 +41,17 @@ CORE_zlauum_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zlauum(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda)
+                       const CHAM_desc_t *A, int Am, int An)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zlauum_parsec, options->priority, "lauum",
         sizeof(int),    &uplo,                  VALUE,
         sizeof(int),           &n,                     VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-        sizeof(int),           &lda,                   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_zplghe.c b/runtime/parsec/codelets/codelet_zplghe.c
index 324270e2a6c06a32b46b5a0fc3b135ee356a4b63..40d3aa614ff6a042b25f5f380d4ed2d4b9a2e543 100644
--- a/runtime/parsec/codelets/codelet_zplghe.c
+++ b/runtime/parsec/codelets/codelet_zplghe.c
@@ -45,10 +45,11 @@ CORE_zplghe_parsec( parsec_execution_stream_t *context,
 }
 
 void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
-                        double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                        double bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
                         int bigM, int m0, int n0, unsigned long long int seed )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zplghe_parsec, options->priority, "zplghe",
@@ -56,7 +57,7 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
         sizeof(int),       &m,                             VALUE,
         sizeof(int),       &n,                             VALUE,
         PASSED_BY_REF,     RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | OUTPUT | AFFINITY,
-        sizeof(int),       &lda,                           VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(int),       &bigM,                          VALUE,
         sizeof(int),       &m0,                            VALUE,
         sizeof(int),       &n0,                            VALUE,
diff --git a/runtime/parsec/codelets/codelet_zplgsy.c b/runtime/parsec/codelets/codelet_zplgsy.c
index 9ab5a4faeb73e4067e91cdd8e72217d03c5c3196..53e6f2b310f76ff9c4dc58740d7b00691094a906 100644
--- a/runtime/parsec/codelets/codelet_zplgsy.c
+++ b/runtime/parsec/codelets/codelet_zplgsy.c
@@ -45,10 +45,11 @@ CORE_zplgsy_parsec( parsec_execution_stream_t *context,
 }
 
 void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
-                        CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                        CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
                         int bigM, int m0, int n0, unsigned long long int seed )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zplgsy_parsec, options->priority, "zplgsy",
@@ -56,7 +57,7 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
         sizeof(int),               &m,                             VALUE,
         sizeof(int),               &n,                             VALUE,
         PASSED_BY_REF,             RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | OUTPUT | AFFINITY,
-        sizeof(int),               &lda,                           VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(int),               &bigM,                          VALUE,
         sizeof(int),               &m0,                            VALUE,
         sizeof(int),               &n0,                            VALUE,
diff --git a/runtime/parsec/codelets/codelet_zplrnt.c b/runtime/parsec/codelets/codelet_zplrnt.c
index 2bbeef7419167515c1bd4bffc0d6f516a3196653..b1e97221d0cb5dfce8aece73a236867ef41f7916 100644
--- a/runtime/parsec/codelets/codelet_zplrnt.c
+++ b/runtime/parsec/codelets/codelet_zplrnt.c
@@ -44,17 +44,18 @@ CORE_zplrnt_parsec( parsec_execution_stream_t *context,
 }
 
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
-                        int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                        int m, int n, const CHAM_desc_t *A, int Am, int An,
                         int bigM, int m0, int n0, unsigned long long int seed )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zplrnt_parsec, options->priority, "zplrnt",
         sizeof(int),       &m,                          VALUE,
         sizeof(int),       &n,                          VALUE,
         PASSED_BY_REF,     RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | OUTPUT | AFFINITY,
-        sizeof(int),       &lda,                        VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(int),       &bigM,                       VALUE,
         sizeof(int),       &m0,                         VALUE,
         sizeof(int),       &n0,                         VALUE,
diff --git a/runtime/parsec/codelets/codelet_zpotrf.c b/runtime/parsec/codelets/codelet_zpotrf.c
index 0b6b6b9b5d31d503ffcc1c8bfdf495dfe4522e3f..93fdbf4a3326f1785f57262d26a1a18c397e2a96 100644
--- a/runtime/parsec/codelets/codelet_zpotrf.c
+++ b/runtime/parsec/codelets/codelet_zpotrf.c
@@ -53,17 +53,18 @@ CORE_zpotrf_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zpotrf(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *A, int Am, int An,
                        int iinfo)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zpotrf_parsec, options->priority, "potrf",
         sizeof(int),                 &uplo,                             VALUE,
         sizeof(int),                 &n,                                VALUE,
         PASSED_BY_REF,               RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-        sizeof(int),                 &lda,                              VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(int),                 &iinfo,                            VALUE,
         sizeof(RUNTIME_sequence_t*), &(options->sequence),              VALUE,
         sizeof(RUNTIME_request_t*),  &(options->request),               VALUE,
diff --git a/runtime/parsec/codelets/codelet_zssssm.c b/runtime/parsec/codelets/codelet_zssssm.c
index 25d00097a9c312800b39c213fec13a37db294c35..e11a2131bee6f81748d1840d7e3bc966643c0e1c 100644
--- a/runtime/parsec/codelets/codelet_zssssm.c
+++ b/runtime/parsec/codelets/codelet_zssssm.c
@@ -52,13 +52,17 @@ CORE_zssssm_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
                        int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                       const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                       const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                       const CHAM_desc_t *L1, int L1m, int L1n, int ldl1,
-                       const CHAM_desc_t *L2, int L2m, int L2n, int ldl2,
+                       const CHAM_desc_t *A1, int A1m, int A1n,
+                       const CHAM_desc_t *A2, int A2m, int A2n,
+                       const CHAM_desc_t *L1, int L1m, int L1n,
+                       const CHAM_desc_t *L2, int L2m, int L2n,
                        const int *IPIV)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA1 = A1->get_blktile( A1, A1m, A1n );
+    CHAM_tile_t *tileA2 = A2->get_blktile( A2, A2m, A2n );
+    CHAM_tile_t *tileL1 = L1->get_blktile( L1, L1m, L1n );
+    CHAM_tile_t *tileL2 = L2->get_blktile( L2, L2m, L2n );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zssssm_parsec, options->priority, "ssssm",
@@ -69,13 +73,13 @@ void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
         sizeof(int),           &k,                                 VALUE,
         sizeof(int),           &ib,                                VALUE,
         PASSED_BY_REF,         RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INOUT,
-        sizeof(int),           &lda1,                              VALUE,
+        sizeof(int), &(tileA1->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY,
-        sizeof(int),           &lda2,                              VALUE,
+        sizeof(int), &(tileA2->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( L1, CHAMELEON_Complex64_t, L1m, L1n ), chameleon_parsec_get_arena_index( L1 ) | INPUT,
-        sizeof(int),           &ldl1,                              VALUE,
+        sizeof(int), &(tileL1->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( L2, CHAMELEON_Complex64_t, L2m, L2n ), chameleon_parsec_get_arena_index( L2 ) | INPUT,
-        sizeof(int),           &ldl2,                              VALUE,
+        sizeof(int), &(tileL2->ld), VALUE,
         sizeof(int*),          &IPIV,                              VALUE,
         PARSEC_DTD_ARG_END );
 
diff --git a/runtime/parsec/codelets/codelet_zsymm.c b/runtime/parsec/codelets/codelet_zsymm.c
index 703b1b5d15d7bab0f0db10f0cb6a47c9c1934dc9..75e879049762f8109256020f6bc083fc70a4d834 100644
--- a/runtime/parsec/codelets/codelet_zsymm.c
+++ b/runtime/parsec/codelets/codelet_zsymm.c
@@ -53,11 +53,14 @@ CORE_zsymm_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zsymm_parsec, options->priority, "symm",
@@ -67,12 +70,12 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
         sizeof(int),            &n,                 VALUE,
         sizeof(CHAMELEON_Complex64_t),  &alpha,         VALUE,
         PASSED_BY_REF,          RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),            &lda,               VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,          RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INPUT,
-        sizeof(int),            &ldb,               VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t),  &beta,          VALUE,
         PASSED_BY_REF,          RTBLKADDR( C, CHAMELEON_Complex64_t, Cm, Cn ), chameleon_parsec_get_arena_index( C ) | INOUT | AFFINITY,
-        sizeof(int),            &ldc,               VALUE,
+        sizeof(int), &(tileC->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_zsyr2k.c b/runtime/parsec/codelets/codelet_zsyr2k.c
index 3a44fa279e9d029d68dcc2336115855394aabe1b..dfe1bc26a24ff2e8ccf9e1d876fb5da20cb414a2 100644
--- a/runtime/parsec/codelets/codelet_zsyr2k.c
+++ b/runtime/parsec/codelets/codelet_zsyr2k.c
@@ -53,11 +53,14 @@ CORE_zsyr2k_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_trans_t trans,
                        int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn,
+                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zsyr2k_parsec, options->priority, "syr2k",
@@ -67,12 +70,12 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
         sizeof(int),            &k,                     VALUE,
         sizeof(CHAMELEON_Complex64_t), &alpha,              VALUE,
         PASSED_BY_REF,          RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),            &lda,                   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,          RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INPUT,
-        sizeof(int),            &ldb,                   VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t), &beta,               VALUE,
         PASSED_BY_REF,          RTBLKADDR( C, CHAMELEON_Complex64_t, Cm, Cn ), chameleon_parsec_get_arena_index( C ) | INOUT | AFFINITY,
-        sizeof(int),            &ldc,                   VALUE,
+        sizeof(int), &(tileC->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_zsyrk.c b/runtime/parsec/codelets/codelet_zsyrk.c
index 6c1109e894db543c7b260eddb60a78ce05a19b6b..26d4352e28e020ee6529c37d94d70bcfab321b3f 100644
--- a/runtime/parsec/codelets/codelet_zsyrk.c
+++ b/runtime/parsec/codelets/codelet_zsyrk.c
@@ -50,10 +50,12 @@ CORE_zsyrk_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
                       cham_uplo_t uplo, cham_trans_t trans,
                       int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zsyrk_parsec, options->priority, "syrk",
@@ -63,10 +65,10 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
         sizeof(int),           &k,                                 VALUE,
         sizeof(CHAMELEON_Complex64_t),           &alpha,               VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),           &lda,                               VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t),           &beta,                VALUE,
         PASSED_BY_REF,         RTBLKADDR( C, CHAMELEON_Complex64_t, Cm, Cn ), chameleon_parsec_get_arena_index( C ) | INOUT | AFFINITY,
-        sizeof(int),           &ldc,                               VALUE,
+        sizeof(int), &(tileC->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_zsyssq.c b/runtime/parsec/codelets/codelet_zsyssq.c
index d3797ca26c9966bbe8319e1cfd173a430688eaeb..c4d570e4c55249f46b247b585fccc07303df2721 100644
--- a/runtime/parsec/codelets/codelet_zsyssq.c
+++ b/runtime/parsec/codelets/codelet_zsyssq.c
@@ -43,10 +43,11 @@ CORE_zsyssq_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
                         cham_store_t storev, cham_uplo_t uplo, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *A, int Am, int An,
                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zsyssq_parsec, options->priority, "syssq",
@@ -54,7 +55,7 @@ void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
         sizeof(int),            &uplo,                  VALUE,
         sizeof(int),            &n,                     VALUE,
         PASSED_BY_REF,          RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),            &lda,                   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,          RTBLKADDR( SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn ), chameleon_parsec_get_arena_index( SCALESUMSQ ) | INOUT | AFFINITY,
         PARSEC_DTD_ARG_END );
 }
diff --git a/runtime/parsec/codelets/codelet_zsytrf_nopiv.c b/runtime/parsec/codelets/codelet_zsytrf_nopiv.c
index ae4cdc64077f11eeb79a5618ebfadb6993726702..773fbcbc154b31351c613f6a11100bf7b1306153 100644
--- a/runtime/parsec/codelets/codelet_zsytrf_nopiv.c
+++ b/runtime/parsec/codelets/codelet_zsytrf_nopiv.c
@@ -42,17 +42,18 @@ CORE_zsytrf_nopiv_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options,
                              cham_uplo_t uplo, int n, int nb,
-                             const CHAM_desc_t *A, int Am, int An, int lda,
+                             const CHAM_desc_t *A, int Am, int An,
                              int iinfo)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zsytrf_nopiv_parsec, options->priority, "sytrf_nopiv",
         sizeof(int),              &uplo,                VALUE,
         sizeof(int),                     &n,                   VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-        sizeof(int),                     &lda,                 VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(int),                     &iinfo,               VALUE,
         PARSEC_DTD_ARG_END );
 
diff --git a/runtime/parsec/codelets/codelet_ztplqt.c b/runtime/parsec/codelets/codelet_ztplqt.c
index b99d4961fdae1d1750284ccbb46b51d44cbd9705..1d97ccd4a6ce49de3488232331c83aec752a0329 100644
--- a/runtime/parsec/codelets/codelet_ztplqt.c
+++ b/runtime/parsec/codelets/codelet_ztplqt.c
@@ -50,11 +50,14 @@ CORE_ztplqt_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
                          int M, int N, int L, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         const CHAM_desc_t *T, int Tm, int Tn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_ztplqt_parsec, options->priority, "tplqt",
@@ -63,11 +66,11 @@ void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
         sizeof(int),   &L,   VALUE,
         sizeof(int),   &ib,  VALUE,
         PASSED_BY_REF,  RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT,
-        sizeof(int),   &lda, VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,  RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INOUT | AFFINITY,
-        sizeof(int),   &ldb, VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         PASSED_BY_REF,  RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | OUTPUT,
-        sizeof(int),   &ldt, VALUE,
+        sizeof(int), &(tileT->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t)*(ib+1)*nb, NULL, SCRATCH,
         PARSEC_DTD_ARG_END );
 }
diff --git a/runtime/parsec/codelets/codelet_ztpmlqt.c b/runtime/parsec/codelets/codelet_ztpmlqt.c
index b31b3514be8603bcd2c551f7a01b82aae9304c9c..bf4b83472d97b544ffc8fef5971bf6c78502a8b8 100644
--- a/runtime/parsec/codelets/codelet_ztpmlqt.c
+++ b/runtime/parsec/codelets/codelet_ztpmlqt.c
@@ -55,12 +55,16 @@ CORE_ztpmlqt_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
                          cham_side_t side, cham_trans_t trans,
                          int M, int N, int K, int L, int ib, int nb,
-                         const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         const CHAM_desc_t *V, int Vm, int Vn,
+                         const CHAM_desc_t *T, int Tm, int Tn,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileV = V->get_blktile( V, Vm, Vn );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_ztpmlqt_parsec, options->priority, "tpmlqt",
@@ -72,13 +76,13 @@ void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
         sizeof(int),        &L,     VALUE,
         sizeof(int),        &ib,    VALUE,
         PASSED_BY_REF,       RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), chameleon_parsec_get_arena_index( V ) | INPUT,
-        sizeof(int),        &ldv,   VALUE,
+        sizeof(int), &(tileV->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | INPUT,
-        sizeof(int),        &ldt,   VALUE,
+        sizeof(int), &(tileT->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT,
-        sizeof(int),        &lda,   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INOUT | AFFINITY,
-        sizeof(int),        &ldb,   VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
         PARSEC_DTD_ARG_END );
 }
diff --git a/runtime/parsec/codelets/codelet_ztpmqrt.c b/runtime/parsec/codelets/codelet_ztpmqrt.c
index e8caa5d1f9b8b71f7c6c0bf9c8f2b557ef00a877..bacc8fbd2a22163a9b0bd51c8885c4b660d3d0c3 100644
--- a/runtime/parsec/codelets/codelet_ztpmqrt.c
+++ b/runtime/parsec/codelets/codelet_ztpmqrt.c
@@ -55,12 +55,16 @@ CORE_ztpmqrt_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
                          cham_side_t side, cham_trans_t trans,
                          int M, int N, int K, int L, int ib, int nb,
-                         const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         const CHAM_desc_t *V, int Vm, int Vn,
+                         const CHAM_desc_t *T, int Tm, int Tn,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileV = V->get_blktile( V, Vm, Vn );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_ztpmqrt_parsec, options->priority, "tpmqrt",
@@ -72,13 +76,13 @@ void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
         sizeof(int),        &L,     VALUE,
         sizeof(int),        &ib,    VALUE,
         PASSED_BY_REF,       RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), chameleon_parsec_get_arena_index( V ) | INPUT,
-        sizeof(int),        &ldv,   VALUE,
+        sizeof(int), &(tileV->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | INPUT,
-        sizeof(int),        &ldt,   VALUE,
+        sizeof(int), &(tileT->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT,
-        sizeof(int),        &lda,   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INOUT | AFFINITY,
-        sizeof(int),        &ldb,   VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
         PARSEC_DTD_ARG_END );
 }
diff --git a/runtime/parsec/codelets/codelet_ztpqrt.c b/runtime/parsec/codelets/codelet_ztpqrt.c
index 022d7014ef90ae5df0feba16e2c02e57c5773064..e552b6da98e7aadcebfb46d55c76be615a3066c9 100644
--- a/runtime/parsec/codelets/codelet_ztpqrt.c
+++ b/runtime/parsec/codelets/codelet_ztpqrt.c
@@ -50,11 +50,14 @@ CORE_ztpqrt_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
                          int M, int N, int L, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         const CHAM_desc_t *T, int Tm, int Tn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_ztpqrt_parsec, options->priority, "tpqrt",
@@ -63,11 +66,11 @@ void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
         sizeof(int),   &L,   VALUE,
         sizeof(int),   &ib,  VALUE,
         PASSED_BY_REF,  RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT,
-        sizeof(int),   &lda, VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,  RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INOUT | AFFINITY,
-        sizeof(int),   &ldb, VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         PASSED_BY_REF,  RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | OUTPUT,
-        sizeof(int),   &ldt, VALUE,
+        sizeof(int), &(tileT->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t)*(ib+1)*nb, NULL, SCRATCH,
         PARSEC_DTD_ARG_END );
 }
diff --git a/runtime/parsec/codelets/codelet_ztradd.c b/runtime/parsec/codelets/codelet_ztradd.c
index fdfb35729e593c483e4a43bfca2b2bc8e0f900bb..825b71a01f9595a9acd70a8abfa37d0948c6e180 100644
--- a/runtime/parsec/codelets/codelet_ztradd.c
+++ b/runtime/parsec/codelets/codelet_ztradd.c
@@ -12,8 +12,6 @@
  * @brief Chameleon ztradd PaRSEC codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @date 2015-11-04
  * @precisions normal z -> c d s
@@ -108,10 +106,12 @@ CORE_ztradd_parsec( parsec_execution_stream_t *context,
  */
 void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_ztradd_parsec, options->priority, "tradd",
@@ -121,10 +121,10 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
         sizeof(int),               &n,     VALUE,
         sizeof(CHAMELEON_Complex64_t), &alpha, VALUE,
         PASSED_BY_REF,              RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),               &lda,   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t), &beta,  VALUE,
         PASSED_BY_REF,              RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INOUT | AFFINITY,
-        sizeof(int),               &ldb,   VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_ztrasm.c b/runtime/parsec/codelets/codelet_ztrasm.c
index dc222f01df905923496830579dd67386ef221548..71ceb01ebc8e0b966af20b003015e6e6ac3bf8d9 100644
--- a/runtime/parsec/codelets/codelet_ztrasm.c
+++ b/runtime/parsec/codelets/codelet_ztrasm.c
@@ -45,10 +45,11 @@ CORE_ztrasm_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_ztrasm(const RUNTIME_option_t *options,
                        cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *A, int Am, int An,
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_ztrasm_parsec, options->priority, "trasm",
@@ -58,7 +59,7 @@ void INSERT_TASK_ztrasm(const RUNTIME_option_t *options,
         sizeof(int),            &M,                     VALUE,
         sizeof(int),            &N,                     VALUE,
         PASSED_BY_REF,          RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),            &lda,                   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,          RTBLKADDR( B, double, Bm, Bn ),     INOUT | AFFINITY,
         PARSEC_DTD_ARG_END );
 }
diff --git a/runtime/parsec/codelets/codelet_ztrmm.c b/runtime/parsec/codelets/codelet_ztrmm.c
index a7b16e6aa6268235bedcdc73ad0e52ca866af515..449647bfd6b98845af8fa0ae85475429bfcd9eb0 100644
--- a/runtime/parsec/codelets/codelet_ztrmm.c
+++ b/runtime/parsec/codelets/codelet_ztrmm.c
@@ -53,10 +53,12 @@ CORE_ztrmm_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_ztrmm_parsec, options->priority, "trmm",
@@ -68,9 +70,9 @@ void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
         sizeof(int),            &n,                     VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha,      VALUE,
         PASSED_BY_REF,          RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),            &lda,                   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,          RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INOUT | AFFINITY,
-        sizeof(int),            &ldb,                   VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_ztrsm.c b/runtime/parsec/codelets/codelet_ztrsm.c
index 0d068b3c63c39ebce2294c498c7808589fea8599..64bff731bf45c46f8d936a47fa50229eaa8e6806 100644
--- a/runtime/parsec/codelets/codelet_ztrsm.c
+++ b/runtime/parsec/codelets/codelet_ztrsm.c
@@ -45,10 +45,12 @@ CORE_ztrsm_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_ztrsm(const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileB = B->get_blktile( B, Bm, Bn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_ztrsm_parsec, options->priority, "Trsm",
@@ -60,9 +62,9 @@ void INSERT_TASK_ztrsm(const RUNTIME_option_t *options,
         sizeof(int),           &n,                        VALUE,
         sizeof(CHAMELEON_Complex64_t),           &alpha,      VALUE,
         PASSED_BY_REF,     RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),           &lda,                      VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,     RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), chameleon_parsec_get_arena_index( B ) | INOUT | AFFINITY,
-        sizeof(int),           &ldb,                      VALUE,
+        sizeof(int), &(tileB->ld), VALUE,
         PARSEC_DTD_ARG_END );
 
     (void)nb;
diff --git a/runtime/parsec/codelets/codelet_ztrssq.c b/runtime/parsec/codelets/codelet_ztrssq.c
index f56c8b7ef10fa9a48c35856dedfc593f9589e34e..b3e1f25885f004a249f6d2aaa559a31dcf390cac 100644
--- a/runtime/parsec/codelets/codelet_ztrssq.c
+++ b/runtime/parsec/codelets/codelet_ztrssq.c
@@ -45,10 +45,11 @@ CORE_ztrssq_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
                         cham_uplo_t uplo, cham_diag_t diag,
                         int m, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *A, int Am, int An,
                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_ztrssq_parsec, options->priority, "trssq",
@@ -57,7 +58,7 @@ void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
         sizeof(int),            &m,                     VALUE,
         sizeof(int),            &n,                     VALUE,
         PASSED_BY_REF,          RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),            &lda,                   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,          RTBLKADDR( SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn ),    INOUT | AFFINITY,
         PARSEC_DTD_ARG_END );
 }
diff --git a/runtime/parsec/codelets/codelet_ztrtri.c b/runtime/parsec/codelets/codelet_ztrtri.c
index c8f80aa308b634a328e139680590ac1b4901a606..e91aa9acec64fdcac7783a9e2d6e418a570ff16c 100644
--- a/runtime/parsec/codelets/codelet_ztrtri.c
+++ b/runtime/parsec/codelets/codelet_ztrtri.c
@@ -51,10 +51,11 @@ CORE_ztrtri_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
                         cham_uplo_t uplo, cham_diag_t diag,
                         int n, int nb,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *A, int Am, int An,
                         int iinfo )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_ztrtri_parsec, options->priority, "trtri",
@@ -62,7 +63,7 @@ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
         sizeof(int),                 &diag,                  VALUE,
         sizeof(int),                 &n,                     VALUE,
         PASSED_BY_REF,               RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-        sizeof(int),                 &lda,                   VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         sizeof(int),                 &iinfo,                 VALUE,
         sizeof(RUNTIME_sequence_t*), &(options->sequence),   VALUE,
         sizeof(RUNTIME_request_t*),  &(options->request),    VALUE,
diff --git a/runtime/parsec/codelets/codelet_ztsmlq_hetra1.c b/runtime/parsec/codelets/codelet_ztsmlq_hetra1.c
index 5df710c3a3c1bcc196bd83c5f1ba6b1cfbcaa335..b37325c2f67b23a1711d2e87dde81eb796f80e19 100644
--- a/runtime/parsec/codelets/codelet_ztsmlq_hetra1.c
+++ b/runtime/parsec/codelets/codelet_ztsmlq_hetra1.c
@@ -62,12 +62,16 @@ CORE_ztsmlq_hetra1_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options,
                               cham_side_t side, cham_trans_t trans,
                               int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                              const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                              const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                              const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                              const CHAM_desc_t *T, int Tm, int Tn, int ldt)
+                              const CHAM_desc_t *A1, int A1m, int A1n,
+                              const CHAM_desc_t *A2, int A2m, int A2n,
+                              const CHAM_desc_t *V, int Vm, int Vn,
+                              const CHAM_desc_t *T, int Tm, int Tn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA1 = A1->get_blktile( A1, A1m, A1n );
+    CHAM_tile_t *tileA2 = A2->get_blktile( A2, A2m, A2n );
+    CHAM_tile_t *tileV = V->get_blktile( V, Vm, Vn );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
     int ldwork = side == ChamLeft ? ib : nb;
 
     parsec_dtd_taskpool_insert_task(
@@ -81,13 +85,13 @@ void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options,
         sizeof(int),        &k,      VALUE,
         sizeof(int),        &ib,     VALUE,
         PASSED_BY_REF,       RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT,
-        sizeof(int),        &lda1,   VALUE,
+        sizeof(int), &(tileA1->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | AFFINITY,
-        sizeof(int),        &lda2,   VALUE,
+        sizeof(int), &(tileA2->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR(V,  CHAMELEON_Complex64_t, Vm,  Vn),  INPUT,
-        sizeof(int),        &ldv,    VALUE,
+        sizeof(int), &(tileV->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR(T,  CHAMELEON_Complex64_t, Tm,  Tn),  INPUT,
-        sizeof(int),        &ldt,    VALUE,
+        sizeof(int), &(tileT->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
         sizeof(int),        &ldwork, VALUE,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/codelets/codelet_ztsmqr_hetra1.c b/runtime/parsec/codelets/codelet_ztsmqr_hetra1.c
index 3aeddf268ce41863394771a47e7bf0051a0c36d9..4e04d8b4b5b49ebc1d27a2ba3fdc9ac36ebd275b 100644
--- a/runtime/parsec/codelets/codelet_ztsmqr_hetra1.c
+++ b/runtime/parsec/codelets/codelet_ztsmqr_hetra1.c
@@ -62,12 +62,16 @@ CORE_ztsmqr_hetra1_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options,
                               cham_side_t side, cham_trans_t trans,
                               int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                              const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                              const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                              const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                              const CHAM_desc_t *T, int Tm, int Tn, int ldt)
+                              const CHAM_desc_t *A1, int A1m, int A1n,
+                              const CHAM_desc_t *A2, int A2m, int A2n,
+                              const CHAM_desc_t *V, int Vm, int Vn,
+                              const CHAM_desc_t *T, int Tm, int Tn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA1 = A1->get_blktile( A1, A1m, A1n );
+    CHAM_tile_t *tileA2 = A2->get_blktile( A2, A2m, A2n );
+    CHAM_tile_t *tileV = V->get_blktile( V, Vm, Vn );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
     int ldwork = side == ChamLeft ? ib : nb;
 
     parsec_dtd_taskpool_insert_task(
@@ -81,13 +85,13 @@ void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options,
         sizeof(int),        &k,      VALUE,
         sizeof(int),        &ib,     VALUE,
         PASSED_BY_REF,       RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT,
-        sizeof(int),        &lda1,   VALUE,
+        sizeof(int), &(tileA1->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT | AFFINITY,
-        sizeof(int),        &lda2,   VALUE,
+        sizeof(int), &(tileA2->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR(V,  CHAMELEON_Complex64_t, Vm,  Vn),  INPUT,
-        sizeof(int),        &ldv,    VALUE,
+        sizeof(int), &(tileV->ld), VALUE,
         PASSED_BY_REF,       RTBLKADDR(T,  CHAMELEON_Complex64_t, Tm,  Tn),  INPUT,
-        sizeof(int),        &ldt,    VALUE,
+        sizeof(int), &(tileT->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL, SCRATCH,
         sizeof(int),        &ldwork, VALUE,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/codelets/codelet_ztstrf.c b/runtime/parsec/codelets/codelet_ztstrf.c
index 5cf60cee1a32379ee7d52c90bbacae3097d2ae65..b489bab07aaef59b9b58a11e92f74d8cff8f2bc2 100644
--- a/runtime/parsec/codelets/codelet_ztstrf.c
+++ b/runtime/parsec/codelets/codelet_ztstrf.c
@@ -59,13 +59,16 @@ CORE_ztstrf_parsec( parsec_execution_stream_t *context,
 
 void INSERT_TASK_ztstrf(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
-                       const CHAM_desc_t *U, int Um, int Un, int ldu,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                       const CHAM_desc_t *U, int Um, int Un,
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *L, int Lm, int Ln,
                        int *IPIV,
                        cham_bool_t check_info, int iinfo)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileU = U->get_blktile( U, Um, Un );
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileL = L->get_blktile( L, Lm, Ln );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_ztstrf_parsec, options->priority, "tstrf",
@@ -74,11 +77,11 @@ void INSERT_TASK_ztstrf(const RUNTIME_option_t *options,
         sizeof(int),                 &ib,                               VALUE,
         sizeof(int),                 &nb,                               VALUE,
         PASSED_BY_REF,               RTBLKADDR( U, CHAMELEON_Complex64_t, Um, Un ), chameleon_parsec_get_arena_index( U ) | INOUT,
-        sizeof(int),                 &ldu,                              VALUE,
+        sizeof(int), &(tileU->ld), VALUE,
         PASSED_BY_REF,               RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INOUT | AFFINITY,
-        sizeof(int),                 &lda,                              VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,               RTBLKADDR( L, CHAMELEON_Complex64_t, Lm, Ln ), chameleon_parsec_get_arena_index( L ) | OUTPUT,
-        sizeof(int),                 &ldl,                              VALUE,
+        sizeof(int), &(tileL->ld), VALUE,
         sizeof(int*),                &IPIV,                             VALUE,
         sizeof(CHAMELEON_Complex64_t)*ib*nb,    NULL,                 SCRATCH,
         sizeof(int),                 &nb,                               VALUE,
diff --git a/runtime/parsec/codelets/codelet_zunmlq.c b/runtime/parsec/codelets/codelet_zunmlq.c
index f1c725cc3ea144e531539d39152d9ce6052b8ea9..13866d1921f114f30365fd282813db2077021985 100644
--- a/runtime/parsec/codelets/codelet_zunmlq.c
+++ b/runtime/parsec/codelets/codelet_zunmlq.c
@@ -53,11 +53,14 @@ CORE_zunmlq_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zunmlq(const RUNTIME_option_t *options,
                        cham_side_t side, cham_trans_t trans,
                        int m, int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn,
+                       const CHAM_desc_t *C, int Cm, int Cn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zunmlq_parsec, options->priority, "unmlq",
@@ -68,11 +71,11 @@ void INSERT_TASK_zunmlq(const RUNTIME_option_t *options,
         sizeof(int),                        &k,                 VALUE,
         sizeof(int),                        &ib,                VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),                        &lda,               VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | INPUT,
-        sizeof(int),                        &ldt,               VALUE,
+        sizeof(int), &(tileT->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( C, CHAMELEON_Complex64_t, Cm, Cn ), chameleon_parsec_get_arena_index( C ) | INOUT | AFFINITY,
-        sizeof(int),                        &ldc,               VALUE,
+        sizeof(int), &(tileC->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t)*ib*nb,    NULL,               SCRATCH,
         sizeof(int),                        &nb,                VALUE,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/codelets/codelet_zunmqr.c b/runtime/parsec/codelets/codelet_zunmqr.c
index 66eb30cdbac747fc4299d154c7c945fb73c9eead..2e5ff664fbe8f6fc33565481bd964e911bdc447e 100644
--- a/runtime/parsec/codelets/codelet_zunmqr.c
+++ b/runtime/parsec/codelets/codelet_zunmqr.c
@@ -53,11 +53,14 @@ CORE_zunmqr_parsec( parsec_execution_stream_t *context,
 void INSERT_TASK_zunmqr(const RUNTIME_option_t *options,
                        cham_side_t side, cham_trans_t trans,
                        int m, int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn,
+                       const CHAM_desc_t *C, int Cm, int Cn)
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
+    CHAM_tile_t *tileA = A->get_blktile( A, Am, An );
+    CHAM_tile_t *tileT = T->get_blktile( T, Tm, Tn );
+    CHAM_tile_t *tileC = C->get_blktile( C, Cm, Cn );
 
     parsec_dtd_taskpool_insert_task(
         PARSEC_dtd_taskpool, CORE_zunmqr_parsec, options->priority, "unmqr",
@@ -68,11 +71,11 @@ void INSERT_TASK_zunmqr(const RUNTIME_option_t *options,
         sizeof(int),           &k,                                 VALUE,
         sizeof(int),           &ib,                                VALUE,
         PASSED_BY_REF,         RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | INPUT,
-        sizeof(int),           &lda,                               VALUE,
+        sizeof(int), &(tileA->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), chameleon_parsec_get_arena_index( T ) | INPUT,
-        sizeof(int),           &ldt,                               VALUE,
+        sizeof(int), &(tileT->ld), VALUE,
         PASSED_BY_REF,         RTBLKADDR( C, CHAMELEON_Complex64_t, Cm, Cn ), chameleon_parsec_get_arena_index( C ) | INOUT | AFFINITY,
-        sizeof(int),           &ldc,                               VALUE,
+        sizeof(int), &(tileC->ld), VALUE,
         sizeof(CHAMELEON_Complex64_t)*ib*nb,   NULL,                          SCRATCH,
         sizeof(int),           &nb,                                VALUE,
         PARSEC_DTD_ARG_END );
diff --git a/runtime/parsec/include/chameleon_parsec.h b/runtime/parsec/include/chameleon_parsec.h
index 94a850f753596a092c12d16edcf0859c3aa1abdd..1d2a035c695e3a13198e1cd718a036369e1a2a33 100644
--- a/runtime/parsec/include/chameleon_parsec.h
+++ b/runtime/parsec/include/chameleon_parsec.h
@@ -32,7 +32,7 @@
 struct chameleon_parsec_desc_s {
     parsec_data_collection_t super;
     int                      arena_index;
-    CHAM_desc_t            *desc;
+    CHAM_desc_t             *desc; /* Chameleon descriptor; presumably the one this collection wraps -- confirm */
     parsec_data_t          **data_map;
 };
 
diff --git a/runtime/quark/codelets/codelet_dzasum.c b/runtime/quark/codelets/codelet_dzasum.c
index 8c49a6f76160564cdc2100ba9cee6a419260ff0e..86b1a6e2b8583ecec039d015da45128c5c7c3d96 100644
--- a/runtime/quark/codelets/codelet_dzasum.c
+++ b/runtime/quark/codelets/codelet_dzasum.c
@@ -12,8 +12,6 @@
  * @brief Chameleon dzasum Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @date 2014-11-16
  * @precisions normal z -> c d s
@@ -21,25 +19,25 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
-void CORE_dzasum_quark(Quark *quark)
+static inline void
+CORE_dzasum_quark(Quark *quark) /* QUARK task body: unpacks the packed arguments and calls TCORE_dzasum */
 {
     cham_store_t storev;
     cham_uplo_t uplo;
     int M;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *work;
+    CHAM_tile_t *A; /* tile forwarded unchanged to TCORE_dzasum; registered INPUT by INSERT_TASK_dzasum */
+    CHAM_tile_t *work; /* tile whose mat buffer is the last TCORE_dzasum argument; registered INOUT */
 
-    quark_unpack_args_7(quark, storev, uplo, M, N, A, lda, work);
-    CORE_dzasum(storev, uplo, M, N, A, lda, work);
+    quark_unpack_args_6(quark, storev, uplo, M, N, A, work); /* fills the six locals above, in packing order */
+    TCORE_dzasum( storev, uplo, M, N, A, work->mat ); /* A goes in as a tile, work as its raw buffer */
 }
 
 void INSERT_TASK_dzasum(const RUNTIME_option_t *options,
                        cham_store_t storev, cham_uplo_t uplo, int M, int N,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *A, int Am, int An,
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -49,8 +47,7 @@ void INSERT_TASK_dzasum(const RUNTIME_option_t *options,
         sizeof(int),              &uplo,      VALUE,
         sizeof(int),                     &M,         VALUE,
         sizeof(int),                     &N,         VALUE,
-        sizeof(CHAMELEON_Complex64_t)*lda*N,  RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
-        sizeof(int),                     &lda,       VALUE,
-        sizeof(double),                   RTBLKADDR(B, double, Bm, Bn), INOUT,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
+        sizeof(void*), RTBLKADDR(B, double, Bm, Bn), INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_map.c b/runtime/quark/codelets/codelet_map.c
index 5e51cd9c552f3d998dc764b5b008194f5fffc85c..5b6ed1ac8dfa5fa80e49a85637ef52c1a978eeb5 100644
--- a/runtime/quark/codelets/codelet_map.c
+++ b/runtime/quark/codelets/codelet_map.c
@@ -23,12 +23,12 @@ void CORE_map_quark(Quark *quark)
     cham_uplo_t uplo;
     int m;
     int n;
-    void *data;
+    CHAM_tile_t *tile;
     cham_unary_operator_t op_fct;
     void *op_args;
 
-    quark_unpack_args_7( quark, desc, uplo, m, n, data, op_fct, op_args );
-    op_fct( desc, uplo, m, n, data, op_args );
+    quark_unpack_args_7( quark, desc, uplo, m, n, tile, op_fct, op_args );
+    op_fct( desc, uplo, m, n, tile, op_args );
 }
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
@@ -43,7 +43,7 @@ void INSERT_TASK_map( const RUNTIME_option_t *options,
         sizeof(cham_uplo_t),              &uplo, VALUE,
         sizeof(int),                      &Am,   VALUE,
         sizeof(int),                      &An,   VALUE,
-        sizeof(char), RTBLKADDR(A, void, Am, An), INOUT,
+        sizeof(void*), RTBLKADDR(A, void, Am, An), INOUT,
         sizeof(cham_unary_operator_t),    &op_fct,  VALUE,
         sizeof(void*),                    &op_args, VALUE,
         0);
diff --git a/runtime/quark/codelets/codelet_zaxpy.c b/runtime/quark/codelets/codelet_zaxpy.c
index be7fd595991dbc1bdf671fe813db39d3870e211a..6e77e3ca1a2c6f9874e00efd63271114f03845a2 100644
--- a/runtime/quark/codelets/codelet_zaxpy.c
+++ b/runtime/quark/codelets/codelet_zaxpy.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zaxpy Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @date 2014-11-16
  * @precisions normal z -> c d s
@@ -21,19 +19,19 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zaxpy_quark(Quark *quark)
 {
     int M;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
+    CHAM_tile_t *tileA; /* first tile operand, paired with incA; registered INPUT by INSERT_TASK_zaxpy */
     int incA;
-    CHAMELEON_Complex64_t *B;
+    CHAM_tile_t *tileB; /* second tile operand, paired with incB; registered INOUT by INSERT_TASK_zaxpy */
     int incB;
 
-    quark_unpack_args_6(quark, M, alpha, A, incA, B, incB);
-    CORE_zaxpy(M, alpha, A, incA, B, incB);
+    quark_unpack_args_6( quark, M, alpha, tileA, incA, tileB, incB ); /* fills the six locals above, in packing order */
+    TCORE_zaxpy(M, alpha, tileA, incA, tileB, incB); /* tile-based kernel: operands are passed as CHAM_tile_t pointers */
 }
 
 void INSERT_TASK_zaxpy(const RUNTIME_option_t *options,
@@ -46,9 +44,9 @@ void INSERT_TASK_zaxpy(const RUNTIME_option_t *options,
     QUARK_Insert_Task(opt->quark, CORE_zaxpy_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                        &M,         VALUE,
         sizeof(CHAMELEON_Complex64_t),      &alpha,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*M,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
         sizeof(int),                        &incA,      VALUE,
-        sizeof(CHAMELEON_Complex64_t)*M,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INOUT,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), INOUT,
         sizeof(int),                        &incB,      VALUE,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zbuild.c b/runtime/quark/codelets/codelet_zbuild.c
index d7269a6af4bf992dd10175407d62387c4b037fa2..ff2de7675e62a5630c2549c989f59dadc353b0fa 100644
--- a/runtime/quark/codelets/codelet_zbuild.c
+++ b/runtime/quark/codelets/codelet_zbuild.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zbuild Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Piotr Luszczek
  * @author Pierre Lemarinier
  * @author Mathieu Faverge
@@ -26,23 +24,22 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zbuild_quark(Quark *quark)
 {
-    CHAMELEON_Complex64_t *A;
-    int lda;
+    CHAM_tile_t *tileA; /* tile handed to the user callback; registered OUTPUT by INSERT_TASK_zbuild */
     void *user_data;
     void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ;
     int row_min, row_max, col_min, col_max;
 
-    quark_unpack_args_8( quark, row_min, row_max, col_min, col_max, A, lda, user_data, user_build_callback);
+    quark_unpack_args_7( quark, row_min, row_max, col_min, col_max, tileA, user_data, user_build_callback); /* fills the seven locals above, in packing order */
 
-    user_build_callback(row_min, row_max, col_min, col_max, A, lda, user_data);
+    user_build_callback(row_min, row_max, col_min, col_max, tileA->mat, tileA->ld, user_data); /* buffer and ld both come from the tile itself */
 }
 
 void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *A, int Am, int An,
                         void *user_data, void* user_build_callback )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -58,8 +55,7 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
                       sizeof(int),                      &row_max,    VALUE,
                       sizeof(int),                      &col_min,    VALUE,
                       sizeof(int),                      &col_max,    VALUE,
-                      sizeof(CHAMELEON_Complex64_t)*lda*A->nb, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),         OUTPUT,
-                      sizeof(int),                      &lda,  VALUE,
+                      sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),         OUTPUT,
                       sizeof(void*),                    &user_data,  VALUE,
                       sizeof(void*),                    &user_build_callback,   VALUE,
                       0);
diff --git a/runtime/quark/codelets/codelet_zgeadd.c b/runtime/quark/codelets/codelet_zgeadd.c
index dd8205a6ed43d16cba0691f0bb815ea479bd8ff4..4e0aef869dac6a9557d577a9519d546a69d953d3 100644
--- a/runtime/quark/codelets/codelet_zgeadd.c
+++ b/runtime/quark/codelets/codelet_zgeadd.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zgeadd Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
@@ -23,7 +21,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zgeadd_quark(Quark *quark)
 {
@@ -31,14 +29,12 @@ void CORE_zgeadd_quark(Quark *quark)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
+    CHAM_tile_t *tileA;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
+    CHAM_tile_t *tileB;
 
-    quark_unpack_args_9(quark, trans, M, N, alpha, A, LDA, beta, B, LDB);
-    CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB);
+    quark_unpack_args_7(quark, trans, M, N, alpha, tileA, beta, tileB);
+    TCORE_zgeadd(trans, M, N, alpha, tileA, beta, tileB);
     return;
 }
 
@@ -97,8 +93,8 @@ void CORE_zgeadd_quark(Quark *quark)
  */
 void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
                          cham_trans_t trans, int m, int n, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GEADD;
@@ -107,11 +103,9 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
         sizeof(int),                        &m,     VALUE,
         sizeof(int),                        &n,     VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha, VALUE,
-        sizeof(CHAMELEON_Complex64_t)*lda*n,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INPUT,
-        sizeof(int),                        &lda,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INPUT,
         sizeof(CHAMELEON_Complex64_t),         &beta,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ldb*n,    RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             INOUT,
-        sizeof(int),                        &ldb,   VALUE,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             INOUT,
         0);
 
     (void)nb;
diff --git a/runtime/quark/codelets/codelet_zgelqt.c b/runtime/quark/codelets/codelet_zgelqt.c
index 6704bd55a2956181927d622aadf9a4d1267d53f4..d294df89ba89e09db2c45e7aa8081e5670699d0b 100644
--- a/runtime/quark/codelets/codelet_zgelqt.c
+++ b/runtime/quark/codelets/codelet_zgelqt.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zgelqt Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,89 +23,27 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zgelqt_quark(Quark *quark)
 {
     int m;
     int n;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
+    CHAM_tile_t *tileA; /* tile to factorize; registered INOUT by INSERT_TASK_zgelqt */
+    CHAM_tile_t *tileT; /* tile receiving the T factor; registered OUTPUT by INSERT_TASK_zgelqt */
     CHAMELEON_Complex64_t *TAU;
     CHAMELEON_Complex64_t *WORK;
 
-    quark_unpack_args_9(quark, m, n, ib, A, lda, T, ldt, TAU, WORK);
-    CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldt );
-    CORE_zgelqt(m, n, ib, A, lda, T, ldt, TAU, WORK);
+    quark_unpack_args_7(quark, m, n, ib, tileA, tileT, TAU, WORK); /* fills the seven locals above, in packing order */
+    TCORE_zlaset( ChamUpperLower, ib, m, 0., 0., tileT ); /* zero the ib-by-m part of T before the kernel writes it */
+    TCORE_zgelqt(m, n, ib, tileA,  tileT,  TAU, WORK); /* LQ factorization kernel; TAU and WORK are scratch buffers */
 }
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgelqt - computes a LQ factorization of a complex M-by-N tile A: A = L * Q.
- *
- *  The tile Q is represented as a product of elementary reflectors
- *
- *    Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).
- *
- *  Each H(i) has the form
- *
- *    H(i) = I - tau * v * v'
- *
- *  where tau is a complex scalar, and v is a complex vector with
- *  v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
- *  A(i,i+1:n), and tau in TAU(i).
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, the elements on and below the diagonal of the array
- *         contain the M-by-min(M,N) lower trapezoidal tile L (L is
- *         lower triangular if M <= N); the elements above the diagonal,
- *         with the array TAU, represent the unitary tile Q as a
- *         product of elementary reflectors (see Further Details).
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- * @param[out] T
- *         The IB-by-N triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] LDT
- *         The leading dimension of the array T. LDT >= IB.
- *
- * @param[out] TAU
- *         The scalar factors of the elementary reflectors (see Further
- *         Details).
- *
- * @param[out] WORK
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GELQT;
@@ -115,10 +51,8 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
         sizeof(int),                     &m,     VALUE,
         sizeof(int),                     &n,     VALUE,
         sizeof(int),                     &ib,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INOUT,
-        sizeof(int),                     &lda,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), OUTPUT,
-        sizeof(int),                     &ldt,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INOUT,
+        sizeof(void*), RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), OUTPUT,
         sizeof(CHAMELEON_Complex64_t)*nb,    NULL,          SCRATCH,
         sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL,          SCRATCH,
         0);
diff --git a/runtime/quark/codelets/codelet_zgemm.c b/runtime/quark/codelets/codelet_zgemm.c
index b736b9b880ff369d3bfc00b92a020900e79e4d6c..7bdffae459449ba81628f2f9a6b3f975d2e8fb3a 100644
--- a/runtime/quark/codelets/codelet_zgemm.c
+++ b/runtime/quark/codelets/codelet_zgemm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zgemm Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zgemm_quark(Quark *quark)
 {
@@ -35,28 +33,25 @@ void CORE_zgemm_quark(Quark *quark)
     int n;
     int k;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *B;
-    int ldb;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *C;
-    int ldc;
+    CHAM_tile_t *tileC;
 
-    quark_unpack_args_13(quark, transA, transB, m, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
-    CORE_zgemm(transA, transB,
-               m, n, k,
-               alpha, A, lda,
-               B, ldb,
-               beta, C, ldc);
+    quark_unpack_args_10(quark, transA, transB, m, n, k, alpha, tileA, tileB, beta, tileC);
+    TCORE_zgemm( transA, transB,
+                 m, n, k,
+                 alpha, tileA,
+                        tileB,
+                 beta,  tileC );
 }
 
 void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
                       cham_trans_t transA, cham_trans_t transB,
                       int m, int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GEMM;
@@ -67,12 +62,9 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
                       sizeof(int),                        &n,         VALUE,
                       sizeof(int),                        &k,         VALUE,
                       sizeof(CHAMELEON_Complex64_t),         &alpha,     VALUE,
-                      sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
-                      sizeof(int),                        &lda,       VALUE,
-                      sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INPUT,
-                      sizeof(int),                        &ldb,       VALUE,
+                      sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
+                      sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INPUT,
                       sizeof(CHAMELEON_Complex64_t),         &beta,      VALUE,
-                      sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
-                      sizeof(int),                        &ldc,       VALUE,
+                      sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
                       0);
 }
diff --git a/runtime/quark/codelets/codelet_zgeqrt.c b/runtime/quark/codelets/codelet_zgeqrt.c
index 4aa554ea90b8ef64e8d550eb4244f1403ab8873b..2cf6d7a328e568f72dff86d0e82ba93820c97c03 100644
--- a/runtime/quark/codelets/codelet_zgeqrt.c
+++ b/runtime/quark/codelets/codelet_zgeqrt.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zgeqrt Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,90 +23,27 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zgeqrt_quark(Quark *quark)
 {
     int m;
     int n;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
+    CHAM_tile_t *tileA; /* tile to factorize; registered INOUT by INSERT_TASK_zgeqrt */
+    CHAM_tile_t *tileT; /* tile receiving the T factor; registered OUTPUT by INSERT_TASK_zgeqrt */
     CHAMELEON_Complex64_t *TAU;
     CHAMELEON_Complex64_t *WORK;
 
-    quark_unpack_args_9(quark, m, n, ib, A, lda, T, ldt, TAU, WORK);
-    CORE_zlaset( ChamUpperLower, ib, n, 0., 0., T, ldt );
-    CORE_zgeqrt(m, n, ib, A, lda, T, ldt, TAU, WORK);
+    quark_unpack_args_7(quark, m, n, ib, tileA, tileT, TAU, WORK); /* fills the seven locals above, in packing order */
+    TCORE_zlaset( ChamUpperLower, ib, n, 0., 0., tileT ); /* zero the ib-by-n part of T before the kernel writes it */
+    TCORE_zgeqrt( m, n, ib, tileA, tileT, TAU, WORK ); /* QR factorization kernel; TAU and WORK are scratch buffers */
 }
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgeqrt computes a QR factorization of a complex M-by-N tile A:
- *  A = Q * R.
- *
- *  The tile Q is represented as a product of elementary reflectors
- *
- *    Q = H(1) H(2) . . . H(k), where k = min(M,N).
- *
- *  Each H(i) has the form
- *
- *    H(i) = I - tau * v * v'
- *
- *  where tau is a complex scalar, and v is a complex vector with
- *  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
- *  and tau in TAU(i).
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, the elements on and above the diagonal of the array
- *         contain the min(M,N)-by-N upper trapezoidal tile R (R is
- *         upper triangular if M >= N); the elements below the diagonal,
- *         with the array TAU, represent the unitary tile Q as a
- *         product of elementary reflectors (see Further Details).
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- * @param[out] T
- *         The IB-by-N triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] LDT
- *         The leading dimension of the array T. LDT >= IB.
- *
- * @param[out] TAU
- *         The scalar factors of the elementary reflectors (see Further
- *         Details).
- *
- * @param[out] WORK
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GEQRT;
@@ -116,10 +51,8 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
         sizeof(int),                     &m,     VALUE,
         sizeof(int),                     &n,     VALUE,
         sizeof(int),                     &ib,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INOUT,
-        sizeof(int),                     &lda,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ib*nb, RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), OUTPUT,
-        sizeof(int),                     &ldt,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INOUT,
+        sizeof(void*), RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), OUTPUT,
         sizeof(CHAMELEON_Complex64_t)*nb,    NULL,   SCRATCH,
         sizeof(CHAMELEON_Complex64_t)*ib*nb, NULL,   SCRATCH,
         0);
diff --git a/runtime/quark/codelets/codelet_zgessm.c b/runtime/quark/codelets/codelet_zgessm.c
index 53a6b3eb9f4746fe2f3a0d288f6710ce9d17a7bd..5a893bd8549f3d4e77dbde391f25eab915330d70 100644
--- a/runtime/quark/codelets/codelet_zgessm.c
+++ b/runtime/quark/codelets/codelet_zgessm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zgessm Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -26,7 +24,7 @@
 #include "coreblas/cblas.h"
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zgessm_quark(Quark *quark)
 {
@@ -35,67 +33,20 @@ void CORE_zgessm_quark(Quark *quark)
     int k;
     int ib;
     int *IPIV;
-    CHAMELEON_Complex64_t *L;
-    int ldl;
-    CHAMELEON_Complex64_t *D;
-    int ldd;
-    CHAMELEON_Complex64_t *A;
-    int lda;
+    CHAM_tile_t *tileL;
+    CHAM_tile_t *tileD;
+    CHAM_tile_t *tileA;
 
-    quark_unpack_args_11(quark, m, n, k, ib, IPIV, L, ldl, D, ldd, A, lda);
-    CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda);
+    quark_unpack_args_8(quark, m, n, k, ib, IPIV, tileL, tileD, tileA);
+    TCORE_zgessm(m, n, k, ib, IPIV, tileD, tileA);
 }
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgessm applies the factors L computed by CORE_zgetrf_incpiv to
- *  a complex M-by-N tile A.
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] K
- *         The number of columns of the tile L. K >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in] IPIV
- *         The pivot indices array of size K as returned by
- *         CORE_zgetrf_incpiv.
- *
- * @param[in] L
- *         The M-by-K lower triangular tile.
- *
- * @param[in] LDL
- *         The leading dimension of the array L.  LDL >= max(1,M).
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, updated by the application of L.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- *
- */
 void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
                        int m, int n, int k, int ib, int nb,
                        int *IPIV,
-                       const CHAM_desc_t *L, int Lm, int Ln, int ldl,
-                       const CHAM_desc_t *D, int Dm, int Dn, int ldd,
-                       const CHAM_desc_t *A, int Am, int An, int lda)
+                       const CHAM_desc_t *L, int Lm, int Ln,
+                       const CHAM_desc_t *D, int Dm, int Dn,
+                       const CHAM_desc_t *A, int Am, int An)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GESSM;
@@ -105,11 +56,8 @@ void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
         sizeof(int),                        &k,     VALUE,
         sizeof(int),                        &ib,    VALUE,
         sizeof(int)*nb,                      IPIV,          INPUT,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln),             INPUT | QUARK_REGION_L,
-        sizeof(int),                        &ldl,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(D, CHAMELEON_Complex64_t, Dm, Dn),             INPUT | QUARK_REGION_L,
-        sizeof(int),                        &ldd,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INOUT,
-        sizeof(int),                        &lda,   VALUE,
+        sizeof(void*), RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln),             INPUT | QUARK_REGION_L,
+        sizeof(void*), RTBLKADDR(D, CHAMELEON_Complex64_t, Dm, Dn),             INPUT | QUARK_REGION_L,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zgessq.c b/runtime/quark/codelets/codelet_zgessq.c
index a0f343091a76782f5853b87b73a84da415db1b46..5ea99fc1f206508b7b641c7d935eb4f0d8870522 100644
--- a/runtime/quark/codelets/codelet_zgessq.c
+++ b/runtime/quark/codelets/codelet_zgessq.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zgessq Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @date 2014-11-16
  * @precisions normal z -> c d s
@@ -21,44 +19,32 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zgessq_quark(Quark *quark)
 {
     cham_store_t storev;
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *SCALESUMSQ;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileW;
 
-    quark_unpack_args_6( quark, storev, m, n, A, lda, SCALESUMSQ );
-    CORE_zgessq( storev, m, n, A, lda, SCALESUMSQ );
+    quark_unpack_args_5( quark, storev, m, n, tileA, tileW );
+    TCORE_zgessq( storev, m, n, tileA, tileW );
 }
 
 void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
-                        cham_store_t storev, int m, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
+                         cham_store_t storev, int m, int n,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
-    int sizessq;
-
-    if ( storev == ChamColumnwise ) {
-        sizessq = 2*n;
-    } else if ( storev == ChamRowwise ) {
-        sizessq = 2*m;
-    } else {
-        sizessq = 2;
-    }
-
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GESSQ;
     QUARK_Insert_Task(opt->quark, CORE_zgessq_quark, (Quark_Task_Flags*)opt,
                       sizeof(cham_store_t),            &storev, VALUE,
                       sizeof(int),                     &m,      VALUE,
                       sizeof(int),                     &n,      VALUE,
-                      sizeof(CHAMELEON_Complex64_t)*m*n, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
-                      sizeof(int),                     &lda,    VALUE,
-                      sizeof(double)*sizessq,          RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn), INOUT,
+                      sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
+                      sizeof(void*), RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn), INOUT,
                       0);
 }
diff --git a/runtime/quark/codelets/codelet_zgetrf.c b/runtime/quark/codelets/codelet_zgetrf.c
index 86d1c324405b79290b6f7ccfedd028551c4b1bd5..8b235b15d4b0a5cc14a61c7ed90e9acaf1868d4c 100644
--- a/runtime/quark/codelets/codelet_zgetrf.c
+++ b/runtime/quark/codelets/codelet_zgetrf.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zgetrf Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
@@ -23,14 +21,13 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zgetrf_quark(Quark *quark)
 {
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
+    CHAM_tile_t *tileA;
     int *IPIV;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
@@ -38,8 +35,8 @@ void CORE_zgetrf_quark(Quark *quark)
     int iinfo;
     int info;
 
-    quark_unpack_args_9(quark, m, n, A, lda, IPIV, sequence, request, check_info, iinfo);
-    CORE_zgetrf( m, n, A, lda, IPIV, &info );
+    quark_unpack_args_8(quark, m, n, tileA, IPIV, sequence, request, check_info, iinfo);
+    TCORE_zgetrf( m, n, tileA, IPIV, &info );
     if ( (info != CHAMELEON_SUCCESS) && check_info ) {
         RUNTIME_sequence_flush( (CHAM_context_t*)quark, sequence, request, iinfo+info );
     }
@@ -47,7 +44,7 @@ void CORE_zgetrf_quark(Quark *quark)
 
 void INSERT_TASK_zgetrf(const RUNTIME_option_t *options,
                        int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *A, int Am, int An,
                        int *IPIV,
                        cham_bool_t check_info, int iinfo)
 {
@@ -56,11 +53,10 @@ void INSERT_TASK_zgetrf(const RUNTIME_option_t *options,
     QUARK_Insert_Task(opt->quark, CORE_zgetrf_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                        &m,             VALUE,
         sizeof(int),                        &n,             VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                     INOUT | LOCALITY,
-        sizeof(int),                        &lda,           VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                     INOUT | LOCALITY,
         sizeof(int)*nb,                      IPIV,                  OUTPUT,
-        sizeof(RUNTIME_sequence_t*),           &(options->sequence),      VALUE,
-        sizeof(RUNTIME_request_t*),            &(options->request),       VALUE,
+        sizeof(RUNTIME_sequence_t*),        &(options->sequence),      VALUE,
+        sizeof(RUNTIME_request_t*),         &(options->request),       VALUE,
         sizeof(cham_bool_t),                &check_info,    VALUE,
         sizeof(int),                        &iinfo,         VALUE,
         0);
diff --git a/runtime/quark/codelets/codelet_zgetrf_incpiv.c b/runtime/quark/codelets/codelet_zgetrf_incpiv.c
index 7a4da42d7a63e9cda85b89cb55ea6e33062bdb09..2bac0ee19a3634e9afb4ca9dc936075e921bf92c 100644
--- a/runtime/quark/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/quark/codelets/codelet_zgetrf_incpiv.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zgetrf_incpiv Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,15 +23,14 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zgetrf_incpiv_quark(Quark *quark)
 {
     int m;
     int n;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda;
+    CHAM_tile_t *tileA;
     int *IPIV;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
@@ -42,70 +39,17 @@ void CORE_zgetrf_incpiv_quark(Quark *quark)
 
     int info;
 
-    quark_unpack_args_10(quark, m, n, ib, A, lda, IPIV, sequence, request, check_info, iinfo);
-    CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info);
+    quark_unpack_args_9(quark, m, n, ib, tileA, IPIV, sequence, request, check_info, iinfo);
+    TCORE_zgetrf_incpiv(m, n, ib, tileA, IPIV, &info);
     if ( (info != CHAMELEON_SUCCESS) && check_info ) {
         RUNTIME_sequence_flush( (CHAM_context_t*)quark, sequence, request, iinfo+info );
     }
 }
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgetrf_incpiv computes an LU factorization of a general M-by-N tile A
- *  using partial pivoting with row interchanges.
- *
- *  The factorization has the form
- *
- *    A = P * L * U
- *
- *  where P is a permutation matrix, L is lower triangular with unit
- *  diagonal elements (lower trapezoidal if m > n), and U is upper
- *  triangular (upper trapezoidal if m < n).
- *
- *  This is the right-looking Level 2.5 BLAS version of the algorithm.
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile to be factored.
- *         On exit, the factors L and U from the factorization
- *         A = P*L*U; the unit diagonal elements of L are not stored.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- * @param[out] IPIV
- *         The pivot indices; for 1 <= i <= min(M,N), row i of the
- *         tile was interchanged with row IPIV(i).
- *
- * @param[out] INFO
- *         See returned value.
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
- *              has been completed, but the factor U is exactly
- *              singular, and division by zero will occur if it is used
- *              to solve a system of equations.
- *
- */
 void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
                               int m, int n, int ib, int nb,
-                              const CHAM_desc_t *A, int Am, int An, int lda,
-                              const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                              const CHAM_desc_t *A, int Am, int An,
+                              const CHAM_desc_t *L, int Lm, int Ln,
                               int *IPIV,
                               cham_bool_t check_info, int iinfo)
 {
@@ -115,8 +59,7 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
         sizeof(int),                        &m,             VALUE,
         sizeof(int),                        &n,             VALUE,
         sizeof(int),                        &ib,            VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                     INOUT,
-        sizeof(int),                        &lda,           VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                     INOUT,
         sizeof(int)*nb,                      IPIV,                  OUTPUT,
         sizeof(RUNTIME_sequence_t*),           &(options->sequence),      VALUE,
         sizeof(RUNTIME_request_t*),            &(options->request),       VALUE,
@@ -124,5 +67,6 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
         sizeof(int),                        &iinfo,         VALUE,
         0);
 
-    (void)L; (void)Lm; (void)Ln; (void)ldl;
+    /* L, Lm and Ln are not forwarded to the QUARK task; keep them marked as unused. */
+    (void)L; (void)Lm; (void)Ln;
 }
diff --git a/runtime/quark/codelets/codelet_zgetrf_nopiv.c b/runtime/quark/codelets/codelet_zgetrf_nopiv.c
index 8dd8e65fd5cdea6892b9f21880ce693f6f0ec88d..130cf7ef41a69e2a4fd012ab68858b2a2115b5da 100644
--- a/runtime/quark/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/quark/codelets/codelet_zgetrf_nopiv.c
@@ -22,76 +22,29 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zgetrf_nopiv_quark(Quark *quark)
 {
     int m;
     int n;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda;
+    CHAM_tile_t *tileA;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
     int iinfo;
     int info;
 
-    quark_unpack_args_8(quark, m, n, ib, A, lda, sequence, request, iinfo);
-    CORE_zgetrf_nopiv(m, n, ib, A, lda, &info);
+    quark_unpack_args_7(quark, m, n, ib, tileA, sequence, request, iinfo);
+    TCORE_zgetrf_nopiv(m, n, ib, tileA, &info);
     if ( info != CHAMELEON_SUCCESS ) {
         RUNTIME_sequence_flush( (CHAM_context_t*)quark, sequence, request, iinfo+info );
     }
 }
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgetrf_nopiv computes an LU factorization of a general diagonal
- *  dominant M-by-N matrix A witout pivoting.
- *
- *  The factorization has the form
- *     A = L * U
- *  where L is lower triangular with unit
- *  diagonal elements (lower trapezoidal if m > n), and U is upper
- *  triangular (upper trapezoidal if m < n).
- *
- *  This is the right-looking Level 3 BLAS version of the algorithm.
- *  WARNING: Your matrix need to be diagonal dominant if you want to call this
- *  routine safely.
- *
- *******************************************************************************
- *
- *  @param[in] M
- *          The number of rows of the matrix A.  M >= 0.
- *
- *  @param[in] N
- *          The number of columns of the matrix A.  N >= 0.
- *
- *  @param[in] IB
- *          The block size to switch between blocked and unblocked code.
- *
- *  @param[in,out] A
- *          On entry, the M-by-N matrix to be factored.
- *          On exit, the factors L and U from the factorization
- *          A = P*L*U; the unit diagonal elements of L are not stored.
- *
- *  @param[in] LDA
- *          The leading dimension of the array A.  LDA >= max(1,M).
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
- *              has been completed, but the factor U is exactly
- *              singular, and division by zero will occur if it is used
- *              to solve a system of equations.
- *
- */
 void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
                              int m, int n, int ib, int nb,
-                             const CHAM_desc_t *A, int Am, int An, int lda,
+                             const CHAM_desc_t *A, int Am, int An,
                              int iinfo)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -101,8 +54,7 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
         sizeof(int),                        &m,             VALUE,
         sizeof(int),                        &n,             VALUE,
         sizeof(int),                        &ib,            VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                     INOUT,
-        sizeof(int),                        &lda,           VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                     INOUT,
         sizeof(RUNTIME_sequence_t*),           &(options->sequence),      VALUE,
         sizeof(RUNTIME_request_t*),            &(options->request),       VALUE,
         sizeof(int),                        &iinfo,         VALUE,
diff --git a/runtime/quark/codelets/codelet_zgram.c b/runtime/quark/codelets/codelet_zgram.c
index f2c4228ec1fd7dfbe0535d9fac109e903c12ce0f..2acba7cdd644f3e68d66d1aaaa1ccfa3f5cfbea1 100644
--- a/runtime/quark/codelets/codelet_zgram.c
+++ b/runtime/quark/codelets/codelet_zgram.c
@@ -18,36 +18,28 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zgram_quark(Quark *quark)
 {
     cham_uplo_t uplo;
     int m, n, mt, nt;
-    double *Di;
-    int lddi;
-    double *Dj;
-    int lddj;
-    double *D;
-    double *A;
-    int lda;
+    CHAM_tile_t *Di;
+    CHAM_tile_t *Dj;
+    CHAM_tile_t *D;
+    CHAM_tile_t *A;
 
-    quark_unpack_args_12(quark, uplo, m, n, mt, nt, Di, lddi, Dj, lddj, D, A, lda);
-    CORE_zgram( uplo,
-                m, n, mt, nt,
-                Di, lddi,
-                Dj, lddj,
-                D,
-                A, lda);
+    quark_unpack_args_9(quark, uplo, m, n, mt, nt, Di, Dj, D, A );
+    TCORE_zgram( uplo, m, n, mt, nt, Di, Dj, D, A );
 }
 
 void INSERT_TASK_zgram( const RUNTIME_option_t *options,
                         cham_uplo_t uplo,
                         int m, int n, int mt, int nt,
-                        const CHAM_desc_t *Di, int Dim, int Din, int lddi,
-                        const CHAM_desc_t *Dj, int Djm, int Djn, int lddj,
+                        const CHAM_desc_t *Di, int Dim, int Din,
+                        const CHAM_desc_t *Dj, int Djm, int Djn,
                         const CHAM_desc_t *D, int Dm, int Dn,
-                        CHAM_desc_t *A, int Am, int An, int lda)
+                        CHAM_desc_t *A, int Am, int An )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GRAM;
@@ -57,12 +49,9 @@ void INSERT_TASK_zgram( const RUNTIME_option_t *options,
                       sizeof(int),             &n,         VALUE,
                       sizeof(int),             &mt,        VALUE,
                       sizeof(int),             &nt,        VALUE,
-                      sizeof(double)*lddi*mt,  RTBLKADDR(Di, double, Dim, Din), INPUT,
-                      sizeof(int),             &lddi,      VALUE,
-                      sizeof(double)*lddj*nt,  RTBLKADDR(Dj, double, Djm, Djn), INPUT,
-                      sizeof(int),             &lddj,      VALUE,
-                      sizeof(double)*2,        RTBLKADDR(D, double, Dm, Dn),  INPUT,
-                      sizeof(double)*mt*nt,    RTBLKADDR(A, double, Am, An),  INOUT,
-                      sizeof(int),             &lda,       VALUE,
+                      sizeof(void*), RTBLKADDR(Di, double, Dim, Din), INPUT,
+                      sizeof(void*), RTBLKADDR(Dj, double, Djm, Djn), INPUT,
+                      sizeof(void*), RTBLKADDR(D,  double, Dm,  Dn ), INPUT,
+                      sizeof(void*), RTBLKADDR(A,  double, Am,  An ), INOUT,
                       0);
 }
diff --git a/runtime/quark/codelets/codelet_zhe2ge.c b/runtime/quark/codelets/codelet_zhe2ge.c
index fc141c5b7a0846529c59559bdc995c5f9a231b91..ae9ea7e2fac1bc308e7b07c25578c968b735310c 100644
--- a/runtime/quark/codelets/codelet_zhe2ge.c
+++ b/runtime/quark/codelets/codelet_zhe2ge.c
@@ -19,7 +19,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 /**
  *
@@ -31,21 +31,19 @@ static inline void CORE_zhe2ge_quark(Quark *quark)
     cham_uplo_t uplo;
     int M;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
-    quark_unpack_args_7(quark, uplo, M, N, A, LDA, B, LDB);
-    CORE_zhe2ge(uplo, M, N, A, LDA, B, LDB);
+    quark_unpack_args_5(quark, uplo, M, N, tileA, tileB);
+    TCORE_zhe2ge(uplo, M, N, tileA, tileB);
 }
 
 
 void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
                        cham_uplo_t uplo,
                        int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LACPY;
@@ -53,9 +51,7 @@ void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
         sizeof(int),              &uplo,   VALUE,
         sizeof(int),                     &m,      VALUE,
         sizeof(int),                     &n,      VALUE,
-        sizeof(CHAMELEON_Complex64_t)*mb*mb,  RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
-        sizeof(int),                     &lda,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*mb*mb,  RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), OUTPUT,
-        sizeof(int),                     &ldb,    VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), OUTPUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zhemm.c b/runtime/quark/codelets/codelet_zhemm.c
index af368adb26d41d2d07f477374bba0fa1a836cc77..0e576a93eb2ae13fb0c94cab15f25672d3cd7463 100644
--- a/runtime/quark/codelets/codelet_zhemm.c
+++ b/runtime/quark/codelets/codelet_zhemm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zhemm Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zhemm_quark(Quark *quark)
 {
@@ -34,28 +32,25 @@ void CORE_zhemm_quark(Quark *quark)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *C;
-    int LDC;
+    CHAM_tile_t *tileC;
 
-    quark_unpack_args_12(quark, side, uplo, M, N, alpha, A, LDA, B, LDB, beta, C, LDC);
-    CORE_zhemm(side, uplo,
+    quark_unpack_args_9(quark, side, uplo, M, N, alpha, tileA, tileB, beta, tileC);
+    TCORE_zhemm(side, uplo,
         M, N,
-        alpha, A, LDA,
-        B, LDB,
-        beta, C, LDC);
+        alpha, tileA,
+        tileB,
+        beta, tileC);
 }
 
 void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_HEMM;
@@ -65,13 +60,10 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
         sizeof(int),                        &m,       VALUE,
         sizeof(int),                        &n,       VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),               INPUT,
-        sizeof(int),                        &lda,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),               INPUT,
-        sizeof(int),                        &ldb,     VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),               INPUT,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),               INPUT,
         sizeof(CHAMELEON_Complex64_t),         &beta,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),               INOUT,
-        sizeof(int),                        &ldc,     VALUE,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),               INOUT,
         0);
 }
 
diff --git a/runtime/quark/codelets/codelet_zher2k.c b/runtime/quark/codelets/codelet_zher2k.c
index c39093ddc0c00466d7122f985d918a25dc1663f9..5e0ba7efb3b4406733eeab6a1377c5aa9319f450 100644
--- a/runtime/quark/codelets/codelet_zher2k.c
+++ b/runtime/quark/codelets/codelet_zher2k.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zher2k Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zher2k_quark(Quark *quark)
 {
@@ -34,25 +32,22 @@ void CORE_zher2k_quark(Quark *quark)
     int n;
     int k;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *B;
-    int ldb;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     double beta;
-    CHAMELEON_Complex64_t *C;
-    int ldc;
+    CHAM_tile_t *tileC;
 
-    quark_unpack_args_12(quark, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
-    CORE_zher2k(uplo, trans,
-                n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+    quark_unpack_args_9(quark, uplo, trans, n, k, alpha, tileA, tileB, beta, tileC);
+    TCORE_zher2k(uplo, trans,
+                n, k, alpha, tileA, tileB, beta, tileC);
 }
 
 void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_trans_t trans,
                        int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                       double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn,
+                       double beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_HER2K;
@@ -62,12 +57,9 @@ void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
         sizeof(int),                        &n,         VALUE,
         sizeof(int),                        &k,         VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
-        sizeof(int),                        &lda,       VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INPUT,
-        sizeof(int),                        &ldb,       VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INPUT,
         sizeof(double),                     &beta,      VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
-        sizeof(int),                        &ldc,       VALUE,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zherfb.c b/runtime/quark/codelets/codelet_zherfb.c
index 3e4565b316c367ad6f36d6e51561a886a4b56be6..29b525f30278a407f9710c873b59a31c607df3c8 100644
--- a/runtime/quark/codelets/codelet_zherfb.c
+++ b/runtime/quark/codelets/codelet_zherfb.c
@@ -19,7 +19,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zherfb_quark(Quark *quark)
 {
@@ -28,25 +28,22 @@ void CORE_zherfb_quark(Quark *quark)
     int k;
     int ib;
     int nb;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
-    CHAMELEON_Complex64_t *C;
-    int ldc;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileC;
     CHAMELEON_Complex64_t *WORK;
     int ldwork;
 
-    quark_unpack_args_13(quark, uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork);
-    CORE_zherfb(uplo, n, k, ib, nb, A, lda, T, ldt, C, ldc, WORK, ldwork);
+    quark_unpack_args_10(quark, uplo, n, k, ib, nb, tileA, tileT, tileC, WORK, ldwork);
+    TCORE_zherfb(uplo, n, k, ib, nb, tileA, tileT, tileC, WORK, ldwork);
 }
 
 void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
                        cham_uplo_t uplo,
                        int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn,
+                       const CHAM_desc_t *C, int Cm, int Cn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
 
@@ -56,12 +53,9 @@ void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
         sizeof(int),                       &k,    VALUE,
         sizeof(int),                       &ib,   VALUE,
         sizeof(int),                       &nb,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), (uplo == ChamUpper) ? INOUT|QUARK_REGION_U : INOUT|QUARK_REGION_L,
-        sizeof(int),                       &lda,  VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ib*nb,    RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), INPUT,
-        sizeof(int),                       &ldt,  VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), (uplo == ChamUpper) ? INOUT|QUARK_REGION_D|QUARK_REGION_U : INOUT|QUARK_REGION_D|QUARK_REGION_L,
-        sizeof(int),                       &ldc,  VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), (uplo == ChamUpper) ? INOUT|QUARK_REGION_U : INOUT|QUARK_REGION_L, /* size is now pointer-sized; the separate lda VALUE is gone */
+        sizeof(void*), RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), INPUT,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), (uplo == ChamUpper) ? INOUT|QUARK_REGION_D|QUARK_REGION_U : INOUT|QUARK_REGION_D|QUARK_REGION_L, /* region flags unchanged from the removed line */
         sizeof(CHAMELEON_Complex64_t)*2*nb*nb,  NULL, SCRATCH,
         sizeof(int),                       &nb,   VALUE,
         0);
diff --git a/runtime/quark/codelets/codelet_zherk.c b/runtime/quark/codelets/codelet_zherk.c
index 70254a131236f3f610d81f6bc4b61b8da2dcc11e..8651de4d9c3a9cab50023db08b41384748af2ef8 100644
--- a/runtime/quark/codelets/codelet_zherk.c
+++ b/runtime/quark/codelets/codelet_zherk.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zherk Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zherk_quark(Quark *quark)
 {
@@ -34,24 +32,22 @@ void CORE_zherk_quark(Quark *quark)
     int n;
     int k;
     double alpha;
-    CHAMELEON_Complex64_t *A;
-    int lda;
+    CHAM_tile_t *tileA; /* replaces the (A, lda) pair */
     double beta;
-    CHAMELEON_Complex64_t *C;
-    int ldc;
+    CHAM_tile_t *tileC; /* replaces the (C, ldc) pair */
 
-    quark_unpack_args_10(quark, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
-    CORE_zherk(uplo, trans,
-        n, k,
-        alpha, A, lda,
-        beta, C, ldc);
+    quark_unpack_args_8(quark, uplo, trans, n, k, alpha, tileA, beta, tileC); /* order must match the QUARK_Insert_Task argument order */
+    TCORE_zherk(uplo, trans,
+               n, k,
+               alpha, tileA,
+               beta, tileC);
 }
 
 void INSERT_TASK_zherk(const RUNTIME_option_t *options,
                       cham_uplo_t uplo, cham_trans_t trans,
                       int n, int k, int nb,
-                      double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      double alpha, const CHAM_desc_t *A, int Am, int An,
+                      double beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_HERK;
@@ -61,10 +57,8 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options,
         sizeof(int),                        &n,         VALUE,
         sizeof(int),                        &k,         VALUE,
         sizeof(double),                     &alpha,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
-        sizeof(int),                        &lda,       VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT, /* pointer-sized; lda is no longer packed */
         sizeof(double),                     &beta,      VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
-        sizeof(int),                        &ldc,       VALUE,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT, /* pointer-sized; ldc is no longer packed */
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zhessq.c b/runtime/quark/codelets/codelet_zhessq.c
index b66dfd60a9dcda3747993cacec4b4d33b22e6370..f58dc4194da704f2537a7ae7046aab799c4f2979 100644
--- a/runtime/quark/codelets/codelet_zhessq.c
+++ b/runtime/quark/codelets/codelet_zhessq.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zhessq Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @date 2014-11-16
  * @precisions normal z -> c
@@ -24,10 +22,10 @@
 
 void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int n,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *A, int Am, int An, /* lda parameter removed */
                          const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     INSERT_TASK_zsyssq( options, storev, uplo, n,
-                        A, Am, An, lda,
+                        A, Am, An, /* forwarded unchanged to the zsyssq task, without lda */
                         SCALESUMSQ, SCALESUMSQm, SCALESUMSQn );
 }
diff --git a/runtime/quark/codelets/codelet_zlacpy.c b/runtime/quark/codelets/codelet_zlacpy.c
index 817b684529f72052efaea4b0c055ae07abab71b8..976bc83a79b328514311bf13f08f3af2a5d02409 100644
--- a/runtime/quark/codelets/codelet_zlacpy.c
+++ b/runtime/quark/codelets/codelet_zlacpy.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlacpy Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 static inline void CORE_zlacpy_quark(Quark *quark)
 {
@@ -33,20 +31,26 @@ static inline void CORE_zlacpy_quark(Quark *quark)
     int M;
     int N;
     int displA;
+    CHAM_tile_t *tileA; /* unpacked handle; A below is derived from it */
     CHAMELEON_Complex64_t *A;
-    int LDA;
     int displB;
+    CHAM_tile_t *tileB; /* unpacked handle; B below is derived from it */
     CHAMELEON_Complex64_t *B;
-    int LDB;
 
-    quark_unpack_args_9(quark, uplo, M, N, displA, A, LDA, displB, B, LDB);
-    CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB);
+    quark_unpack_args_7(quark, uplo, M, N, displA, tileA, displB, tileB);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK ); /* the raw kernel call below uses ->mat and ->ld directly */
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+
+    A = tileA->mat;
+    B = tileB->mat;
+    CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld ); /* displA/displB are element offsets added to the tile base pointers */
 }
 
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int m, int n, int nb,
-                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
-                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                          int displA, const CHAM_desc_t *A, int Am, int An,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LACPY;
@@ -55,20 +59,18 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
         sizeof(int),                     &m,      VALUE,
         sizeof(int),                     &n,      VALUE,
         sizeof(int),                     &displA, VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INPUT,
-        sizeof(int),                     &lda,    VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INPUT, /* pointer-sized; the worker reads ld from the tile */
         sizeof(int),                     &displB, VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             OUTPUT,
-        sizeof(int),                     &ldb,    VALUE,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             OUTPUT, /* pointer-sized; the worker reads ld from the tile */
         0);
 }
 
 void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int m, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                         0, A, Am, An, lda,
-                         0, B, Bm, Bn, ldb );
+                         0, A, Am, An, /* zero displacement on A */
+                         0, B, Bm, Bn ); /* zero displacement on B */
 }
diff --git a/runtime/quark/codelets/codelet_zlag2c.c b/runtime/quark/codelets/codelet_zlag2c.c
index 23bf31b92f2230fa995e030f10b1e82a8783f3a8..28ac7853cbb6de6c595a547d587b06d8d0c2cd2e 100644
--- a/runtime/quark/codelets/codelet_zlag2c.c
+++ b/runtime/quark/codelets/codelet_zlag2c.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlag2c Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
@@ -23,37 +21,37 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zlag2c_quark(Quark *quark)
 {
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex32_t *B;
-    int ldb;
+    CHAM_tile_t *tileA; /* source tile */
+    CHAM_tile_t *tileB; /* destination tile */
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
+    int info = 0; /* kernel status; pre-set so the check below never reads an indeterminate value */
 
-    quark_unpack_args_8(quark, m, n, A, lda, B, ldb, sequence, request);
-    CORE_zlag2c( m, n, A, lda, B, ldb);
+    quark_unpack_args_6(quark, m, n, tileA, tileB, sequence, request);
+    TCORE_zlag2c( m, n, tileA, tileB, &info );
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { /* flush on the first failure only; a sequence that already failed keeps its status */
+        RUNTIME_sequence_flush( (CHAM_context_t*)quark, sequence, request, info );
+    }
 }
 
 void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
                        int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LAG2C;
     QUARK_Insert_Task(opt->quark, CORE_zlag2c_quark, (Quark_Task_Flags*)opt,
                       sizeof(int),                        &m,         VALUE,
                       sizeof(int),                        &n,         VALUE,
-                      sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
-                      sizeof(int),                        &lda,       VALUE,
-                      sizeof(CHAMELEON_Complex32_t)*nb*nb,    RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn),                 OUTPUT,
-                      sizeof(int),                        &ldb,       VALUE,
+                      sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT, /* pointer-sized; lda is no longer packed */
+                      sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn),                 OUTPUT, /* pointer-sized; ldb is no longer packed */
                       sizeof(RUNTIME_sequence_t*),           &(options->sequence),  VALUE,
                       sizeof(RUNTIME_request_t*),            &(options->request),   VALUE,
                       0);
@@ -63,26 +61,23 @@ void CORE_clag2z_quark(Quark *quark)
 {
     int m;
     int n;
-    CHAMELEON_Complex32_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *B;
-    int ldb;
+    CHAM_tile_t *tileA; /* source tile */
+    CHAM_tile_t *tileB; /* destination tile */
 
-    quark_unpack_args_6(quark, m, n, A, lda, B, ldb);
-    CORE_clag2z( m, n, A, lda, B, ldb);
+    quark_unpack_args_4(quark, m, n, tileA, tileB); /* four packed arguments: m, n and the two tiles */
+    TCORE_clag2z( m, n, tileA, tileB);
 }
 
 void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
                        int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn)
 {
+    quark_option_t *opt = (quark_option_t*)(options->schedopt); /* opt is used below and was never declared in this function */
     QUARK_Insert_Task(opt->quark, CORE_clag2z_quark, (Quark_Task_Flags*)opt,
                       sizeof(int),                        &m,     VALUE,
                       sizeof(int),                        &n,     VALUE,
-                      sizeof(CHAMELEON_Complex32_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An),             INPUT,
-                      sizeof(int),                        &lda,   VALUE,
-                      sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             INOUT,
-                      sizeof(int),                        &ldb,   VALUE,
+                      sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An),             INPUT, /* pointer-sized; lda is no longer packed */
+                      sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             INOUT, /* pointer-sized; ldb is no longer packed */
                       0);
 }
diff --git a/runtime/quark/codelets/codelet_zlange.c b/runtime/quark/codelets/codelet_zlange.c
index c1d32c4324a2b016b7d03d478c58201d60b50e5a..608100393ca5d1a12207023508acceb4a98971aa 100644
--- a/runtime/quark/codelets/codelet_zlange.c
+++ b/runtime/quark/codelets/codelet_zlange.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlange Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge
@@ -23,25 +21,24 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zlange_quark(Quark *quark)
 {
-    double *normA;
+    CHAM_tile_t *tileNorm; /* result now arrives as a tile; its ->mat is handed to the kernel */
     cham_normtype_t norm;
     int M;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
+    CHAM_tile_t *tileA; /* replaces the (A, LDA) pair */
     double *work;
 
-    quark_unpack_args_7(quark, norm, M, N, A, LDA, work, normA);
-    CORE_zlange( norm, M, N, A, LDA, work, normA);
+    quark_unpack_args_6( quark, norm, M, N, tileA, work, tileNorm );
+    TCORE_zlange( norm, M, N, tileA, work, tileNorm->mat ); /* A is passed as a tile, the norm as the raw ->mat pointer */
 }
 
 void INSERT_TASK_zlange(const RUNTIME_option_t *options,
                        cham_normtype_t norm, int M, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
+                       const CHAM_desc_t *A, int Am, int An, /* LDA parameter removed */
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -52,21 +49,25 @@ void INSERT_TASK_zlange(const RUNTIME_option_t *options,
         sizeof(int),              &norm,  VALUE,
         sizeof(int),                     &M,     VALUE,
         sizeof(int),                     &N,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*NB*NB, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
-        sizeof(int),                     &LDA,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, /* pointer-sized; LDA is no longer packed */
         sizeof(double)*szeW,             NULL,   SCRATCH,
-        sizeof(double),                  RTBLKADDR(B, double, Bm, Bn), OUTPUT,
+        sizeof(void*), RTBLKADDR(B, double, Bm, Bn), OUTPUT, /* was sizeof(double); the worker now unpacks a CHAM_tile_t* */
         0);
 }
 
 void CORE_zlange_max_quark(Quark *quark)
 {
-    double *A;
-    double *normA;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileNorm;
+    double *A, *norm; /* raw views of the two tiles' ->mat storage */
 
-    quark_unpack_args_2(quark, A, normA);
-    if ( A[0] > *normA )
-        *normA = A[0];
+    quark_unpack_args_2(quark, tileA, tileNorm);
+    A = tileA->mat;
+    norm = tileNorm->mat;
+
+    if ( A[0] > *norm ) { /* keep the larger of the two values in *norm */
+        *norm = A[0];
+    }
 }
 
 void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
@@ -77,8 +78,8 @@ void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
     DAG_CORE_LANGE_MAX;
     QUARK_Insert_Task(
         opt->quark, CORE_zlange_max_quark, (Quark_Task_Flags*)opt,
-        sizeof(double), RTBLKADDR(A, double, Am, An), INPUT,
-        sizeof(double), RTBLKADDR(B, double, Bm, Bn), OUTPUT,
+        sizeof(void*), RTBLKADDR(A, double, Am, An), INPUT, /* was sizeof(double); the worker now unpacks a CHAM_tile_t* */
+        sizeof(void*), RTBLKADDR(B, double, Bm, Bn), OUTPUT, /* NOTE(review): the worker reads *norm before writing it; confirm OUTPUT rather than INOUT is intended */
         0);
 }
 
diff --git a/runtime/quark/codelets/codelet_zlanhe.c b/runtime/quark/codelets/codelet_zlanhe.c
index 8ff0c70d052ca78d8aaec61ff60ea4053850c5d6..a7c05e812c8ecfaefd1336800bf4d1b5776fe4da 100644
--- a/runtime/quark/codelets/codelet_zlanhe.c
+++ b/runtime/quark/codelets/codelet_zlanhe.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlanhe Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge
@@ -23,25 +21,24 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zlanhe_quark(Quark *quark)
 {
-    double *normA;
+    CHAM_tile_t *tileNorm; /* result now arrives as a tile; its ->mat is handed to the kernel */
     cham_normtype_t norm;
     cham_uplo_t uplo;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
+    CHAM_tile_t *tileA; /* replaces the (A, LDA) pair */
     double *work;
 
-    quark_unpack_args_7(quark, norm, uplo, N, A, LDA, work, normA);
-    CORE_zlanhe( norm, uplo, N, A, LDA, work, normA);
+    quark_unpack_args_6(quark, norm, uplo, N, tileA, work, tileNorm );
+    TCORE_zlanhe( norm, uplo, N, tileA, work, tileNorm->mat ); /* A is passed as a tile, the norm as the raw ->mat pointer */
 }
 
 void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
                        cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
+                       const CHAM_desc_t *A, int Am, int An, /* LDA parameter removed */
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -52,9 +49,8 @@ void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
         sizeof(int),              &norm,  VALUE,
         sizeof(int),              &uplo,  VALUE,
         sizeof(int),                     &N,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*NB*NB, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
-        sizeof(int),                     &LDA,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, /* pointer-sized; LDA is no longer packed */
         sizeof(double)*szeW,             NULL,   SCRATCH,
-        sizeof(double),                  RTBLKADDR(B, double, Bm, Bn), OUTPUT,
+        sizeof(void*), RTBLKADDR(B, double, Bm, Bn), OUTPUT, /* was sizeof(double); the worker now unpacks a CHAM_tile_t* */
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zlansy.c b/runtime/quark/codelets/codelet_zlansy.c
index 1acef127cea59154897ff203b49ece22db649ff0..17aecc2320fdad11fac4e891bc1b6f1f0b8933d2 100644
--- a/runtime/quark/codelets/codelet_zlansy.c
+++ b/runtime/quark/codelets/codelet_zlansy.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlansy Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge
@@ -23,25 +21,24 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zlansy_quark(Quark *quark)
 {
-    double *normA;
+    CHAM_tile_t *tileNorm; /* result now arrives as a tile; its ->mat is handed to the kernel */
     cham_normtype_t norm;
     cham_uplo_t uplo;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
+    CHAM_tile_t *tileA; /* replaces the (A, LDA) pair */
     double *work;
 
-    quark_unpack_args_7(quark, norm, uplo, N, A, LDA, work, normA);
-    CORE_zlansy( norm, uplo, N, A, LDA, work, normA);
+    quark_unpack_args_6(quark, norm, uplo, N, tileA, work, tileNorm );
+    TCORE_zlansy( norm, uplo, N, tileA, work, tileNorm->mat ); /* A is passed as a tile, the norm as the raw ->mat pointer */
 }
 
 void INSERT_TASK_zlansy(const RUNTIME_option_t *options,
                        cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
+                       const CHAM_desc_t *A, int Am, int An, /* LDA parameter removed */
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -52,9 +49,8 @@ void INSERT_TASK_zlansy(const RUNTIME_option_t *options,
         sizeof(int),              &norm,  VALUE,
         sizeof(int),              &uplo,  VALUE,
         sizeof(int),                     &N,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*NB*NB, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
-        sizeof(int),                     &LDA,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, /* pointer-sized; LDA is no longer packed */
         sizeof(double)*szeW,             NULL,   SCRATCH,
-        sizeof(double),                  RTBLKADDR(B, double, Bm, Bn), OUTPUT,
+        sizeof(void*), RTBLKADDR(B, double, Bm, Bn), OUTPUT, /* was sizeof(double); the worker now unpacks a CHAM_tile_t* */
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zlantr.c b/runtime/quark/codelets/codelet_zlantr.c
index 708f783292550191381cbb88f00581282e5c1b21..5a6bdb83333c47fe7fecf9f90da606f14f6d1443 100644
--- a/runtime/quark/codelets/codelet_zlantr.c
+++ b/runtime/quark/codelets/codelet_zlantr.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlantr Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @date 2014-11-16
  * @precisions normal z -> c d s
@@ -21,26 +19,25 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zlantr_quark(Quark *quark)
 {
-    double *normA;
+    CHAM_tile_t *tileNorm; /* result now arrives as a tile; its ->mat is handed to the kernel */
     cham_normtype_t norm, uplo, diag;
     int M;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
+    CHAM_tile_t *tileA; /* replaces the (A, LDA) pair */
     double *work;
 
-    quark_unpack_args_9(quark, norm, uplo, diag, M, N, A, LDA, work, normA);
-    CORE_zlantr( norm, uplo, diag, M, N, A, LDA, work, normA);
+    quark_unpack_args_8(quark, norm, uplo, diag, M, N, tileA, work, tileNorm );
+    TCORE_zlantr( norm, uplo, diag, M, N, tileA, work, tileNorm->mat ); /* A is passed as a tile, the norm as the raw ->mat pointer */
 }
 
 void INSERT_TASK_zlantr(const RUNTIME_option_t *options,
                        cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                        int M, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
+                       const CHAM_desc_t *A, int Am, int An, /* LDA parameter removed */
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -53,9 +50,8 @@ void INSERT_TASK_zlantr(const RUNTIME_option_t *options,
         sizeof(int),              &diag,  VALUE,
         sizeof(int),                     &M,     VALUE,
         sizeof(int),                     &N,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*NB*NB, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
-        sizeof(int),                     &LDA,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT, /* pointer-sized; LDA is no longer packed */
         sizeof(double)*szeW,             NULL,   SCRATCH,
-        sizeof(double),                  RTBLKADDR(B, double, Bm, Bn), OUTPUT,
+        sizeof(void*), RTBLKADDR(B, double, Bm, Bn), OUTPUT, /* was sizeof(double); the worker now unpacks a CHAM_tile_t* */
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zlascal.c b/runtime/quark/codelets/codelet_zlascal.c
index 5f73570b9640091d0a08fc643e95dbf1f5de02af..4aebaf8d663b8d6589a91308a9adf60da7a0722b 100644
--- a/runtime/quark/codelets/codelet_zlascal.c
+++ b/runtime/quark/codelets/codelet_zlascal.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlascal Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 static inline void CORE_zlascal_quark(Quark *quark)
 {
@@ -33,18 +31,17 @@ static inline void CORE_zlascal_quark(Quark *quark)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
+    CHAM_tile_t *tileA; /* replaces the (A, LDA) pair */
 
-    quark_unpack_args_6(quark, uplo, M, N, alpha, A, LDA);
-    CORE_zlascal(uplo, M, N, alpha, A, LDA);
+    quark_unpack_args_5(quark, uplo, M, N, alpha, tileA); /* 6 -> 5: LDA is no longer packed */
+    TCORE_zlascal(uplo, M, N, alpha, tileA);
 }
 
 void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
                         cham_uplo_t uplo,
                         int m, int n, int nb,
                         CHAMELEON_Complex64_t alpha,
-                        const CHAM_desc_t *A, int Am, int An, int lda)
+                        const CHAM_desc_t *A, int Am, int An) /* lda parameter removed */
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LASCAL;
@@ -53,8 +50,7 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
         sizeof(int),                     &m,     VALUE,
         sizeof(int),                     &n,     VALUE,
         sizeof(CHAMELEON_Complex64_t),       &alpha, VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INOUT,
-        sizeof(int),                     &lda,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INOUT, /* pointer-sized; lda is no longer packed */
         0);
 }
 
diff --git a/runtime/quark/codelets/codelet_zlaset.c b/runtime/quark/codelets/codelet_zlaset.c
index 5b35de50f76c80b488108b629b5946a8fec49d13..7c449a02fc8ed34753392c574f3c9bb0c4cc66b5 100644
--- a/runtime/quark/codelets/codelet_zlaset.c
+++ b/runtime/quark/codelets/codelet_zlaset.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlaset Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
@@ -24,7 +22,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zlaset_quark(Quark *quark)
 {
@@ -33,52 +31,16 @@ void CORE_zlaset_quark(Quark *quark)
     int N;
     CHAMELEON_Complex64_t alpha;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
+    CHAM_tile_t *tileA; /* replaces the (A, LDA) pair */
 
-    quark_unpack_args_7(quark, uplo, M, N, alpha, beta, A, LDA);
-    CORE_zlaset(uplo, M, N, alpha, beta, A, LDA);
+    quark_unpack_args_6(quark, uplo, M, N, alpha, beta, tileA); /* 7 -> 6: LDA is no longer packed */
+    TCORE_zlaset(uplo, M, N, alpha, beta, tileA);
 }
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zlaset - Sets the elements of the matrix A on the diagonal
- *  to beta and on the off-diagonals to alpha
- *
- *******************************************************************************
- *
- * @param[in] uplo
- *          Specifies which elements of the matrix are to be set
- *          = ChamUpper: Upper part of A is set;
- *          = ChamLower: Lower part of A is set;
- *          = ChamUpperLower: ALL elements of A are set.
- *
- * @param[in] M
- *          The number of rows of the matrix A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the matrix A.  N >= 0.
- *
- * @param[in] alpha
- *         The constant to which the off-diagonal elements are to be set.
- *
- * @param[in] beta
- *         The constant to which the diagonal elements are to be set.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, A has been set accordingly.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- */
 void INSERT_TASK_zlaset(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int M, int N,
                        CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
-                       const CHAM_desc_t *A, int Am, int An, int LDA)
+                       const CHAM_desc_t *A, int Am, int An) /* LDA parameter removed */
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LASET;
@@ -88,7 +50,6 @@ void INSERT_TASK_zlaset(const RUNTIME_option_t *options,
         sizeof(int),                        &N,     VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha, VALUE,
         sizeof(CHAMELEON_Complex64_t),         &beta,  VALUE,
-        sizeof(CHAMELEON_Complex64_t)*LDA*N,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),      OUTPUT,
-        sizeof(int),                        &LDA,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),      OUTPUT, /* was sized LDA*N elements; now pointer-sized */
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zlaset2.c b/runtime/quark/codelets/codelet_zlaset2.c
index aa4998a40f99bfe01ad9e16763d61fe3607762a8..b8acfd2680a8d958c8384411af9562d725282e30 100644
--- a/runtime/quark/codelets/codelet_zlaset2.c
+++ b/runtime/quark/codelets/codelet_zlaset2.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlaset2 Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
@@ -24,7 +22,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zlaset2_quark(Quark *quark)
 {
@@ -32,49 +30,15 @@ void CORE_zlaset2_quark(Quark *quark)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
+    CHAM_tile_t *tileA; /* replaces the (A, LDA) pair */
 
-    quark_unpack_args_6(quark, uplo, M, N, alpha, A, LDA);
-    CORE_zlaset2(uplo, M, N, alpha, A, LDA);
+    quark_unpack_args_5(quark, uplo, M, N, alpha, tileA); /* 6 -> 5: LDA is no longer packed */
+    TCORE_zlaset2(uplo, M, N, alpha, tileA);
 }
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zlaset2 - Sets the elements of the matrix A to alpha.
- *  Not LAPACK compliant! Read below.
- *
- *******************************************************************************
- *
- * @param[in] uplo
- *          Specifies which elements of the matrix are to be set
- *          = ChamUpper: STRICT Upper part of A is set to alpha;
- *          = ChamLower: STRICT Lower part of A is set to alpha;
- *          = ChamUpperLower: ALL elements of A are set to alpha.
- *          Not LAPACK Compliant.
- *
- * @param[in] M
- *          The number of rows of the matrix A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the matrix A.  N >= 0.
- *
- * @param[in] alpha
- *         The constant to which the elements are to be set.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, A has been set to alpha accordingly.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- */
 void INSERT_TASK_zlaset2(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int M, int N,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int LDA)
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An) /* LDA parameter removed */
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LASET;
@@ -83,7 +47,6 @@ void INSERT_TASK_zlaset2(const RUNTIME_option_t *options,
         sizeof(int),                        &M,     VALUE,
         sizeof(int),                        &N,     VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha, VALUE,
-        sizeof(CHAMELEON_Complex64_t)*M*N,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),      OUTPUT,
-        sizeof(int),                        &LDA,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),      OUTPUT, /* was sized M*N elements; now pointer-sized */
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zlatro.c b/runtime/quark/codelets/codelet_zlatro.c
index 220a0c6f882bfb4a0060bf405cd5d42047b9117f..055bee94c90a740c8b200445949371cf6104834f 100644
--- a/runtime/quark/codelets/codelet_zlatro.c
+++ b/runtime/quark/codelets/codelet_zlatro.c
@@ -19,7 +19,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zlatro_quark(Quark *quark)
 {
@@ -27,20 +27,18 @@ void CORE_zlatro_quark(Quark *quark)
     cham_trans_t trans;
     int M;
     int N;
-    const CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
-    quark_unpack_args_8(quark, uplo, trans, M, N, A, LDA, B, LDB);
-    CORE_zlatro(uplo, trans, M, N, A, LDA, B, LDB);
+    quark_unpack_args_6(quark, uplo, trans, M, N, tileA, tileB);
+    TCORE_zlatro(uplo, trans, M, N, tileA, tileB);
 }
 
 void INSERT_TASK_zlatro(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_trans_t trans,
                        int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
 
@@ -49,9 +47,7 @@ void INSERT_TASK_zlatro(const RUNTIME_option_t *options,
         sizeof(int),              &trans, VALUE,
         sizeof(int),                     &m,     VALUE,
         sizeof(int),                     &n,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*mb*mb,  RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
-        sizeof(int),                     &lda,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*mb*mb,  RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), OUTPUT,
-        sizeof(int),                     &ldb,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), OUTPUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zlauum.c b/runtime/quark/codelets/codelet_zlauum.c
index 17022ca6ad39b7e51ecf0e2546bc22bd6773e47b..356fb8adcb24876f9aa2e527aafb3167c0fa52f4 100644
--- a/runtime/quark/codelets/codelet_zlauum.c
+++ b/runtime/quark/codelets/codelet_zlauum.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlauum Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge
@@ -25,29 +23,27 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zlauum_quark(Quark *quark)
 {
     cham_uplo_t uplo;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
+    CHAM_tile_t *tileA;
 
-    quark_unpack_args_4(quark, uplo, N, A, LDA);
-    CORE_zlauum(uplo, N, A, LDA);
+    quark_unpack_args_3(quark, uplo, N, tileA);
+    TCORE_zlauum(uplo, N, tileA);
 }
 
 void INSERT_TASK_zlauum(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda)
+                       const CHAM_desc_t *A, int Am, int An)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LAUUM;
     QUARK_Insert_Task(opt->quark, CORE_zlauum_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                &uplo,  VALUE,
         sizeof(int),                        &n,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INOUT,
-        sizeof(int),                        &lda,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zplghe.c b/runtime/quark/codelets/codelet_zplghe.c
index b57d82c8a40bad4fad8e9db3153ac8832affd21e..eab1f625590a1782b236e7be4e78ebddf1e47a45 100644
--- a/runtime/quark/codelets/codelet_zplghe.c
+++ b/runtime/quark/codelets/codelet_zplghe.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zplghe Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Piotr Luszczek
  * @author Pierre Lemarinier
  * @author Mathieu Faverge
@@ -25,26 +23,25 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zplghe_quark(Quark *quark)
 {
     double bump;
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
+    CHAM_tile_t *tileA;
     int bigM;
     int m0;
     int n0;
     unsigned long long int seed;
 
-    quark_unpack_args_9( quark, bump, m, n, A, lda, bigM, m0, n0, seed );
-    CORE_zplghe( bump, m, n, A, lda, bigM, m0, n0, seed );
+    quark_unpack_args_8( quark, bump, m, n, tileA, bigM, m0, n0, seed );
+    TCORE_zplghe( bump, m, n, tileA, bigM, m0, n0, seed );
 }
 
 void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
-                        double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                        double bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
                         int bigM, int m0, int n0, unsigned long long int seed )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -53,8 +50,7 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
         sizeof(double),                   &bump, VALUE,
         sizeof(int),                      &m,    VALUE,
         sizeof(int),                      &n,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*lda*n, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),         OUTPUT,
-        sizeof(int),                      &lda,  VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),         OUTPUT,
         sizeof(int),                      &bigM, VALUE,
         sizeof(int),                      &m0,   VALUE,
         sizeof(int),                      &n0,   VALUE,
diff --git a/runtime/quark/codelets/codelet_zplgsy.c b/runtime/quark/codelets/codelet_zplgsy.c
index 60eee5385aa74edecb2773eeabc8aa7e1038aa45..8ed9175fc575ee5564b55c59751d0677ad0cc118 100644
--- a/runtime/quark/codelets/codelet_zplgsy.c
+++ b/runtime/quark/codelets/codelet_zplgsy.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zplgsy Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Piotr Luszczek
  * @author Pierre Lemarinier
  * @author Mathieu Faverge
@@ -25,26 +23,25 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zplgsy_quark(Quark *quark)
 {
     CHAMELEON_Complex64_t bump;
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
+    CHAM_tile_t *tileA;
     int bigM;
     int m0;
     int n0;
     unsigned long long int seed;
 
-    quark_unpack_args_9( quark, bump, m, n, A, lda, bigM, m0, n0, seed );
-    CORE_zplgsy( bump, m, n, A, lda, bigM, m0, n0, seed );
+    quark_unpack_args_8( quark, bump, m, n, tileA, bigM, m0, n0, seed );
+    TCORE_zplgsy( bump, m, n, tileA, bigM, m0, n0, seed );
 }
 
 void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
-                        CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                        CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
                         int bigM, int m0, int n0, unsigned long long int seed )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -53,8 +50,7 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
         sizeof(CHAMELEON_Complex64_t),       &bump, VALUE,
         sizeof(int),                      &m,    VALUE,
         sizeof(int),                      &n,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*lda*n, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),         OUTPUT,
-        sizeof(int),                      &lda,  VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),         OUTPUT,
         sizeof(int),                      &bigM, VALUE,
         sizeof(int),                      &m0,   VALUE,
         sizeof(int),                      &n0,   VALUE,
diff --git a/runtime/quark/codelets/codelet_zplrnt.c b/runtime/quark/codelets/codelet_zplrnt.c
index 455c0a8d01288ab104ca9b8f5aeea129f39c37b1..9f84ba3a47b156cd29d3e0ac076daaa88e3bb0cb 100644
--- a/runtime/quark/codelets/codelet_zplrnt.c
+++ b/runtime/quark/codelets/codelet_zplrnt.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zplrnt Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Piotr Luszczek
  * @author Pierre Lemarinier
  * @author Mathieu Faverge
@@ -25,25 +23,24 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zplrnt_quark(Quark *quark)
 {
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
+    CHAM_tile_t *tileA;
     int bigM;
     int m0;
     int n0;
     unsigned long long int seed;
 
-    quark_unpack_args_8( quark, m, n, A, lda, bigM, m0, n0, seed );
-    CORE_zplrnt( m, n, A, lda, bigM, m0, n0, seed );
+    quark_unpack_args_7( quark, m, n, tileA, bigM, m0, n0, seed );
+    TCORE_zplrnt( m, n, tileA, bigM, m0, n0, seed );
 }
 
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
-                        int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                        int m, int n, const CHAM_desc_t *A, int Am, int An,
                         int bigM, int m0, int n0, unsigned long long int seed )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -51,8 +48,7 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
     QUARK_Insert_Task(opt->quark, CORE_zplrnt_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                      &m,    VALUE,
         sizeof(int),                      &n,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*lda*n, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),         OUTPUT,
-        sizeof(int),                      &lda,  VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),         OUTPUT,
         sizeof(int),                      &bigM, VALUE,
         sizeof(int),                      &m0,   VALUE,
         sizeof(int),                      &n0,   VALUE,
diff --git a/runtime/quark/codelets/codelet_zplssq.c b/runtime/quark/codelets/codelet_zplssq.c
index b340873af51c0e8fae9659def50170eecdb16f83..bc985a5a3e79eb400273d63185c96946d631d85c 100644
--- a/runtime/quark/codelets/codelet_zplssq.c
+++ b/runtime/quark/codelets/codelet_zplssq.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zplssq Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @date 2014-11-16
  * @precisions normal z -> c d s
@@ -22,76 +20,36 @@
 #include <math.h>
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zplssq_quark(Quark *quark)
 {
     cham_store_t storev;
     int M;
     int N;
-    double *SCLSSQ_IN;
-    double *SCLSSQ_OUT;
+    CHAM_tile_t *tileIN;
+    CHAM_tile_t *tileOUT;
 
-    quark_unpack_args_5( quark, storev, M, N, SCLSSQ_IN, SCLSSQ_OUT );
+    quark_unpack_args_5( quark, storev, M, N, tileIN, tileOUT ); /* assigns tileIN/tileOUT; must run before they are dereferenced */
 
-    CORE_zplssq(storev, M, N, SCLSSQ_IN, SCLSSQ_OUT);
+    assert( tileIN->format  & CHAMELEON_TILE_FULLRANK );
+    assert( tileOUT->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zplssq( storev, M, N, tileIN->mat, tileOUT->mat );
 }
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- * @brief Compute sum( a_ij ^ 2 ) = scl * sqrt(ssq)
- *
- * with scl and ssq such that
- *
- *    ( scl**2 )*ssq = sum( A( 2*i )**2 * A( 2*i+1 ) )
- *                      i
- *
- * The values of A(2*i+1) are assumed to be at least unity.
- * The values of A(2*i) are assumed to be non-negative and scl is
- *
- *    scl = max( A( 2*i ) ),
- *           i
- *
- * The routine makes only one pass through the matrix A.
- *
- *******************************************************************************
- *
- *  @param[in] M
- *          The number of couple (scale, sumsq) in the matrix A.
- *
- *  @param[in] A
- *          The 2-by-M matrix.
- *
- *  @param[out] result
- *          On exit, result contains scl * sqrt( ssq )
- *
- */
 void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
                          cham_store_t storev, int M, int N,
-                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
-                         const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn )
+                         const CHAM_desc_t *IN,  int INm,  int INn,
+                         const CHAM_desc_t *OUT, int OUTm, int OUTn )
 {
-    int sizein = 2*M*N;
-    int sizeout;
-
-    if ( storev == ChamColumnwise ) {
-        sizeout = 2*N;
-    } else if ( storev == ChamRowwise ) {
-        sizeout = 2*M;
-    } else {
-        sizeout = 2;
-    }
-
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_PLSSQ;
     QUARK_Insert_Task(opt->quark, CORE_zplssq_quark, (Quark_Task_Flags*)opt,
         sizeof(int),            &storev,    VALUE,
         sizeof(int),            &M,         VALUE,
         sizeof(int),            &N,         VALUE,
-        sizeof(double)*sizein,  RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn), INPUT,
-        sizeof(double)*sizeout, RTBLKADDR(SCLSSQ,     double, SCLSSQm,     SCLSSQn),     INOUT,
+        sizeof(void*), RTBLKADDR(IN,  double, INm,  INn),  INPUT,
+        sizeof(void*), RTBLKADDR(OUT, double, OUTm, OUTn), INOUT,
         0);
 }
 
@@ -99,11 +57,12 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
 void CORE_zplssq2_quark(Quark *quark)
 {
     int N;
-    double *RESULT;
+    CHAM_tile_t *tileRESULT;
 
-    quark_unpack_args_2( quark, N, RESULT );
+    quark_unpack_args_2( quark, N, tileRESULT ); /* assigns tileRESULT; must run before it is dereferenced */
 
-    CORE_zplssq2(N, RESULT);
+    assert( tileRESULT->format & CHAMELEON_TILE_FULLRANK );
+    CORE_zplssq2(N, tileRESULT->mat);
 }
 
 void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, int N,
@@ -113,6 +72,6 @@ void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, int N,
     DAG_CORE_PLSSQ2;
     QUARK_Insert_Task(opt->quark, CORE_zplssq2_quark, (Quark_Task_Flags*)opt,
         sizeof(int),        &N,         VALUE,
-        sizeof(double)*2*N, RTBLKADDR(RESULT, double, RESULTm, RESULTn), INOUT,
+        sizeof(void*), RTBLKADDR(RESULT, double, RESULTm, RESULTn), INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zpotrf.c b/runtime/quark/codelets/codelet_zpotrf.c
index b3bb0256c30730c23c091e891d7d4de88594158e..7b8df563fd922f233ed3d370150659caca144b50 100644
--- a/runtime/quark/codelets/codelet_zpotrf.c
+++ b/runtime/quark/codelets/codelet_zpotrf.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zpotrf Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,22 +23,21 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zpotrf_quark(Quark *quark)
 {
     cham_uplo_t uplo;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
+    CHAM_tile_t *tileA;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
     int iinfo;
 
     int info;
 
-    quark_unpack_args_7(quark, uplo, n, A, lda, sequence, request, iinfo);
-    CORE_zpotrf(uplo, n, A, lda, &info);
+    quark_unpack_args_6(quark, uplo, n, tileA, sequence, request, iinfo);
+    TCORE_zpotrf(uplo, n, tileA, &info);
     if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
         RUNTIME_sequence_flush( (CHAM_context_t*)quark, sequence, request, iinfo+info );
     }
@@ -48,7 +45,7 @@ void CORE_zpotrf_quark(Quark *quark)
 
 void INSERT_TASK_zpotrf(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *A, int Am, int An,
                        int iinfo)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -56,8 +53,7 @@ void INSERT_TASK_zpotrf(const RUNTIME_option_t *options,
     QUARK_Insert_Task(opt->quark, CORE_zpotrf_quark, (Quark_Task_Flags*)opt,
                       sizeof(int),                &uplo,      VALUE,
                       sizeof(int),                        &n,         VALUE,
-                      sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INOUT,
-                      sizeof(int),                        &lda,       VALUE,
+                      sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INOUT,
                       sizeof(RUNTIME_sequence_t*),           &(options->sequence),  VALUE,
                       sizeof(RUNTIME_request_t*),            &(options->request),   VALUE,
                       sizeof(int),                        &iinfo,     VALUE,
diff --git a/runtime/quark/codelets/codelet_zssssm.c b/runtime/quark/codelets/codelet_zssssm.c
index bc5b3ae5caee9fbc235c415b92d9ea2dd8185be2..83ec806967f7fe58bb03a0ca5f2126591498416b 100644
--- a/runtime/quark/codelets/codelet_zssssm.c
+++ b/runtime/quark/codelets/codelet_zssssm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zssssm Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -26,7 +24,7 @@
 #include "coreblas/cblas.h"
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zssssm_quark(Quark *quark)
 {
@@ -36,95 +34,22 @@ void CORE_zssssm_quark(Quark *quark)
     int n2;
     int k;
     int ib;
-    CHAMELEON_Complex64_t *A1;
-    int lda1;
-    CHAMELEON_Complex64_t *A2;
-    int lda2;
-    CHAMELEON_Complex64_t *L1;
-    int ldl1;
-    CHAMELEON_Complex64_t *L2;
-    int ldl2;
+    CHAM_tile_t *tileA1;
+    CHAM_tile_t *tileA2;
+    CHAM_tile_t *tileL1;
+    CHAM_tile_t *tileL2;
     int *IPIV;
 
-    quark_unpack_args_15(quark, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
-    CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
+    quark_unpack_args_11(quark, m1, n1, m2, n2, k, ib, tileA1, tileA2, tileL1, tileL2, IPIV);
+    TCORE_zssssm(m1, n1, m2, n2, k, ib, tileA1, tileA2, tileL1, tileL2, IPIV);
 }
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zssssm applies the LU factorization update from a complex
- *  matrix formed by a lower triangular IB-by-K tile L1 on top of a
- *  M2-by-K tile L2 to a second complex matrix formed by a M1-by-N1
- *  tile A1 on top of a M2-by-N2 tile A2 (N1 == N2).
- *
- *  This is the right-looking Level 2.5 BLAS version of the algorithm.
- *
- *******************************************************************************
- *
- * @param[in] M1
- *         The number of rows of the tile A1.  M1 >= 0.
- *
- * @param[in] N1
- *         The number of columns of the tile A1.  N1 >= 0.
- *
- * @param[in] M2
- *         The number of rows of the tile A2 and of the tile L2.
- *         M2 >= 0.
- *
- * @param[in] N2
- *         The number of columns of the tile A2.  N2 >= 0.
- *
- * @param[in] K
- *         The number of columns of the tiles L1 and L2.  K >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A1
- *         On entry, the M1-by-N1 tile A1.
- *         On exit, A1 is updated by the application of L (L1 L2).
- *
- * @param[in] LDA1
- *         The leading dimension of the array A1.  LDA1 >= max(1,M1).
- *
- * @param[in,out] A2
- *         On entry, the M2-by-N2 tile A2.
- *         On exit, A2 is updated by the application of L (L1 L2).
- *
- * @param[in] LDA2
- *         The leading dimension of the array A2.  LDA2 >= max(1,M2).
- *
- * @param[in] L1
- *         The IB-by-K lower triangular tile as returned by
- *         CORE_ztstrf.
- *
- * @param[in] LDL1
- *         The leading dimension of the array L1.  LDL1 >= max(1,IB).
- *
- * @param[in] L2
- *         The M2-by-K tile as returned by CORE_ztstrf.
- *
- * @param[in] LDL2
- *         The leading dimension of the array L2.  LDL2 >= max(1,M2).
- *
- * @param[in] IPIV
- *         The pivot indices array of size K as returned by
- *         CORE_ztstrf.
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- *
- */
 void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
                        int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                       const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                       const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                       const CHAM_desc_t *L1, int L1m, int L1n, int ldl1,
-                       const CHAM_desc_t *L2, int L2m, int L2n, int ldl2,
+                       const CHAM_desc_t *A1, int A1m, int A1n,
+                       const CHAM_desc_t *A2, int A2m, int A2n,
+                       const CHAM_desc_t *L1, int L1m, int L1n,
+                       const CHAM_desc_t *L2, int L2m, int L2n,
                        const int *IPIV)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -136,14 +61,10 @@ void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
         sizeof(int),                        &n2,    VALUE,
         sizeof(int),                        &k,     VALUE,
         sizeof(int),                        &ib,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n),            INOUT,
-        sizeof(int),                        &lda1,  VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n),            INOUT | LOCALITY,
-        sizeof(int),                        &lda2,  VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ib*nb,    RTBLKADDR(L1, CHAMELEON_Complex64_t, L1m, L1n),            INPUT,
-        sizeof(int),                        &ldl1,  VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ib*nb,    RTBLKADDR(L2, CHAMELEON_Complex64_t, L2m, L2n),            INPUT,
-        sizeof(int),                        &ldl2,  VALUE,
+        sizeof(void*), RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n),            INOUT,
+        sizeof(void*), RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n),            INOUT | LOCALITY,
+        sizeof(void*), RTBLKADDR(L1, CHAMELEON_Complex64_t, L1m, L1n),            INPUT,
+        sizeof(void*), RTBLKADDR(L2, CHAMELEON_Complex64_t, L2m, L2n),            INPUT,
         sizeof(int)*nb,                      IPIV,          INPUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zsymm.c b/runtime/quark/codelets/codelet_zsymm.c
index acb3cd4491120b222e45709ba40158e55d966138..b1965360c3b1afb0fc1c313a20a7c63ae5e95990 100644
--- a/runtime/quark/codelets/codelet_zsymm.c
+++ b/runtime/quark/codelets/codelet_zsymm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zsymm Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zsymm_quark(Quark *quark)
 {
@@ -34,28 +32,25 @@ void CORE_zsymm_quark(Quark *quark)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *C;
-    int LDC;
+    CHAM_tile_t *tileC;
 
-    quark_unpack_args_12(quark, side, uplo, M, N, alpha, A, LDA, B, LDB, beta, C, LDC);
-    CORE_zsymm(side, uplo,
+    quark_unpack_args_9(quark, side, uplo, M, N, alpha, tileA, tileB, beta, tileC);
+    TCORE_zsymm(side, uplo,
         M, N,
-        alpha, A, LDA,
-        B, LDB,
-        beta, C, LDC);
+        alpha, tileA,
+        tileB,
+        beta, tileC);
 }
 
 void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_SYMM;
@@ -65,12 +60,9 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
         sizeof(int),                        &m,       VALUE,
         sizeof(int),                        &n,       VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),               INPUT,
-        sizeof(int),                        &lda,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),               INPUT,
-        sizeof(int),                        &ldb,     VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),               INPUT,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),               INPUT,
         sizeof(CHAMELEON_Complex64_t),         &beta,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),               INOUT,
-        sizeof(int),                        &ldc,     VALUE,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),               INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zsyr2k.c b/runtime/quark/codelets/codelet_zsyr2k.c
index 66da8d9c6853843f4d89ad5efbaa8676af61133f..d0306a94284854a10ca5f2eaed20a0de8f8f52d3 100644
--- a/runtime/quark/codelets/codelet_zsyr2k.c
+++ b/runtime/quark/codelets/codelet_zsyr2k.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zsyr2k Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zsyr2k_quark(Quark *quark)
 {
@@ -34,25 +32,22 @@ void CORE_zsyr2k_quark(Quark *quark)
     int n;
     int k;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *B;
-    int ldb;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *C;
-    int ldc;
+    CHAM_tile_t *tileC;
 
-    quark_unpack_args_12(quark, uplo, trans, n, k, alpha, A, lda, B, ldb, beta, C, ldc);
-    CORE_zsyr2k(uplo, trans,
-                n, k, alpha, A, lda, B, ldb, beta, C, ldc);
+    quark_unpack_args_9(quark, uplo, trans, n, k, alpha, tileA, tileB, beta, tileC);
+    TCORE_zsyr2k(uplo, trans,
+                n, k, alpha, tileA, tileB, beta, tileC);
 }
 
 void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_trans_t trans,
                        int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn,
+                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_SYR2K;
@@ -62,12 +57,9 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
         sizeof(int),                        &n,         VALUE,
         sizeof(int),                        &k,         VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
-        sizeof(int),                        &lda,       VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INPUT,
-        sizeof(int),                        &ldb,       VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INPUT,
         sizeof(CHAMELEON_Complex64_t),         &beta,      VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
-        sizeof(int),                        &ldc,       VALUE,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zsyrk.c b/runtime/quark/codelets/codelet_zsyrk.c
index d32372f7af30f5f9dfee03c4f025891078017e1b..5add74bf7f386a53a2d986ca9ea07dec434bcbbc 100644
--- a/runtime/quark/codelets/codelet_zsyrk.c
+++ b/runtime/quark/codelets/codelet_zsyrk.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zsyrk Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zsyrk_quark(Quark *quark)
 {
@@ -34,24 +32,22 @@ void CORE_zsyrk_quark(Quark *quark)
     int n;
     int k;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int lda;
+    CHAM_tile_t *tileA;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *C;
-    int ldc;
+    CHAM_tile_t *tileC;
 
-    quark_unpack_args_10(quark, uplo, trans, n, k, alpha, A, lda, beta, C, ldc);
-    CORE_zsyrk(uplo, trans,
+    quark_unpack_args_8(quark, uplo, trans, n, k, alpha, tileA, beta, tileC);
+    TCORE_zsyrk(uplo, trans,
         n, k,
-        alpha, A, lda,
-        beta, C, ldc);
+        alpha, tileA,
+        beta, tileC);
 }
 
 void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
                       cham_uplo_t uplo, cham_trans_t trans,
                       int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_SYRK;
@@ -61,10 +57,8 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
         sizeof(int),                        &n,         VALUE,
         sizeof(int),                        &k,         VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
-        sizeof(int),                        &lda,       VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
         sizeof(CHAMELEON_Complex64_t),         &beta,      VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
-        sizeof(int),                        &ldc,       VALUE,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zsyssq.c b/runtime/quark/codelets/codelet_zsyssq.c
index 913424820fd8e01995ef2d889ebd97ac791758cc..8636e3a69f56698be7feecc081b7ba891824be8d 100644
--- a/runtime/quark/codelets/codelet_zsyssq.c
+++ b/runtime/quark/codelets/codelet_zsyssq.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zsyssq Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @date 2014-11-16
  * @precisions normal z -> c d s
@@ -21,42 +19,32 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zsyssq_quark(Quark *quark)
 {
     cham_store_t storev;
     cham_uplo_t uplo;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *SCALESUMSQ;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileW;
 
-    quark_unpack_args_6( quark, storev, uplo, n, A, lda, SCALESUMSQ );
-    CORE_zsyssq( storev, uplo, n, A, lda, SCALESUMSQ );
+    quark_unpack_args_5( quark, storev, uplo, n, tileA, tileW );
+    TCORE_zsyssq( storev, uplo, n, tileA, tileW );
 }
 
 void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
                         cham_store_t storev, cham_uplo_t uplo, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *A, int Am, int An,
                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
-    int sizessq;
-
-    if ( storev == ChamEltwise ) {
-        sizessq = 2;
-    } else {
-        sizessq = 2*n;
-    }
-
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_SYSSQ;
     QUARK_Insert_Task(opt->quark, CORE_zsyssq_quark, (Quark_Task_Flags*)opt,
         sizeof(cham_store_t),            &storev, VALUE,
         sizeof(int),                     &uplo, VALUE,
         sizeof(int),                     &n,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*n*n, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
-        sizeof(int),                     &lda,  VALUE,
-        sizeof(double)*sizessq,          RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn), INOUT,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
+        sizeof(void*), RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn), INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zsytrf_nopiv.c b/runtime/quark/codelets/codelet_zsytrf_nopiv.c
index cf41a06c0130af5d15f88df5a1796df115aa2e5a..5e3922ec1469fa643d6615ddad251eebc0225854 100644
--- a/runtime/quark/codelets/codelet_zsytrf_nopiv.c
+++ b/runtime/quark/codelets/codelet_zsytrf_nopiv.c
@@ -25,21 +25,20 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zsytrf_nopiv_quark(Quark *quark)
 {
     cham_uplo_t uplo;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
+    CHAM_tile_t *tileA;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
     int iinfo;
     int info = 0;
 
-    quark_unpack_args_7(quark, uplo, n, A, lda, sequence, request, iinfo);
-    info = CORE_zsytf2_nopiv(uplo, n, A, lda);
+    quark_unpack_args_6(quark, uplo, n, tileA, sequence, request, iinfo);
+    info = TCORE_zsytf2_nopiv(uplo, n, tileA);
     if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
         RUNTIME_sequence_flush( (CHAM_context_t*)quark, sequence, request, iinfo+info );
     }
@@ -47,7 +46,7 @@ void CORE_zsytrf_nopiv_quark(Quark *quark)
 
 void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options,
                              cham_uplo_t uplo, int n, int nb,
-                             const CHAM_desc_t *A, int Am, int An, int lda,
+                             const CHAM_desc_t *A, int Am, int An,
                              int iinfo)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -55,8 +54,7 @@ void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options,
     QUARK_Insert_Task(opt->quark, CORE_zsytrf_nopiv_quark, (Quark_Task_Flags*)opt,
         sizeof(int),              &uplo,                VALUE,
         sizeof(int),                     &n,                   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INOUT,
-        sizeof(int),                     &lda,                 VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INOUT,
         sizeof(RUNTIME_sequence_t*),       &(options->sequence), VALUE,
         sizeof(RUNTIME_request_t*),        &(options->request),  VALUE,
         sizeof(int),                     &iinfo,               VALUE,
diff --git a/runtime/quark/codelets/codelet_ztplqt.c b/runtime/quark/codelets/codelet_ztplqt.c
index e151f0a9c69effb189d71aca391e9ce392a665e5..1b31544c6b12655143083cd2581daa24a70904d5 100644
--- a/runtime/quark/codelets/codelet_ztplqt.c
+++ b/runtime/quark/codelets/codelet_ztplqt.c
@@ -19,7 +19,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 static void
 CORE_ztplqt_quark( Quark *quark )
@@ -28,27 +28,24 @@ CORE_ztplqt_quark( Quark *quark )
     int N;
     int L;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *B;
-    int ldb;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileT;
     CHAMELEON_Complex64_t *WORK;
 
-    quark_unpack_args_11( quark, M, N, L, ib,
-                          A, lda, B, ldb, T, ldt, WORK );
+    quark_unpack_args_8( quark, M, N, L, ib,
+                         tileA, tileB, tileT, WORK );
 
-    CORE_zlaset( ChamUpperLower, ib, N, 0., 0., T, ldt );
-    CORE_ztplqt( M, N, L, ib,
-                 A, lda, B, ldb, T, ldt, WORK );
+    TCORE_zlaset( ChamUpperLower, ib, N, 0., 0., tileT );
+    TCORE_ztplqt( M, N, L, ib,
+                 tileA, tileB, tileT, WORK );
 }
 
 void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
                          int M, int N, int L, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         const CHAM_desc_t *T, int Tm, int Tn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TPLQT;
@@ -61,12 +58,9 @@ void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
         sizeof(int),                         &N,   VALUE,
         sizeof(int),                         &L,   VALUE,
         sizeof(int),                         &ib,  VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,      RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), INOUT | QUARK_REGION_L | QUARK_REGION_D,
-        sizeof(int),                         &lda, VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,      RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), INOUT | shapeB | LOCALITY,
-        sizeof(int),                         &ldb, VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*ib,      RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), OUTPUT,
-        sizeof(int),                         &ldt, VALUE,
+        sizeof(void*), RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), INOUT | QUARK_REGION_L | QUARK_REGION_D,
+        sizeof(void*), RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), INOUT | shapeB | LOCALITY,
+        sizeof(void*), RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), OUTPUT,
         sizeof(CHAMELEON_Complex64_t)*(ib+1)*nb,  NULL, SCRATCH,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_ztpmlqt.c b/runtime/quark/codelets/codelet_ztpmlqt.c
index d2ccd5fa18c8cb6da6598193278f0b2cbde3cd02..4fe5df16194e2ceae29f2ccfb19707a613dc4f33 100644
--- a/runtime/quark/codelets/codelet_ztpmlqt.c
+++ b/runtime/quark/codelets/codelet_ztpmlqt.c
@@ -19,7 +19,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 static void
 CORE_ztpmlqt_quark( Quark *quark )
@@ -31,30 +31,26 @@ CORE_ztpmlqt_quark( Quark *quark )
     int K;
     int L;
     int ib;
-    const CHAMELEON_Complex64_t *V;
-    int ldv;
-    const CHAMELEON_Complex64_t *T;
-    int ldt;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *B;
-    int ldb;
+    CHAM_tile_t *tileV;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     CHAMELEON_Complex64_t *WORK;
 
-    quark_unpack_args_16( quark, side, trans, M, N, K, L, ib,
-                          V, ldv, T, ldt, A, lda, B, ldb, WORK );
+    quark_unpack_args_12( quark, side, trans, M, N, K, L, ib,
+                          tileV, tileT, tileA, tileB, WORK );
 
-    CORE_ztpmlqt( side, trans, M, N, K, L, ib,
-                  V, ldv, T, ldt, A, lda, B, ldb, WORK );
+    TCORE_ztpmlqt( side, trans, M, N, K, L, ib,
+                  tileV, tileT, tileA, tileB, WORK );
 }
 
 void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
                          cham_side_t side, cham_trans_t trans,
                          int M, int N, int K, int L, int ib, int nb,
-                         const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         const CHAM_desc_t *V, int Vm, int Vn,
+                         const CHAM_desc_t *T, int Tm, int Tn,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TPMLQT;
@@ -70,14 +66,10 @@ void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
         sizeof(int),                     &K,     VALUE,
         sizeof(int),                     &L,     VALUE,
         sizeof(int),                     &ib,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), INPUT | shapeV,
-        sizeof(int),                     &ldv,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ib*nb,  RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), INPUT,
-        sizeof(int),                     &ldt,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), INOUT,
-        sizeof(int),                     &lda,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), INOUT | LOCALITY,
-        sizeof(int),                     &ldb,   VALUE,
+        sizeof(void*), RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), INPUT | shapeV,
+        sizeof(void*), RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), INPUT,
+        sizeof(void*), RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), INOUT,
+        sizeof(void*), RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), INOUT | LOCALITY,
         sizeof(CHAMELEON_Complex64_t)*ib*nb,  NULL, SCRATCH,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_ztpmqrt.c b/runtime/quark/codelets/codelet_ztpmqrt.c
index ac396d88ad5247bbcac415351be9faa4daa6741d..a7670ec15cb603dcbfc120844b662a6641fc8813 100644
--- a/runtime/quark/codelets/codelet_ztpmqrt.c
+++ b/runtime/quark/codelets/codelet_ztpmqrt.c
@@ -19,7 +19,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 static void
 CORE_ztpmqrt_quark( Quark *quark )
@@ -31,30 +31,26 @@ CORE_ztpmqrt_quark( Quark *quark )
     int K;
     int L;
     int ib;
-    const CHAMELEON_Complex64_t *V;
-    int ldv;
-    const CHAMELEON_Complex64_t *T;
-    int ldt;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *B;
-    int ldb;
+    CHAM_tile_t *tileV;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     CHAMELEON_Complex64_t *WORK;
 
-    quark_unpack_args_16( quark, side, trans, M, N, K, L, ib,
-                          V, ldv, T, ldt, A, lda, B, ldb, WORK );
+    quark_unpack_args_12( quark, side, trans, M, N, K, L, ib,
+                          tileV, tileT, tileA, tileB, WORK );
 
-    CORE_ztpmqrt( side, trans, M, N, K, L, ib,
-                  V, ldv, T, ldt, A, lda, B, ldb, WORK );
+    TCORE_ztpmqrt( side, trans, M, N, K, L, ib,
+                  tileV, tileT, tileA, tileB, WORK );
 }
 
 void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
                          cham_side_t side, cham_trans_t trans,
                          int M, int N, int K, int L, int ib, int nb,
-                         const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         const CHAM_desc_t *V, int Vm, int Vn,
+                         const CHAM_desc_t *T, int Tm, int Tn,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TPMQRT;
@@ -70,14 +66,10 @@ void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
         sizeof(int),                     &K,     VALUE,
         sizeof(int),                     &L,     VALUE,
         sizeof(int),                     &ib,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), INPUT | shapeV,
-        sizeof(int),                     &ldv,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ib*nb,  RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), INPUT,
-        sizeof(int),                     &ldt,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), INOUT,
-        sizeof(int),                     &lda,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), INOUT | LOCALITY,
-        sizeof(int),                     &ldb,   VALUE,
+        sizeof(void*), RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), INPUT | shapeV,
+        sizeof(void*), RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), INPUT,
+        sizeof(void*), RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), INOUT,
+        sizeof(void*), RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), INOUT | LOCALITY,
         sizeof(CHAMELEON_Complex64_t)*ib*nb,  NULL, SCRATCH,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_ztpqrt.c b/runtime/quark/codelets/codelet_ztpqrt.c
index 814300de138185d20c137a34843010900d473e17..efecbd2be7a42bd4c859322c2002758ebe706ad4 100644
--- a/runtime/quark/codelets/codelet_ztpqrt.c
+++ b/runtime/quark/codelets/codelet_ztpqrt.c
@@ -19,7 +19,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 static void
 CORE_ztpqrt_quark( Quark *quark )
@@ -28,27 +28,24 @@ CORE_ztpqrt_quark( Quark *quark )
     int N;
     int L;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *B;
-    int ldb;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileT;
     CHAMELEON_Complex64_t *WORK;
 
-    quark_unpack_args_11( quark, M, N, L, ib,
-                          A, lda, B, ldb, T, ldt, WORK );
+    quark_unpack_args_8( quark, M, N, L, ib,
+                          tileA, tileB, tileT, WORK );
 
-    CORE_zlaset( ChamUpperLower, ib, N, 0., 0., T, ldt );
-    CORE_ztpqrt( M, N, L, ib,
-                 A, lda, B, ldb, T, ldt, WORK );
+    TCORE_zlaset( ChamUpperLower, ib, N, 0., 0., tileT );
+    TCORE_ztpqrt( M, N, L, ib,
+                  tileA, tileB, tileT, WORK );
 }
 
 void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
                          int M, int N, int L, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         const CHAM_desc_t *T, int Tm, int Tn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TPQRT;
@@ -61,12 +58,9 @@ void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
         sizeof(int),                         &N,   VALUE,
         sizeof(int),                         &L,   VALUE,
         sizeof(int),                         &ib,  VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,      RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), INOUT | QUARK_REGION_U | QUARK_REGION_D,
-        sizeof(int),                         &lda, VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,      RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), INOUT | shapeB | LOCALITY,
-        sizeof(int),                         &ldb, VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*ib,      RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), OUTPUT,
-        sizeof(int),                         &ldt, VALUE,
+        sizeof(void*), RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), INOUT | QUARK_REGION_U | QUARK_REGION_D,
+        sizeof(void*), RTBLKADDR( B, CHAMELEON_Complex64_t, Bm, Bn ), INOUT | shapeB | LOCALITY,
+        sizeof(void*), RTBLKADDR( T, CHAMELEON_Complex64_t, Tm, Tn ), OUTPUT,
         sizeof(CHAMELEON_Complex64_t)*(ib+1)*nb,  NULL, SCRATCH,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_ztradd.c b/runtime/quark/codelets/codelet_ztradd.c
index d804d95990d42441021b903291179a59261f2c90..c2f99242d9ba3e7fdfed0ecedf3c9b516452a27e 100644
--- a/runtime/quark/codelets/codelet_ztradd.c
+++ b/runtime/quark/codelets/codelet_ztradd.c
@@ -12,8 +12,6 @@
  * @brief Chameleon ztradd Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @date 2015-11-03
  * @precisions normal z -> c d s
@@ -21,7 +19,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_ztradd_quark(Quark *quark)
 {
@@ -30,14 +28,12 @@ void CORE_ztradd_quark(Quark *quark)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
+    CHAM_tile_t *tileA;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
+    CHAM_tile_t *tileB;
 
-    quark_unpack_args_10(quark, uplo, trans, M, N, alpha, A, LDA, beta, B, LDB);
-    CORE_ztradd(uplo, trans, M, N, alpha, A, LDA, beta, B, LDB);
+    quark_unpack_args_8(quark, uplo, trans, M, N, alpha, tileA, beta, tileB);
+    TCORE_ztradd(uplo, trans, M, N, alpha, tileA, beta, tileB);
     return;
 }
 
@@ -102,8 +98,8 @@ void CORE_ztradd_quark(Quark *quark)
  */
 void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GEADD;
@@ -113,11 +109,9 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
         sizeof(int),                        &m,     VALUE,
         sizeof(int),                        &n,     VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha, VALUE,
-        sizeof(CHAMELEON_Complex64_t)*lda*n,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INPUT,
-        sizeof(int),                        &lda,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INPUT,
         sizeof(CHAMELEON_Complex64_t),         &beta,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ldb*n,    RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             INOUT,
-        sizeof(int),                        &ldb,   VALUE,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             INOUT,
         0);
 
     (void)nb;
diff --git a/runtime/quark/codelets/codelet_ztrasm.c b/runtime/quark/codelets/codelet_ztrasm.c
index bed2262809381256aa3c9c731df359b11803120f..5ce77d8b1c1df571a7818bbf2ae9dfd9d9172e28 100644
--- a/runtime/quark/codelets/codelet_ztrasm.c
+++ b/runtime/quark/codelets/codelet_ztrasm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon ztrasm Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @date 2014-11-16
  * @precisions normal z -> c d s
@@ -21,7 +19,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_ztrasm_quark(Quark *quark)
 {
@@ -30,29 +28,27 @@ void CORE_ztrasm_quark(Quark *quark)
     cham_diag_t diag;
     int M;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *work;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileW;
 
-    quark_unpack_args_8(quark, storev, uplo, diag, M, N, A, lda, work);
-    CORE_ztrasm(storev, uplo, diag, M, N, A, lda, work);
+    quark_unpack_args_7( quark, storev, uplo, diag, M, N, tileA, tileW );
+    TCORE_ztrasm( storev, uplo, diag, M, N, tileA, tileW->mat );
 }
 
 void INSERT_TASK_ztrasm(const RUNTIME_option_t *options,
                        cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *A, int Am, int An,
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
-    int szeW = storev == ChamRowwise ? M : N ;
+
     QUARK_Insert_Task(opt->quark, CORE_ztrasm_quark, (Quark_Task_Flags*)opt,
         sizeof(int),              &storev,    VALUE,
         sizeof(int),              &uplo,      VALUE,
         sizeof(int),              &diag,      VALUE,
         sizeof(int),                     &M,         VALUE,
         sizeof(int),                     &N,         VALUE,
-        sizeof(CHAMELEON_Complex64_t)*lda*N, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
-        sizeof(int),                     &lda,       VALUE,
-        sizeof(double)*szeW,             RTBLKADDR(B, double, Bm, Bn), INOUT,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),  INPUT,
+        sizeof(void*), RTBLKADDR(B, double, Bm, Bn),                 INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_ztrmm.c b/runtime/quark/codelets/codelet_ztrmm.c
index 5697128bf89e59fc63a4f70cd364367b45e75e92..ce869b781c6da7238d922e4464a5d197e9a99db1 100644
--- a/runtime/quark/codelets/codelet_ztrmm.c
+++ b/runtime/quark/codelets/codelet_ztrmm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon ztrmm Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_ztrmm_quark(Quark *quark)
 {
@@ -36,24 +34,22 @@ void CORE_ztrmm_quark(Quark *quark)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
-    quark_unpack_args_11(quark, side, uplo, transA, diag, M, N, alpha, A, LDA, B, LDB);
-    CORE_ztrmm(side, uplo,
+    quark_unpack_args_9(quark, side, uplo, transA, diag, M, N, alpha, tileA, tileB);
+    TCORE_ztrmm(side, uplo,
         transA, diag,
         M, N,
-        alpha, A, LDA,
-        B, LDB);
+        alpha, tileA,
+        tileB);
 }
 
 void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TRMM;
@@ -65,9 +61,7 @@ void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
         sizeof(int),                        &m,         VALUE,
         sizeof(int),                        &n,         VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
-        sizeof(int),                        &lda,       VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INOUT,
-        sizeof(int),                        &ldb,       VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_ztrsm.c b/runtime/quark/codelets/codelet_ztrsm.c
index ce15a4ca1c890f446e5c88866e1b72859380777c..5c9b068502a4f8a65bc2cd60c43f6c727b9ab802 100644
--- a/runtime/quark/codelets/codelet_ztrsm.c
+++ b/runtime/quark/codelets/codelet_ztrsm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon ztrsm Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_ztrsm_quark(Quark *quark)
 {
@@ -36,24 +34,22 @@ void CORE_ztrsm_quark(Quark *quark)
     int m;
     int n;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *B;
-    int ldb;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
-    quark_unpack_args_11(quark, side, uplo, transA, diag, m, n, alpha, A, lda, B, ldb);
-    CORE_ztrsm(side, uplo,
+    quark_unpack_args_9(quark, side, uplo, transA, diag, m, n, alpha, tileA, tileB);
+    TCORE_ztrsm(side, uplo,
         transA, diag,
         m, n,
-        alpha, A, lda,
-        B, ldb);
+        alpha, tileA,
+        tileB);
 }
 
 void INSERT_TASK_ztrsm(const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TRSM;
@@ -65,9 +61,7 @@ void INSERT_TASK_ztrsm(const RUNTIME_option_t *options,
         sizeof(int),                        &m,         VALUE,
         sizeof(int),                        &n,         VALUE,
         sizeof(CHAMELEON_Complex64_t),         &alpha,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
-        sizeof(int),                        &lda,       VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INOUT,
-        sizeof(int),                        &ldb,       VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_ztrssq.c b/runtime/quark/codelets/codelet_ztrssq.c
index 80bb9e78c3e41fff60f96fea3d9e9b768a25d65d..fc9b34497067b430a9a527f312ed0e746ebbf9e9 100644
--- a/runtime/quark/codelets/codelet_ztrssq.c
+++ b/runtime/quark/codelets/codelet_ztrssq.c
@@ -12,8 +12,6 @@
  * @brief Chameleon ztrssq Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.6.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @date 2014-11-16
  * @precisions normal z -> c d s
@@ -21,7 +19,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_ztrssq_quark(Quark *quark)
 {
@@ -29,18 +27,17 @@ void CORE_ztrssq_quark(Quark *quark)
     cham_diag_t diag;
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *SCALESUMSQ;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileW;
 
-    quark_unpack_args_7( quark, uplo, diag, m, n, A, lda, SCALESUMSQ );
-    CORE_ztrssq( uplo, diag, m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1]);
+    quark_unpack_args_6( quark, uplo, diag, m, n, tileA, tileW );
+    TCORE_ztrssq( uplo, diag, m, n, tileA, tileW );
 }
 
 void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
                         cham_uplo_t uplo, cham_diag_t diag,
                         int m, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
+                        const CHAM_desc_t *A, int Am, int An,
                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -50,8 +47,7 @@ void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
         sizeof(int),              &diag, VALUE,
         sizeof(int),                     &m,    VALUE,
         sizeof(int),                     &n,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*lda*n, RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
-        sizeof(int),                     &lda,  VALUE,
-        sizeof(double)*2,                RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn), INOUT,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT,
+        sizeof(void*), RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn), INOUT,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_ztrtri.c b/runtime/quark/codelets/codelet_ztrtri.c
index edc02bc7f2ab8cbfb3d66238834b55cc0f7b54a6..650dc8d19849ae5c8b4ceb47a16035cee6af42b3 100644
--- a/runtime/quark/codelets/codelet_ztrtri.c
+++ b/runtime/quark/codelets/codelet_ztrtri.c
@@ -12,8 +12,6 @@
  * @brief Chameleon ztrtri Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge
@@ -25,23 +23,22 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_ztrtri_quark(Quark *quark)
 {
     cham_uplo_t uplo;
     cham_diag_t diag;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
+    CHAM_tile_t *tileA;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
     int iinfo;
 
     int info;
 
-    quark_unpack_args_8(quark, uplo, diag, N, A, LDA, sequence, request, iinfo);
-    CORE_ztrtri(uplo, diag, N, A, LDA, &info);
+    quark_unpack_args_7(quark, uplo, diag, N, tileA, sequence, request, iinfo);
+    TCORE_ztrtri(uplo, diag, N, tileA, &info);
     if ( (sequence->status == CHAMELEON_SUCCESS) && (info > 0) ) {
         RUNTIME_sequence_flush( (CHAM_context_t*)quark, sequence, request, iinfo+info );
     }
@@ -50,7 +47,7 @@ void CORE_ztrtri_quark(Quark *quark)
 void INSERT_TASK_ztrtri(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_diag_t diag,
                        int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *A, int Am, int An,
                        int iinfo)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
@@ -59,8 +56,7 @@ void INSERT_TASK_ztrtri(const RUNTIME_option_t *options,
         sizeof(int),                &uplo,      VALUE,
         sizeof(int),                &diag,      VALUE,
         sizeof(int),                        &n,         VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INOUT,
-        sizeof(int),                        &lda,       VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INOUT,
         sizeof(RUNTIME_sequence_t*),           &(options->sequence),  VALUE,
         sizeof(RUNTIME_request_t*),            &(options->request),   VALUE,
         sizeof(int),                        &iinfo,     VALUE,
diff --git a/runtime/quark/codelets/codelet_ztsmlq_hetra1.c b/runtime/quark/codelets/codelet_ztsmlq_hetra1.c
index 6b68cfed4e9c7134b8d0bf99730f6c5a561e9c83..4349c864f29056ae4e7d01e386069f9d5479001a 100644
--- a/runtime/quark/codelets/codelet_ztsmlq_hetra1.c
+++ b/runtime/quark/codelets/codelet_ztsmlq_hetra1.c
@@ -22,7 +22,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_ztsmlq_hetra1_quark(Quark *quark)
 {
@@ -34,28 +34,25 @@ void CORE_ztsmlq_hetra1_quark(Quark *quark)
     int n2;
     int k;
     int ib;
-    CHAMELEON_Complex64_t *A1;
-    int lda1;
-    CHAMELEON_Complex64_t *A2;
-    int lda2;
-    CHAMELEON_Complex64_t *V;
-    int ldv;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
+    CHAM_tile_t *tileA1;
+    CHAM_tile_t *tileA2;
+    CHAM_tile_t *tileV;
+    CHAM_tile_t *tileT;
     CHAMELEON_Complex64_t *WORK;
     int ldwork;
 
-    quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
-    CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+    quark_unpack_args_14(quark, side, trans, m1, n1, m2, n2, k, ib, tileA1, tileA2, tileV, tileT, WORK, ldwork);
+    TCORE_ztsmlq_hetra1( side, trans, m1, n1, m2, n2, k, ib,
+                         tileA1, tileA2, tileV, tileT, WORK, ldwork );
 }
 
 void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options,
                               cham_side_t side, cham_trans_t trans,
                               int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                              const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                              const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                              const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                              const CHAM_desc_t *T, int Tm, int Tn, int ldt)
+                              const CHAM_desc_t *A1, int A1m, int A1n,
+                              const CHAM_desc_t *A2, int A2m, int A2n,
+                              const CHAM_desc_t *V, int Vm, int Vn,
+                              const CHAM_desc_t *T, int Tm, int Tn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     int ldwork = side == ChamLeft ? ib : nb;
@@ -69,14 +66,10 @@ void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options,
         sizeof(int),                     &n2,     VALUE,
         sizeof(int),                     &k,      VALUE,
         sizeof(int),                     &ib,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT|QUARK_REGION_U|QUARK_REGION_D,
-        sizeof(int),                     &lda1,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT,
-        sizeof(int),                     &lda2,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),    INPUT,
-        sizeof(int),                     &ldv,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ib*nb,  RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),    INPUT,
-        sizeof(int),                     &ldt,    VALUE,
+        sizeof(void*), RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT|QUARK_REGION_U|QUARK_REGION_D,
+        sizeof(void*), RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT,
+        sizeof(void*), RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),    INPUT,
+        sizeof(void*), RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),    INPUT,
         sizeof(CHAMELEON_Complex64_t)*ib*nb,  NULL,   SCRATCH,
         sizeof(int),                     &ldwork, VALUE,
         0);
diff --git a/runtime/quark/codelets/codelet_ztsmqr_hetra1.c b/runtime/quark/codelets/codelet_ztsmqr_hetra1.c
index 879302364314166cf8f86d2fe98b820cdd8b0aea..934338eea8e50313b75b15e3a053ce9a0e24d3e0 100644
--- a/runtime/quark/codelets/codelet_ztsmqr_hetra1.c
+++ b/runtime/quark/codelets/codelet_ztsmqr_hetra1.c
@@ -22,7 +22,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_ztsmqr_hetra1_quark(Quark *quark)
 {
@@ -34,28 +34,24 @@ void CORE_ztsmqr_hetra1_quark(Quark *quark)
     int n2;
     int k;
     int ib;
-    CHAMELEON_Complex64_t *A1;
-    int lda1;
-    CHAMELEON_Complex64_t *A2;
-    int lda2;
-    CHAMELEON_Complex64_t *V;
-    int ldv;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
+    CHAM_tile_t *tileA1;
+    CHAM_tile_t *tileA2;
+    CHAM_tile_t *tileV;
+    CHAM_tile_t *tileT;
     CHAMELEON_Complex64_t *WORK;
     int ldwork;
 
-    quark_unpack_args_18(quark, side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
-    CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+    quark_unpack_args_14(quark, side, trans, m1, n1, m2, n2, k, ib, tileA1, tileA2, tileV, tileT, WORK, ldwork);
+    TCORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k, ib, tileA1, tileA2, tileV, tileT, WORK, ldwork);
 }
 
 void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options,
                               cham_side_t side, cham_trans_t trans,
                               int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                              const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                              const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                              const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                              const CHAM_desc_t *T, int Tm, int Tn, int ldt)
+                              const CHAM_desc_t *A1, int A1m, int A1n,
+                              const CHAM_desc_t *A2, int A2m, int A2n,
+                              const CHAM_desc_t *V, int Vm, int Vn,
+                              const CHAM_desc_t *T, int Tm, int Tn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     int ldwork = side == ChamLeft ? ib : nb;
@@ -69,14 +65,10 @@ void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options,
         sizeof(int),                     &n2,     VALUE,
         sizeof(int),                     &k,      VALUE,
         sizeof(int),                     &ib,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT|QUARK_REGION_L|QUARK_REGION_D,
-        sizeof(int),                     &lda1,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT,
-        sizeof(int),                     &lda2,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),    INPUT,
-        sizeof(int),                     &ldv,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ib*nb,  RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),    INPUT,
-        sizeof(int),                     &ldt,    VALUE,
+        sizeof(void*), RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n), INOUT|QUARK_REGION_L|QUARK_REGION_D,
+        sizeof(void*), RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n), INOUT,
+        sizeof(void*), RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),    INPUT,
+        sizeof(void*), RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),    INPUT,
         sizeof(CHAMELEON_Complex64_t)*ib*nb,  NULL,   SCRATCH,
         sizeof(int),                     &ldwork, VALUE,
         0);
diff --git a/runtime/quark/codelets/codelet_ztstrf.c b/runtime/quark/codelets/codelet_ztstrf.c
index e9671f4daa0a4652c9a4cb32cc044f7999c9d82c..1bc4a8d357ef4313ea48b9cdb7f5e0bd189d8301 100644
--- a/runtime/quark/codelets/codelet_ztstrf.c
+++ b/runtime/quark/codelets/codelet_ztstrf.c
@@ -12,8 +12,6 @@
  * @brief Chameleon ztstrf Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 #include "coreblas/cblas.h"
 #include <math.h>
 
@@ -35,12 +33,9 @@ void CORE_ztstrf_quark(Quark *quark)
     int n;
     int ib;
     int nb;
-    CHAMELEON_Complex64_t *U;
-    int ldu;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *L;
-    int ldl;
+    CHAM_tile_t *tileU;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileL;
     int *IPIV;
     CHAMELEON_Complex64_t *WORK;
     int ldwork;
@@ -51,83 +46,18 @@ void CORE_ztstrf_quark(Quark *quark)
 
     int info;
 
-    quark_unpack_args_17(quark, m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, sequence, request, check_info, iinfo);
-    CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info);
+    quark_unpack_args_14(quark, m, n, ib, nb, tileU, tileA, tileL, IPIV, WORK, ldwork, sequence, request, check_info, iinfo);
+    TCORE_ztstrf(m, n, ib, nb, tileU, tileA, tileL, IPIV, WORK, ldwork, &info);
     if ( (info != CHAMELEON_SUCCESS) && check_info ) {
         RUNTIME_sequence_flush( (CHAM_context_t*)quark, sequence, request, iinfo+info );
     }
 }
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_ztstrf computes an LU factorization of a complex matrix formed
- *  by an upper triangular NB-by-N tile U on top of a M-by-N tile A
- *  using partial pivoting with row interchanges.
- *
- *  This is the right-looking Level 2.5 BLAS version of the algorithm.
- *
- *******************************************************************************
- *
- * @param[in] M
- *         The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in] NB
- *
- * @param[in,out] U
- *         On entry, the NB-by-N upper triangular tile.
- *         On exit, the new factor U from the factorization
- *
- * @param[in] LDU
- *         The leading dimension of the array U.  LDU >= max(1,NB).
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile to be factored.
- *         On exit, the factor L from the factorization
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,M).
- *
- * @param[in,out] L
- *         On entry, the IB-by-N lower triangular tile.
- *         On exit, the interchanged rows form the tile A in case of pivoting.
- *
- * @param[in] LDL
- *         The leading dimension of the array L.  LDL >= max(1,IB).
- *
- * @param[out] IPIV
- *         The pivot indices; for 1 <= i <= min(M,N), row i of the
- *         tile U was interchanged with row IPIV(i) of the tile A.
- *
- * @param[in,out] WORK
- *
- * @param[in] LDWORK
- *         The dimension of the array WORK.
- *
- * @param[out] INFO
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
- *              has been completed, but the factor U is exactly
- *              singular, and division by zero will occur if it is used
- *              to solve a system of equations.
- *
- */
 void INSERT_TASK_ztstrf(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
-                       const CHAM_desc_t *U, int Um, int Un, int ldu,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                       const CHAM_desc_t *U, int Um, int Un,
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *L, int Lm, int Ln,
                        int *IPIV,
                        cham_bool_t check_info, int iinfo)
 {
@@ -138,12 +68,9 @@ void INSERT_TASK_ztstrf(const RUNTIME_option_t *options,
         sizeof(int),                        &n,             VALUE,
         sizeof(int),                        &ib,            VALUE,
         sizeof(int),                        &nb,            VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),                     INOUT | QUARK_REGION_D | QUARK_REGION_U,
-        sizeof(int),                        &ldu,           VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,    RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                     INOUT | LOCALITY,
-        sizeof(int),                        &lda,           VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ib*nb,    RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln),                     OUTPUT,
-        sizeof(int),                        &ldl,           VALUE,
+        sizeof(void*), RTBLKADDR(U, CHAMELEON_Complex64_t, Um, Un),                     INOUT | QUARK_REGION_D | QUARK_REGION_U,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                     INOUT | LOCALITY,
+        sizeof(void*), RTBLKADDR(L, CHAMELEON_Complex64_t, Lm, Ln),                     OUTPUT,
         sizeof(int)*nb,                      IPIV,                  OUTPUT,
         sizeof(CHAMELEON_Complex64_t)*ib*nb,    NULL,                  SCRATCH,
         sizeof(int),                        &nb,            VALUE,
diff --git a/runtime/quark/codelets/codelet_zunmlq.c b/runtime/quark/codelets/codelet_zunmlq.c
index 5b8687571af808182585b223ed9636099fccc296..eb884f674db48022692570a85b35cf61ce57dc4d 100644
--- a/runtime/quark/codelets/codelet_zunmlq.c
+++ b/runtime/quark/codelets/codelet_zunmlq.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zunmlq Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Dulceneia Becker
@@ -26,7 +24,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zunmlq_quark(Quark *quark)
 {
@@ -36,109 +34,24 @@ void CORE_zunmlq_quark(Quark *quark)
     int n;
     int k;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
-    CHAMELEON_Complex64_t *C;
-    int ldc;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileC;
     CHAMELEON_Complex64_t *WORK;
     int ldwork;
 
-    quark_unpack_args_14(quark, side, trans, m, n, k, ib,
-                         A, lda, T, ldt, C, ldc, WORK, ldwork);
-    CORE_zunmlq(side, trans, m, n, k, ib,
-                A, lda, T, ldt, C, ldc, WORK, ldwork);
+    quark_unpack_args_11(quark, side, trans, m, n, k, ib,
+                         tileA, tileT, tileC, WORK, ldwork);
+    TCORE_zunmlq(side, trans, m, n, k, ib,
+                tileA, tileT, tileC, WORK, ldwork);
 }
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zunmlq overwrites the general complex M-by-N tile C with
- *
- *                    SIDE = 'L'     SIDE = 'R'
- *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q^H * C       C * Q^H
- *
- *  where Q is a complex unitary matrix defined as the product of k
- *  elementary reflectors
- *
- *    Q = H(k) . . . H(2) H(1)
- *
- *  as returned by CORE_zgelqt. Q is of order M if SIDE = 'L' and of order N
- *  if SIDE = 'R'.
- *
- *******************************************************************************
- *
- * @param[in] side
- *         @arg ChamLeft  : apply Q or Q^H from the Left;
- *         @arg ChamRight : apply Q or Q^H from the Right.
- *
- * @param[in] trans
- *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q^H.
- *
- * @param[in] M
- *         The number of rows of the tile C.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile C.  N >= 0.
- *
- * @param[in] K
- *         The number of elementary reflectors whose product defines
- *         the matrix Q.
- *         If SIDE = ChamLeft,  M >= K >= 0;
- *         if SIDE = ChamRight, N >= K >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in] A
- *         Dimension:  (LDA,M) if SIDE = ChamLeft,
- *                     (LDA,N) if SIDE = ChamRight,
- *         The i-th row must contain the vector which defines the
- *         elementary reflector H(i), for i = 1,2,...,k, as returned by
- *         CORE_zgelqt in the first k rows of its array argument A.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.  LDA >= max(1,K).
- *
- * @param[in] T
- *         The IB-by-K triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] LDT
- *         The leading dimension of the array T. LDT >= IB.
- *
- * @param[in,out] C
- *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q.
- *
- * @param[in] LDC
- *         The leading dimension of the array C. LDC >= max(1,M).
- *
- * @param[in,out] WORK
- *         On exit, if INFO = 0, WORK(1) returns the optimal LDWORK.
- *
- * @param[in] LDWORK
- *         The dimension of the array WORK.
- *         If SIDE = ChamLeft,  LDWORK >= max(1,N);
- *         if SIDE = ChamRight, LDWORK >= max(1,M).
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_zunmlq(const RUNTIME_option_t *options,
                        cham_side_t side, cham_trans_t trans,
                        int m, int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn,
+                       const CHAM_desc_t *C, int Cm, int Cn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_UNMLQ;
@@ -149,12 +62,9 @@ void INSERT_TASK_zunmlq(const RUNTIME_option_t *options,
         sizeof(int),                     &n,     VALUE,
         sizeof(int),                     &k,     VALUE,
         sizeof(int),                     &ib,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT | QUARK_REGION_U,
-        sizeof(int),                     &lda,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ib*nb,  RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), INPUT,
-        sizeof(int),                     &ldt,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), INOUT,
-        sizeof(int),                     &ldc,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT | QUARK_REGION_U,
+        sizeof(void*), RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), INPUT,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), INOUT,
         sizeof(CHAMELEON_Complex64_t)*ib*nb,  NULL,      SCRATCH,
         sizeof(int),                     &nb,    VALUE,
         0);
diff --git a/runtime/quark/codelets/codelet_zunmqr.c b/runtime/quark/codelets/codelet_zunmqr.c
index f03746016c3012467e4a1370353084f8cb21f282..2cabe00f57c1ac31ab8b4d519e61d0ca7afb10c9 100644
--- a/runtime/quark/codelets/codelet_zunmqr.c
+++ b/runtime/quark/codelets/codelet_zunmqr.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zunmqr Quark codelet
  *
  * @version 0.9.2
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -25,7 +23,7 @@
  */
 #include "chameleon_quark.h"
 #include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 
 void CORE_zunmqr_quark(Quark *quark)
 {
@@ -35,110 +33,24 @@ void CORE_zunmqr_quark(Quark *quark)
     int n;
     int k;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
-    CHAMELEON_Complex64_t *C;
-    int ldc;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileC;
     CHAMELEON_Complex64_t *WORK;
     int ldwork;
 
-    quark_unpack_args_14(quark, side, trans, m, n, k, ib,
-                         A, lda, T, ldt, C, ldc, WORK, ldwork);
-    CORE_zunmqr(side, trans, m, n, k, ib,
-                A, lda, T, ldt, C, ldc, WORK, ldwork);
+    quark_unpack_args_11(quark, side, trans, m, n, k, ib,
+                         tileA, tileT, tileC, WORK, ldwork);
+    TCORE_zunmqr(side, trans, m, n, k, ib,
+                tileA, tileT, tileC, WORK, ldwork);
 }
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zunmqr overwrites the general complex M-by-N tile C with
- *
- *                    SIDE = 'L'     SIDE = 'R'
- *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q^H * C       C * Q^H
- *
- *  where Q is a complex unitary matrix defined as the product of k
- *  elementary reflectors
- *
- *    Q = H(1) H(2) . . . H(k)
- *
- *  as returned by CORE_zgeqrt. Q is of order M if SIDE = 'L' and of order N
- *  if SIDE = 'R'.
- *
- *******************************************************************************
- *
- * @param[in] side
- *         @arg ChamLeft  : apply Q or Q^H from the Left;
- *         @arg ChamRight : apply Q or Q^H from the Right.
- *
- * @param[in] trans
- *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q^H.
- *
- * @param[in] M
- *         The number of rows of the tile C.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile C.  N >= 0.
- *
- * @param[in] K
- *         The number of elementary reflectors whose product defines
- *         the matrix Q.
- *         If SIDE = ChamLeft,  M >= K >= 0;
- *         if SIDE = ChamRight, N >= K >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in] A
- *         Dimension:  (LDA,K)
- *         The i-th column must contain the vector which defines the
- *         elementary reflector H(i), for i = 1,2,...,k, as returned by
- *         CORE_zgeqrt in the first k columns of its array argument A.
- *
- * @param[in] LDA
- *         The leading dimension of the array A.
- *         If SIDE = ChamLeft,  LDA >= max(1,M);
- *         if SIDE = ChamRight, LDA >= max(1,N).
- *
- * @param[in] T
- *         The IB-by-K triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] LDT
- *         The leading dimension of the array T. LDT >= IB.
- *
- * @param[in,out] C
- *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q.
- *
- * @param[in] LDC
- *         The leading dimension of the array C. LDC >= max(1,M).
- *
- * @param[in,out] WORK
- *         On exit, if INFO = 0, WORK(1) returns the optimal LDWORK.
- *
- * @param[in] LDWORK
- *         The dimension of the array WORK.
- *         If SIDE = ChamLeft,  LDWORK >= max(1,N);
- *         if SIDE = ChamRight, LDWORK >= max(1,M).
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_zunmqr(const RUNTIME_option_t *options,
                        cham_side_t side, cham_trans_t trans,
                        int m, int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn,
+                       const CHAM_desc_t *C, int Cm, int Cn)
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_UNMQR;
@@ -149,12 +61,9 @@ void INSERT_TASK_zunmqr(const RUNTIME_option_t *options,
         sizeof(int),                     &n,     VALUE,
         sizeof(int),                     &k,     VALUE,
         sizeof(int),                     &ib,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT | QUARK_REGION_L,
-        sizeof(int),                     &lda,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*ib*nb,  RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), INPUT,
-        sizeof(int),                     &ldt,   VALUE,
-        sizeof(CHAMELEON_Complex64_t)*nb*nb,  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), INOUT,
-        sizeof(int),                     &ldc,   VALUE,
+        sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INPUT | QUARK_REGION_L,
+        sizeof(void*), RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), INPUT,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), INOUT,
         sizeof(CHAMELEON_Complex64_t)*ib*nb,  NULL,      SCRATCH,
         sizeof(int),                     &nb,    VALUE,
         0);
diff --git a/runtime/quark/control/runtime_descriptor.c b/runtime/quark/control/runtime_descriptor.c
index cb4d198ba479155c69e8aa7778166bfba0b51927..b09e44165b3496ccaf4b8e9d15713aeb01d04e0e 100644
--- a/runtime/quark/control/runtime_descriptor.c
+++ b/runtime/quark/control/runtime_descriptor.c
@@ -101,5 +101,5 @@ void RUNTIME_data_migrate( const RUNTIME_sequence_t *sequence,
 
 void *RUNTIME_data_getaddr( const CHAM_desc_t *desc, int m, int n )
 {
-    return desc->get_blkaddr( desc, m, n );
+    return desc->get_blktile( desc, m, n );
 }
diff --git a/runtime/starpu/CMakeLists.txt b/runtime/starpu/CMakeLists.txt
index ff990638938b710275c6952057b45832159b71b1..924d480e81ed08fc4cc29ddd4920c64243e5757c 100644
--- a/runtime/starpu/CMakeLists.txt
+++ b/runtime/starpu/CMakeLists.txt
@@ -94,6 +94,7 @@ set(RUNTIME_COMMON
   control/runtime_options.c
   control/runtime_profiling.c
   control/runtime_workspace.c
+  interface/cham_tile_interface.c
   ${RUNTIME_COMMON_GENERATED}
   )
 
diff --git a/runtime/starpu/codelets/codelet_dzasum.c b/runtime/starpu/codelets/codelet_dzasum.c
index 869927a85d13fa9eca1a309c19625fcafa4db182..5b2c9827e1292b99fd097246288b795b0c3580ae 100644
--- a/runtime/starpu/codelets/codelet_dzasum.c
+++ b/runtime/starpu/codelets/codelet_dzasum.c
@@ -30,16 +30,14 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
     cham_uplo_t uplo;
     int M;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    double *work;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tilework;
 
-    A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA    = cti_interface_get(descr[0]);
+    tilework = cti_interface_get(descr[1]);
 
     starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N);
-    CORE_dzasum(storev, uplo, M, N, A, ldA, work);
+    TCORE_dzasum(storev, uplo, M, N, tileA, tilework->mat );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -50,7 +48,7 @@ CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func)
 
 void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int M, int N,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_dzasum;
@@ -63,17 +61,16 @@ void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
 
     starpu_insert_task(
         starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &storev,                sizeof(int),
-        STARPU_VALUE,      &uplo,                sizeof(int),
-        STARPU_VALUE,         &M,                        sizeof(int),
-        STARPU_VALUE,         &N,                        sizeof(int),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,    &storev,              sizeof(cham_store_t),
+        STARPU_VALUE,    &uplo,                sizeof(cham_uplo_t),
+        STARPU_VALUE,    &M,                   sizeof(int),
+        STARPU_VALUE,    &N,                   sizeof(int),
+        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_RW,        RTBLKADDR(B, double, Bm, Bn),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
         STARPU_NAME, "dzasum",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_map.c b/runtime/starpu/codelets/codelet_map.c
index 2388af472872c0c3b260737be7c880ed3afd5054..bab6a097cf046ef7aeed306bb6b14830449b3504 100644
--- a/runtime/starpu/codelets/codelet_map.c
+++ b/runtime/starpu/codelets/codelet_map.c
@@ -17,7 +17,7 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-CHAMELEON_CL_CB(map, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0, M*N);
+CHAMELEON_CL_CB(map, cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0, M*N)
 
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_map_cpu_func(void *descr[], void *cl_arg)
@@ -26,13 +26,13 @@ static void cl_map_cpu_func(void *descr[], void *cl_arg)
     cham_uplo_t uplo;
     int m;
     int n;
-    void *data;
+    CHAM_tile_t *tile;
     cham_unary_operator_t op_fct;
     void *op_args;
 
-    data = (void *)STARPU_MATRIX_GET_PTR(descr[0]);
+    tile = cti_interface_get(descr[0]);
     starpu_codelet_unpack_args(cl_arg, &desc, &uplo, &m, &n, &op_fct, &op_args );
-    op_fct( desc, uplo, m, n, data, op_args );
+    op_fct( desc, uplo, m, n, tile, op_args );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c
index 38368ffff4a358a11a762ad46b597c9c790f5b5d..3458a9968acc2de81ed12cbf5cd25d2f5be2dd94 100644
--- a/runtime/starpu/codelets/codelet_zaxpy.c
+++ b/runtime/starpu/codelets/codelet_zaxpy.c
@@ -25,15 +25,15 @@ static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg)
 {
     int M;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
+    CHAM_tile_t *tileA;
     int incA;
-    CHAMELEON_Complex64_t *B;
+    CHAM_tile_t *tileB;
     int incB;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &M, &alpha, &incA, &incB);
-    CORE_zaxpy(M, alpha, A, incA, B, incB);
+    TCORE_zaxpy(M, alpha, tileA, incA, tileB, incB);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
diff --git a/runtime/starpu/codelets/codelet_zbuild.c b/runtime/starpu/codelets/codelet_zbuild.c
index e329dc377cb5599ef06b53981fc418fb7205b506..f0879fddb53aa9d929ff533371aee48fddda4343 100644
--- a/runtime/starpu/codelets/codelet_zbuild.c
+++ b/runtime/starpu/codelets/codelet_zbuild.c
@@ -31,22 +31,20 @@
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
 {
-  CHAMELEON_Complex64_t *A;
-  int ldA;
-  void *user_data;
-  void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ldA, void *user_data) ;
-  int row_min, row_max, col_min, col_max;
+    CHAM_tile_t *tileA; /* tile behind descr[0]; its mat/ld fields are handed to the callback */
+    void *user_data; /* opaque pointer forwarded unchanged to the callback */
+    void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ;
+    int row_min, row_max, col_min, col_max; /* inclusive bounds of the block to build, unpacked from cl_arg */
 
-  A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-  ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
 
-  starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &user_data, &user_build_callback );
+    starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &user_data, &user_build_callback );
 
-  /* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max]
-   * (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran)
-   * and store it at the address 'buffer' with leading dimension 'ld'
-   */
-  user_build_callback(row_min, row_max, col_min, col_max, A, ldA, user_data);
+    /* The callback 'user_build_callback' is expected to build the block [row_min, row_max] x [col_min, col_max] of the matrix
+     * (both bounds are included in the intervals; indices start at 0 as in C, NOT at 1 as in Fortran)
+     * and to store it at the address 'buffer' (here tileA->mat) with leading dimension 'ld' (here tileA->ld).
+     */
+    user_build_callback(row_min, row_max, col_min, col_max, tileA->mat, tileA->ld, user_data);
 
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
@@ -56,24 +54,24 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func)
 
-void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
-                         void *user_data, void* user_build_callback )
+    void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
+                             const CHAM_desc_t *A, int Am, int An,
+                             void *user_data, void* user_build_callback )
 {
 
-  struct starpu_codelet *codelet = &cl_zbuild;
-  void (*callback)(void*) = options->profiling ? cl_zbuild_callback : NULL;
-  int row_min, row_max, col_min, col_max;
+    struct starpu_codelet *codelet = &cl_zbuild;
+    void (*callback)(void*) = options->profiling ? cl_zbuild_callback : NULL;
+    int row_min, row_max, col_min, col_max;
 
-  CHAMELEON_BEGIN_ACCESS_DECLARATION;
-  CHAMELEON_ACCESS_W(A, Am, An);
-  CHAMELEON_END_ACCESS_DECLARATION;
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_W(A, Am, An);
+    CHAMELEON_END_ACCESS_DECLARATION;
 
-  row_min = Am*A->mb ;
-  row_max = Am == A->mt-1 ? A->m-1 : row_min+A->mb-1 ;
-  col_min = An*A->nb ;
-  col_max = An == A->nt-1 ? A->n-1 : col_min+A->nb-1 ;
-  starpu_insert_task(
+    row_min = Am*A->mb ;
+    row_max = Am == A->mt-1 ? A->m-1 : row_min+A->mb-1 ;
+    col_min = An*A->nb ;
+    col_max = An == A->nt-1 ? A->n-1 : col_min+A->nb-1 ;
+    starpu_insert_task(
         starpu_mpi_codelet(codelet),
         STARPU_VALUE,    &row_min,                      sizeof(int),
         STARPU_VALUE,    &row_max,                      sizeof(int),
@@ -88,5 +86,4 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
         STARPU_NAME, "zbuild",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index 44d431b7dafc684188f869aa560e0dabdb1f311a..1aee0b3fe5ff978d9c3b3b5de042af8a332d0d5c 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -22,56 +22,56 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-CHAMELEON_CL_CB(dzasum,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      M*N)
-CHAMELEON_CL_CB(zaxpy,         starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[1]), 0,                                      M)
-CHAMELEON_CL_CB(zgeadd,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      M*N)
-CHAMELEON_CL_CB(zlascal,       starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      M*N)
-CHAMELEON_CL_CB(zgelqt,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      (4./3.)*M*N*K)
-CHAMELEON_CL_CB(zgemm,         starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), starpu_matrix_get_ny(task->handles[0]),     2. *M*N*K) /* If A^t, computation is wrong */
-CHAMELEON_CL_CB(zgeqrt,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      (4./3.)*M*M*N)
-CHAMELEON_CL_CB(zgessm,        starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_nx(task->handles[2]),     2. *M*N*K)
-CHAMELEON_CL_CB(zgessq,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), 0,                                      4.*M*N)
-CHAMELEON_CL_CB(zgetrf,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2./3.)*M*N*K)
-CHAMELEON_CL_CB(zgetrf_incpiv, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2./3.)*M*N*K)
-CHAMELEON_CL_CB(zgetrf_nopiv,  starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (2./3.)*M*N*K)
-CHAMELEON_CL_CB(zgram,         starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), 0,                                                M*N)
-CHAMELEON_CL_CB(zhe2ge,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                       (1./2.0)*M*N)
-CHAMELEON_CL_CB(zherfb,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                         2. *M* M*M)
+CHAMELEON_CL_CB(dzasum,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
+CHAMELEON_CL_CB(zaxpy,         cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[1]), 0,                                      M)
+CHAMELEON_CL_CB(zgeadd,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
+CHAMELEON_CL_CB(zlascal,       cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
+CHAMELEON_CL_CB(zgelqt,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      (4./3.)*M*N*K)
+CHAMELEON_CL_CB(zgemm,         cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), cti_handle_get_n(task->handles[0]),     2. *M*N*K) /* K is read as the n extent of A (handles[0]), so the flop count is wrong when A is transposed */
+CHAMELEON_CL_CB(zgeqrt,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      (4./3.)*M*M*N)
+CHAMELEON_CL_CB(zgessm,        cti_handle_get_m(task->handles[2]), cti_handle_get_m(task->handles[2]), cti_handle_get_m(task->handles[2]),     2. *M*N*K)
+CHAMELEON_CL_CB(zgessq,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), 0,                                      4.*M*N)
+CHAMELEON_CL_CB(zgetrf,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (2./3.)*M*N*K)
+CHAMELEON_CL_CB(zgetrf_incpiv, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (2./3.)*M*N*K)
+CHAMELEON_CL_CB(zgetrf_nopiv,  cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (2./3.)*M*N*K)
+CHAMELEON_CL_CB(zgram,         cti_handle_get_m(task->handles[3]), cti_handle_get_n(task->handles[3]), 0,                                                M*N)
+CHAMELEON_CL_CB(zhe2ge,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                       (1./2.0)*M*N)
+CHAMELEON_CL_CB(zherfb,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                         2. *M* M*M)
 #if defined(PRECISION_z) || defined(PRECISION_c)
-CHAMELEON_CL_CB(zhemm,         starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0,                                          2.*M*M *N)
-CHAMELEON_CL_CB(zher2k,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                     ( 1.+2.*M*N)*M)
-CHAMELEON_CL_CB(zherk,         starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                     ( 1.+   M)*M*N)
+CHAMELEON_CL_CB(zhemm,         cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), 0,                                          2.*M*M *N)
+CHAMELEON_CL_CB(zher2k,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                     ( 1.+2.*M*N)*M)
+CHAMELEON_CL_CB(zherk,         cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                     ( 1.+   M)*M*N)
 #endif
-CHAMELEON_CL_CB(zlacpy,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N)
-CHAMELEON_CL_CB(zlange,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N)
-CHAMELEON_CL_CB(zlaset,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N)
-CHAMELEON_CL_CB(zlaset2,       starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N)
-CHAMELEON_CL_CB(zlatro,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N)
-CHAMELEON_CL_CB(zlauum,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (1./3.)*M* M*M)
+CHAMELEON_CL_CB(zlacpy,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
+CHAMELEON_CL_CB(zlange,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
+CHAMELEON_CL_CB(zlaset,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
+CHAMELEON_CL_CB(zlaset2,       cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
+CHAMELEON_CL_CB(zlatro,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
+CHAMELEON_CL_CB(zlauum,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (1./3.)*M* M*M)
 #if defined(PRECISION_z) || defined(PRECISION_c)
-CHAMELEON_CL_CB(zplghe,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N)
-CHAMELEON_CL_CB(zsytrf_nopiv,  starpu_matrix_get_nx(task->handles[0]), 0, 0,                                                                           (1./3.)*M* M*M)
+CHAMELEON_CL_CB(zplghe,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
+CHAMELEON_CL_CB(zsytrf_nopiv,  cti_handle_get_m(task->handles[0]), 0, 0,                                                                           (1./3.)*M* M*M)
 #endif
-CHAMELEON_CL_CB(zplgsy,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N)
-CHAMELEON_CL_CB(zplrnt,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N)
-CHAMELEON_CL_CB(zbuild,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N)
-CHAMELEON_CL_CB(zplssq,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                M*N)
-CHAMELEON_CL_CB(zplssq2,       starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                                2*N)
-CHAMELEON_CL_CB(zpotrf,        starpu_matrix_get_nx(task->handles[0]), 0, 0,                                                                           (1./3.)*M* M*M)
-CHAMELEON_CL_CB(zssssm,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), M*M*(2.*M+starpu_matrix_get_nx(task->handles[2])))
-CHAMELEON_CL_CB(zsymm,         starpu_matrix_get_nx(task->handles[2]), starpu_matrix_get_ny(task->handles[2]), 0,                                           2.*M*M *N)
-CHAMELEON_CL_CB(zsyr2k,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      ( 1.+2.*M*N)*M)
-CHAMELEON_CL_CB(zsyrk,         starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                      ( 1.+   M)*M*N)
-CHAMELEON_CL_CB(ztplqt,        starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), starpu_matrix_get_nx(task->handles[0]),       2.*M*N*K)
-CHAMELEON_CL_CB(ztpqrt,        starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), starpu_matrix_get_nx(task->handles[0]),       2.*M*N*K)
-CHAMELEON_CL_CB(ztpmlqt,       starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), starpu_matrix_get_nx(task->handles[2]),       4.*M*N*K)
-CHAMELEON_CL_CB(ztpmqrt,       starpu_matrix_get_nx(task->handles[3]), starpu_matrix_get_ny(task->handles[3]), starpu_matrix_get_nx(task->handles[2]),       4.*M*N*K)
-CHAMELEON_CL_CB(ztrasm,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_ny(task->handles[0]), 0,                                         0.5*M*(M+1))
-CHAMELEON_CL_CB(ztrmm,         starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0,                                               M*M*N)
-CHAMELEON_CL_CB(ztrsm,         starpu_matrix_get_nx(task->handles[1]), starpu_matrix_get_ny(task->handles[1]), 0,                                               M*M*N)
-CHAMELEON_CL_CB(ztrtri,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (1./3.)*M *M*M)
-CHAMELEON_CL_CB(ztsmlq_hetra1, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M)
-CHAMELEON_CL_CB(ztsmqr_hetra1, starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), (4.0*M+starpu_matrix_get_nx(task->handles[3]))*M*M)
-CHAMELEON_CL_CB(ztstrf,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]),         M* M*M)
-CHAMELEON_CL_CB(zunmlq,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]),     2. *M* M*M)
-CHAMELEON_CL_CB(zunmqr,        starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]), starpu_matrix_get_nx(task->handles[0]),     2. *M* M*M)
+CHAMELEON_CL_CB(zplgsy,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
+CHAMELEON_CL_CB(zplrnt,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
+CHAMELEON_CL_CB(zbuild,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
+CHAMELEON_CL_CB(zplssq,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                M*N)
+CHAMELEON_CL_CB(zplssq2,       cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                                2*N)
+CHAMELEON_CL_CB(zpotrf,        cti_handle_get_m(task->handles[0]), 0, 0,                                                                           (1./3.)*M* M*M)
+CHAMELEON_CL_CB(zssssm,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), M*M*(2.*M+cti_handle_get_m(task->handles[2])))
+CHAMELEON_CL_CB(zsymm,         cti_handle_get_m(task->handles[2]), cti_handle_get_n(task->handles[2]), 0,                                           2.*M*M *N)
+CHAMELEON_CL_CB(zsyr2k,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      ( 1.+2.*M*N)*M)
+CHAMELEON_CL_CB(zsyrk,         cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      ( 1.+   M)*M*N)
+CHAMELEON_CL_CB(ztplqt,        cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), cti_handle_get_m(task->handles[0]),       2.*M*N*K)
+CHAMELEON_CL_CB(ztpqrt,        cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), cti_handle_get_m(task->handles[0]),       2.*M*N*K)
+CHAMELEON_CL_CB(ztpmlqt,       cti_handle_get_m(task->handles[3]), cti_handle_get_n(task->handles[3]), cti_handle_get_m(task->handles[2]),       4.*M*N*K)
+CHAMELEON_CL_CB(ztpmqrt,       cti_handle_get_m(task->handles[3]), cti_handle_get_n(task->handles[3]), cti_handle_get_m(task->handles[2]),       4.*M*N*K)
+CHAMELEON_CL_CB(ztrasm,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                         0.5*M*(M+1))
+CHAMELEON_CL_CB(ztrmm,         cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0,                                               M*M*N)
+CHAMELEON_CL_CB(ztrsm,         cti_handle_get_m(task->handles[1]), cti_handle_get_n(task->handles[1]), 0,                                               M*M*N)
+CHAMELEON_CL_CB(ztrtri,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (1./3.)*M *M*M)
+CHAMELEON_CL_CB(ztsmlq_hetra1, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (4.0*M+cti_handle_get_m(task->handles[3]))*M*M)
+CHAMELEON_CL_CB(ztsmqr_hetra1, cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), (4.0*M+cti_handle_get_m(task->handles[3]))*M*M)
+CHAMELEON_CL_CB(ztstrf,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]),         M* M*M)
+CHAMELEON_CL_CB(zunmlq,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]),     2. *M* M*M)
+CHAMELEON_CL_CB(zunmqr,        cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[0]),     2. *M* M*M)
diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c
index 9e8ec52efa05a8eec1999a16353eb735fbfec64b..e6a73fd887431993f95929f904c0fa74fb74a620 100644
--- a/runtime/starpu/codelets/codelet_zgeadd.c
+++ b/runtime/starpu/codelets/codelet_zgeadd.c
@@ -32,19 +32,15 @@ static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    const CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
+    CHAM_tile_t *tileB;
 
-    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
 
     starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta);
-    CORE_zgeadd(trans, M, N, alpha, A, ldA, beta, B, ldB);
+    TCORE_zgeadd(trans, M, N, alpha, tileA, beta, tileB);
     return;
 }
 
@@ -55,25 +51,21 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
     int M;
     int N;
     cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     cuDoubleComplex beta;
-    cuDoubleComplex *B;
-    int ldB;
+    CHAM_tile_t *tileB;
 
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta);
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &beta );
 
     RUNTIME_getStream( stream );
 
     CUDA_zgeadd(
         trans,
         M, N,
-        &alpha, A, ldA,
-        &beta,  B, ldB,
+        &alpha, tileA->mat, tileA->ld,
+        &beta,  tileB->mat, tileB->ld,
         stream);
 
 #ifndef STARPU_CUDA_ASYNC
@@ -149,8 +141,8 @@ CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
  */
 void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
                          cham_trans_t trans, int m, int n, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
-                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldB )
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_zgeadd;
     void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
@@ -175,7 +167,6 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
         STARPU_NAME, "zgeadd",
 #endif
         0);
-    (void)ldA;
 
     (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c
index 21e38b440c18bbcf5e1472476503cfcef7e2b621..d40413f58562788572b2c497c08e9fc10e226233 100644
--- a/runtime/starpu/codelets/codelet_zgelqt.c
+++ b/runtime/starpu/codelets/codelet_zgelqt.c
@@ -34,23 +34,22 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
     int m;
     int n;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *T;
-    int ldT;
-    CHAMELEON_Complex64_t *TAU, *WORK;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileW;
+    CHAMELEON_Complex64_t *TAU;
+    CHAMELEON_Complex64_t *WORK;
 
-    A   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + ib*n */
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldT = STARPU_MATRIX_GET_LD( descr[1] );
+    tileA = cti_interface_get(descr[0]);
+    tileT = cti_interface_get(descr[1]);
+    tileW = cti_interface_get(descr[2]); /* max(m,n) + ib * n */
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &h_work);
 
+    TAU  = tileW->mat;
     WORK = TAU + chameleon_max( m, n );
-    CORE_zlaset( ChamUpperLower, ib, m, 0., 0., T, ldT );
-    CORE_zgelqt(m, n, ib, A, ldA, T, ldT, TAU, WORK);
+    TCORE_zlaset( ChamUpperLower, ib, m, 0., 0., tileT );
+    TCORE_zgelqt(m, n, ib, tileA, tileT, TAU, WORK);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -59,70 +58,10 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgelqt - computes a LQ factorization of a complex M-by-N tile A: A = L * Q.
- *
- *  The tile Q is represented as a product of elementary reflectors
- *
- *    Q = H(k)' . . . H(2)' H(1)', where k = min(M,N).
- *
- *  Each H(i) has the form
- *
- *    H(i) = I - tau * v * v'
- *
- *  where tau is a complex scalar, and v is a complex vector with
- *  v(1:i-1) = 0 and v(i) = 1; conjg(v(i+1:n)) is stored on exit in
- *  A(i,i+1:n), and tau in TAU(i).
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, the elements on and below the diagonal of the array
- *         contain the M-by-min(M,N) lower trapezoidal tile L (L is
- *         lower triangular if M <= N); the elements above the diagonal,
- *         with the array TAU, represent the unitary tile Q as a
- *         product of elementary reflectors (see Further Details).
- *
- * @param[in] ldA
- *         The leading dimension of the array A.  ldA >= max(1,M).
- *
- * @param[out] T
- *         The IB-by-N triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] ldT
- *         The leading dimension of the array T. ldT >= IB.
- *
- * @param[out] TAU
- *         The scalar factors of the elementary reflectors (see Further
- *         Details).
- *
- * @param[out] WORK
- *
- *******************************************************************************
- *
- *          @retval CHAMELEON_SUCCESS successful exit
- *          @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int ldA,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldT)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zgelqt;
@@ -151,6 +90,4 @@ void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
         STARPU_NAME, "zgelqt",
 #endif
         0);
-    (void)ldT;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 3979260158bbc277014ed2d6da688000911faac6..55c480f7c1a1ceff8e1fd225c9af39e9b63090b0 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -36,27 +36,20 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
     int n;
     int k;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *C;
-    int ldC;
+    CHAM_tile_t *tileC;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
 
     starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta);
-    CORE_zgemm(transA, transB,
-        m, n, k,
-        alpha, A, ldA,
-        B, ldB,
-        beta, C, ldC);
+    TCORE_zgemm( transA, transB,
+                 m, n, k,
+                 alpha, tileA, tileB,
+                 beta,  tileC );
 }
 
 #ifdef CHAMELEON_USE_CUDA
@@ -68,20 +61,14 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
     int n;
     int k;
     cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int ldA;
-    const cuDoubleComplex *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     cuDoubleComplex beta;
-    cuDoubleComplex *C;
-    int ldC;
+    CHAM_tile_t *tileC;
 
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
 
     starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta);
 
@@ -90,9 +77,9 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
     CUDA_zgemm(
         transA, transB,
         m, n, k,
-        &alpha, A, ldA,
-                B, ldB,
-        &beta,  C, ldC,
+        &alpha, tileA->mat, tileA->ld,
+                tileB->mat, tileB->ld,
+        &beta,  tileC->mat, tileC->ld,
         stream);
 
 #ifndef STARPU_CUDA_ASYNC
@@ -117,9 +104,9 @@ CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
 void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
                       cham_trans_t transA, cham_trans_t transB,
                       int m, int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
-                                                   const CHAM_desc_t *B, int Bm, int Bn, int ldB,
-                      CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn, int ldC)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                   const CHAM_desc_t *B, int Bm, int Bn,
+                      CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zgemm;
@@ -150,7 +137,4 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
 #endif
         0);
 
-    (void)ldA;
-    (void)ldB;
-    (void)ldC;
 }
diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c
index ddc681630a08bc9e7cc8a7949e83847346cb12aa..ccbdb322e129333b6fe3371ce5d8971e3be86c90 100644
--- a/runtime/starpu/codelets/codelet_zgeqrt.c
+++ b/runtime/starpu/codelets/codelet_zgeqrt.c
@@ -34,24 +34,23 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
     int m;
     int n;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *T;
-    int ldT;
-    CHAMELEON_Complex64_t *TAU, *WORK;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileW;
+    CHAMELEON_Complex64_t *TAU;
+    CHAMELEON_Complex64_t *WORK;
 
-    A   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    TAU = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]); /* max(m,n) + n * ib */
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldT = STARPU_MATRIX_GET_LD( descr[1] );
+    tileA = cti_interface_get(descr[0]);
+    tileT = cti_interface_get(descr[1]);
+    tileW = cti_interface_get(descr[2]); /* max(m,n) + ib * n */
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &h_work);
 
+    TAU  = tileW->mat;
     WORK = TAU + chameleon_max( m, n );
 
-    CORE_zlaset( ChamUpperLower, ib, n, 0., 0., T, ldT );
-    CORE_zgeqrt(m, n, ib, A, ldA, T, ldT, TAU, WORK);
+    TCORE_zlaset( ChamUpperLower, ib, n, 0., 0., tileT );
+    TCORE_zgeqrt(m, n, ib, tileA, tileT, TAU, WORK );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -60,71 +59,10 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgeqrt computes a QR factorization of a complex M-by-N tile A:
- *  A = Q * R.
- *
- *  The tile Q is represented as a product of elementary reflectors
- *
- *    Q = H(1) H(2) . . . H(k), where k = min(M,N).
- *
- *  Each H(i) has the form
- *
- *    H(i) = I - tau * v * v'
- *
- *  where tau is a complex scalar, and v is a complex vector with
- *  v(1:i-1) = 0 and v(i) = 1; v(i+1:m) is stored on exit in A(i+1:m,i),
- *  and tau in TAU(i).
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, the elements on and above the diagonal of the array
- *         contain the min(M,N)-by-N upper trapezoidal tile R (R is
- *         upper triangular if M >= N); the elements below the diagonal,
- *         with the array TAU, represent the unitary tile Q as a
- *         product of elementary reflectors (see Further Details).
- *
- * @param[in] ldA
- *         The leading dimension of the array A.  ldA >= max(1,M).
- *
- * @param[out] T
- *         The IB-by-N triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] ldT
- *         The leading dimension of the array T. ldT >= IB.
- *
- * @param[out] TAU
- *         The scalar factors of the elementary reflectors (see Further
- *         Details).
- *
- * @param[out] WORK
- *
- *******************************************************************************
- *
- *          @retval CHAMELEON_SUCCESS successful exit
- *          @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int ldA,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldT)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zgeqrt;
@@ -153,6 +91,4 @@ void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
         STARPU_NAME, "zgeqrt",
 #endif
         0);
-    (void)ldT;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c
index 6144862d0d1bd6b44589ac03c5a44deaab26046e..6d94b0ffb9684cbb171597112ee1a64a9169299a 100644
--- a/runtime/starpu/codelets/codelet_zgessm.c
+++ b/runtime/starpu/codelets/codelet_zgessm.c
@@ -35,19 +35,15 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
     int k;
     int ib;
     int *IPIV;
-    CHAMELEON_Complex64_t *D;
-    int ldD;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileD;
+    CHAM_tile_t *tileA;
 
-    D = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    tileD = cti_interface_get(descr[1]);
+    tileA = cti_interface_get(descr[2]);
 
-    ldD = STARPU_MATRIX_GET_LD( descr[1] );
-    ldA = STARPU_MATRIX_GET_LD( descr[2] );
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV);
-    CORE_zgessm(m, n, k, ib, IPIV, D, ldD, A, ldA);
+    TCORE_zgessm(m, n, k, ib, IPIV, tileD, tileA);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -56,57 +52,12 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgessm applies the factors L computed by CORE_zgetrf_incpiv to
- *  a complex M-by-N tile A.
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] K
- *         The number of columns of the tile L. K >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in] IPIV
- *         The pivot indices array of size K as returned by
- *         CORE_zgetrf_incpiv.
- *
- * @param[in] L
- *         The M-by-K lower triangular tile.
- *
- * @param[in] ldL
- *         The leading dimension of the array L.  ldL >= max(1,M).
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, updated by the application of L.
- *
- * @param[in] ldA
- *         The leading dimension of the array A.  ldA >= max(1,M).
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- *
- */
-
 void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
                          int m, int n, int k, int ib, int nb,
                          int *IPIV,
-                         const CHAM_desc_t *L, int Lm, int Ln, int ldL,
-                         const CHAM_desc_t *D, int Dm, int Dn, int ldD,
-                         const CHAM_desc_t *A, int Am, int An, int ldA )
+                         const CHAM_desc_t *L, int Lm, int Ln,
+                         const CHAM_desc_t *D, int Dm, int Dn,
+                         const CHAM_desc_t *A, int Am, int An )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zgessm;
@@ -134,6 +85,4 @@ void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
         STARPU_NAME, "zgessm",
 #endif
         0);
-    (void)ldD;
-    (void)ldL;
 }
diff --git a/runtime/starpu/codelets/codelet_zgessq.c b/runtime/starpu/codelets/codelet_zgessq.c
index 16517741181e63b4cb499ac2b646552d68e7816c..1a63e1a22e5266d408ae1fc568db75f6c2db242e 100644
--- a/runtime/starpu/codelets/codelet_zgessq.c
+++ b/runtime/starpu/codelets/codelet_zgessq.c
@@ -29,16 +29,14 @@ static void cl_zgessq_cpu_func(void *descr[], void *cl_arg)
     cham_store_t storev;
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    double *SCALESUMSQ;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileW;
 
-    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
+    tileW = cti_interface_get(descr[1]);
 
-    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &storev, &m, &n);
-    CORE_zgessq( storev, m, n, A, ldA, SCALESUMSQ );
+    starpu_codelet_unpack_args( cl_arg, &storev, &m, &n );
+    TCORE_zgessq( storev, m, n, tileA, tileW );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -49,7 +47,7 @@ CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func)
 
 void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
                          cham_store_t storev, int m, int n,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     struct starpu_codelet *codelet = &cl_zgessq;
@@ -73,5 +71,4 @@ void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
         STARPU_NAME, "zgessq",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zgetrf.c b/runtime/starpu/codelets/codelet_zgetrf.c
index 3ce6227d994e7ad497ce6f98bab1a9843932c6ef..b744a43a9de6f0485cc64639e64f3b005eab3284 100644
--- a/runtime/starpu/codelets/codelet_zgetrf.c
+++ b/runtime/starpu/codelets/codelet_zgetrf.c
@@ -30,8 +30,7 @@ static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg)
 {
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     int *IPIV;
     cham_bool_t check_info;
     int iinfo;
@@ -39,11 +38,10 @@ static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg)
     RUNTIME_request_t *request;
     int info = 0;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &IPIV, &check_info, &iinfo, &sequence, &request);
-    CORE_zgetrf( m, n, A, ldA, IPIV, &info );
+    TCORE_zgetrf( m, n, tileA, IPIV, &info );
 
     if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
         RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
@@ -58,7 +56,7 @@ CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func)
 
 void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
                          int m, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
+                         const CHAM_desc_t *A, int Am, int An,
                          int *IPIV,
                          cham_bool_t check_info, int iinfo )
 {
@@ -86,5 +84,4 @@ void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
         STARPU_NAME, "zgetrf",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
index 3a7c599d4250cc39cae91228362e92e3038a4dc5..61eef5b399d4ecb661fe95fb8c894a586c98235e 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
@@ -34,8 +34,7 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
     int m;
     int n;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     int *IPIV;
     cham_bool_t check_info;
     int iinfo;
@@ -43,11 +42,10 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
     RUNTIME_request_t *request;
     int info = 0;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &IPIV, &check_info, &iinfo, &h_work, &sequence, &request);
-    CORE_zgetrf_incpiv(m, n, ib, A, ldA, IPIV, &info);
+    TCORE_zgetrf_incpiv(m, n, ib, tileA, IPIV, &info);
 
     if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
         RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
@@ -60,64 +58,10 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgetrf_incpiv computes an LU factorization of a general M-by-N tile A
- *  using partial pivoting with row interchanges.
- *
- *  The factorization has the form
- *
- *    A = P * L * U
- *
- *  where P is a permutation matrix, L is lower triangular with unit
- *  diagonal elements (lower trapezoidal if m > n), and U is upper
- *  triangular (upper trapezoidal if m < n).
- *
- *  This is the right-looking Level 2.5 BLAS version of the algorithm.
- *
- *******************************************************************************
- *
- * @param[in] M
- *          The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile to be factored.
- *         On exit, the factors L and U from the factorization
- *         A = P*L*U; the unit diagonal elements of L are not stored.
- *
- * @param[in] ldA
- *         The leading dimension of the array A.  ldA >= max(1,M).
- *
- * @param[out] IPIV
- *         The pivot indices; for 1 <= i <= min(M,N), row i of the
- *         tile was interchanged with row IPIV(i).
- *
- * @param[out] INFO
- *         See returned value.
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
- *              has been completed, but the factor U is exactly
- *              singular, and division by zero will occur if it is used
- *              to solve a system of equations.
- *
- */
-
 void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
                               int m, int n, int ib, int nb,
-                              const CHAM_desc_t *A, int Am, int An, int ldA,
-                              const CHAM_desc_t *L, int Lm, int Ln, int ldL,
+                              const CHAM_desc_t *A, int Am, int An,
+                              const CHAM_desc_t *L, int Lm, int Ln,
                               int *IPIV,
                               cham_bool_t check_info, int iinfo)
 {
@@ -152,6 +96,4 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
         STARPU_NAME, "zgetrf_incpiv",
 #endif
         0);
-    (void)ldL;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
index 34daf949517b070614b8d67cf05e895d6049768d..d6fd0239e8c7ae38a5683a2b2b08dd47e4712861 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
@@ -33,18 +33,16 @@ static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
     int m;
     int n;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     int iinfo;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
     int info = 0;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &iinfo, &sequence, &request);
-    CORE_zgetrf_nopiv(m, n, ib, A, ldA, &info);
+    TCORE_zgetrf_nopiv(m, n, ib, tileA, &info);
 
     if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
         RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
@@ -57,56 +55,9 @@ static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zgetrf_nopiv computes an LU factorization of a general diagonal
- *  dominant M-by-N matrix A witout pivoting.
- *
- *  The factorization has the form
- *     A = L * U
- *  where L is lower triangular with unit
- *  diagonal elements (lower trapezoidal if m > n), and U is upper
- *  triangular (upper trapezoidal if m < n).
- *
- *  This is the right-looking Level 3 BLAS version of the algorithm.
- *  WARNING: Your matrix need to be diagonal dominant if you want to call this
- *  routine safely.
- *
- *******************************************************************************
- *
- *  @param[in] M
- *          The number of rows of the matrix A.  M >= 0.
- *
- *  @param[in] N
- *          The number of columns of the matrix A.  N >= 0.
- *
- *  @param[in] IB
- *          The block size to switch between blocked and unblocked code.
- *
- *  @param[in,out] A
- *          On entry, the M-by-N matrix to be factored.
- *          On exit, the factors L and U from the factorization
- *          A = P*L*U; the unit diagonal elements of L are not stored.
- *
- *  @param[in] ldA
- *          The leading dimension of the array A.  ldA >= max(1,M).
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
- *              has been completed, but the factor U is exactly
- *              singular, and division by zero will occur if it is used
- *              to solve a system of equations.
- *
- */
-
 void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
                               int m, int n, int ib, int nb,
-                              const CHAM_desc_t *A, int Am, int An, int ldA,
+                              const CHAM_desc_t *A, int Am, int An,
                               int iinfo)
 {
     (void)nb;
@@ -132,5 +83,4 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
         STARPU_NAME, "zgetrf_nopiv",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zgram.c b/runtime/starpu/codelets/codelet_zgram.c
index f8997ad1c1817d2842945ccfc1d4193e944377e1..1aa1833b41c5cbe817a86382047459902222f98b 100644
--- a/runtime/starpu/codelets/codelet_zgram.c
+++ b/runtime/starpu/codelets/codelet_zgram.c
@@ -25,30 +25,18 @@ static void cl_zgram_cpu_func(void *descr[], void *cl_arg)
 {
     cham_uplo_t uplo;
     int m, n, mt, nt;
-    double *Di;
-    int ldDI;
-    double *Dj;
-    int ldDJ;
-    double *D;
-    double *A;
-    int ldA;
+    CHAM_tile_t *Di;
+    CHAM_tile_t *Dj;
+    CHAM_tile_t *D;
+    CHAM_tile_t *A;
 
-    Di = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
-    Dj = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    D  = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-    A  = (double *)STARPU_MATRIX_GET_PTR(descr[3]);
+    Di = cti_interface_get(descr[0]);
+    Dj = cti_interface_get(descr[1]);
+    D  = cti_interface_get(descr[2]);
+    A  = cti_interface_get(descr[3]);
 
-    ldDI = STARPU_MATRIX_GET_LD( descr[0] );
-    ldDJ = STARPU_MATRIX_GET_LD( descr[1] );
-    ldA = STARPU_MATRIX_GET_LD( descr[3] );
-
-    starpu_codelet_unpack_args(cl_arg, &uplo, &m, &n, &mt, &nt);
-    CORE_zgram( uplo,
-                m, n, mt, nt,
-                Di, ldDI,
-                Dj, ldDJ,
-                D,
-                A, ldA);
+    starpu_codelet_unpack_args( cl_arg, &uplo, &m, &n, &mt, &nt );
+    TCORE_zgram( uplo, m, n, mt, nt, Di, Dj, D, A );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -60,10 +48,10 @@ CODELETS_CPU(zgram, 4, cl_zgram_cpu_func)
 void INSERT_TASK_zgram( const RUNTIME_option_t *options,
                         cham_uplo_t uplo,
                         int m, int n, int mt, int nt,
-                        const CHAM_desc_t *Di, int Dim, int Din, int ldDI,
-                        const CHAM_desc_t *Dj, int Djm, int Djn, int ldDJ,
+                        const CHAM_desc_t *Di, int Dim, int Din,
+                        const CHAM_desc_t *Dj, int Djm, int Djn,
                         const CHAM_desc_t *D, int Dm, int Dn,
-                        CHAM_desc_t *A, int Am, int An, int ldA)
+                        CHAM_desc_t *A, int Am, int An)
 {
   struct starpu_codelet *codelet = &cl_zgram;
   void (*callback)(void*) = options->profiling ? cl_zgram_callback : NULL;
@@ -92,7 +80,4 @@ void INSERT_TASK_zgram( const RUNTIME_option_t *options,
         STARPU_NAME, "zgram",
 #endif
         0);
-    (void)ldA;
-    (void)ldDJ;
-    (void)ldDI;
 }
diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c
index e740f97dd19d374ccf8da9f75a40aa86a032a6d8..cb8f66cd1f3820f7499858f6bb20dd0f140c3dc2 100644
--- a/runtime/starpu/codelets/codelet_zhe2ge.c
+++ b/runtime/starpu/codelets/codelet_zhe2ge.c
@@ -27,19 +27,15 @@ static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg)
     cham_uplo_t uplo;
     int M;
     int N;
-    const CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
-    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
 
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
 
     starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N);
-    CORE_zhe2ge(uplo, M, N, A, ldA, B, ldB);
+    TCORE_zhe2ge(uplo, M, N, tileA, tileB);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -56,8 +52,8 @@ CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func)
 void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
                        cham_uplo_t uplo,
                        int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An, int ldA,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldB)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn)
 {
     (void)mb;
     struct starpu_codelet *codelet = &cl_zhe2ge;
@@ -81,6 +77,4 @@ void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
         STARPU_NAME, "zhe2ge",
 #endif
         0);
-    (void)ldB;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c
index df1b09627e0271dc7b028542da9c987a7f9a981d..db4d7c7f85babe20d7f7bc0eeadafdda0dabcfdd 100644
--- a/runtime/starpu/codelets/codelet_zhemm.c
+++ b/runtime/starpu/codelets/codelet_zhemm.c
@@ -35,28 +35,21 @@ static void cl_zhemm_cpu_func(void *descr[], void *cl_arg)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *C;
-    int ldC;
+    CHAM_tile_t *tileC;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
 
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &beta);
-    CORE_zhemm(side, uplo,
+    TCORE_zhemm(side, uplo,
         M, N,
-        alpha, A, ldA,
-        B, ldB,
-        beta, C, ldC);
+        alpha, tileA,
+        tileB,
+        beta, tileC);
 }
 
 #ifdef CHAMELEON_USE_CUDA
@@ -67,21 +60,14 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
     int M;
     int N;
     cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int ldA;
-    const cuDoubleComplex *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     cuDoubleComplex beta;
-    cuDoubleComplex *C;
-    int ldC;
-
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
+    CHAM_tile_t *tileC;
 
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
 
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &beta);
 
@@ -90,9 +76,9 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
     CUDA_zhemm(
         side, uplo,
         M, N,
-        &alpha, A, ldA,
-        B, ldB,
-        &beta, C, ldC,
+        &alpha, tileA->mat, tileA->ld,
+                tileB->mat, tileB->ld,
+        &beta,  tileC->mat, tileC->ld,
         stream);
 
 #ifndef STARPU_CUDA_ASYNC
@@ -117,9 +103,9 @@ CODELETS(zhemm, 3, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC)
 void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldB,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zhemm;
@@ -148,7 +134,4 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
         STARPU_NAME, "zhemm",
 #endif
         0);
-    (void)ldC;
-    (void)ldB;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c
index bdd303ea513c6ac3812abfca145f55e7050f0886..1e3c2f5b1c9f430ea1872646a028e3f9697b3f0c 100644
--- a/runtime/starpu/codelets/codelet_zher2k.c
+++ b/runtime/starpu/codelets/codelet_zher2k.c
@@ -35,25 +35,18 @@ static void cl_zher2k_cpu_func(void *descr[], void *cl_arg)
     int n;
     int k;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     double beta;
-    CHAMELEON_Complex64_t *C;
-    int ldC;
+    CHAM_tile_t *tileC;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
 
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
-    CORE_zher2k(uplo, trans,
-                n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
+    TCORE_zher2k(uplo, trans,
+                n, k, alpha, tileA, tileB, beta, tileC);
 }
 
 #ifdef CHAMELEON_USE_CUDA
@@ -64,28 +57,24 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
     int n;
     int k;
     cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int ldA;
-    const cuDoubleComplex *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     double beta;
-    cuDoubleComplex *C;
-    int ldC;
-
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
+    CHAM_tile_t *tileC;
 
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
 
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
 
     RUNTIME_getStream(stream);
 
     CUDA_zher2k( uplo, trans,
-                 n, k, &alpha, A, ldA, B, ldB, &beta, C, ldC,
+                 n, k,
+                 &alpha, tileA->mat, tileA->ld,
+                         tileB->mat, tileB->ld,
+                 &beta,  tileC->mat, tileC->ld,
                  stream);
 
 #ifndef STARPU_CUDA_ASYNC
@@ -107,12 +96,13 @@ CODELETS(zher2k, 3, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC)
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans,
-                       int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldB,
-                       double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC)
+void
+INSERT_TASK_zher2k( const RUNTIME_option_t *options,
+                    cham_uplo_t uplo, cham_trans_t trans,
+                    int n, int k, int nb,
+                    CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                 const CHAM_desc_t *B, int Bm, int Bn,
+                    double beta,                 const CHAM_desc_t *C, int Cm, int Cn )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zher2k;
@@ -141,7 +131,4 @@ void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
         STARPU_NAME, "zher2k",
 #endif
         0);
-    (void)ldC;
-    (void)ldB;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c
index f0bbcb02e6c1a9e2451a4fb83067e11c71a04d6d..a160d04907897cce2ba9767d009d92c1dc5fc366 100644
--- a/runtime/starpu/codelets/codelet_zherfb.c
+++ b/runtime/starpu/codelets/codelet_zherfb.c
@@ -25,64 +25,48 @@
 static void cl_zherfb_cpu_func(void *descr[], void *cl_arg)
 {
     cham_uplo_t uplo;
-    int n;
-    int k;
-    int ib;
-    int nb;
-    const CHAMELEON_Complex64_t *A;
-    int ldA;
-    const CHAMELEON_Complex64_t *T;
-    int ldT;
-    CHAMELEON_Complex64_t *C;
-    int ldC;
-    CHAMELEON_Complex64_t *WORK;
-    int ldWORK;
+    int n, k, ib, nb;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileC;
+    CHAM_tile_t *tileW;
+    int ldW;
 
-    A    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
+    tileA = cti_interface_get(descr[0]);
+    tileT = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+    tileW = cti_interface_get(descr[3]); /* ib * nb */
 
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldT = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
+    starpu_codelet_unpack_args( cl_arg, &uplo, &n, &k, &ib, &nb, &ldW );
 
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &k, &ib, &nb, &ldWORK);
-
-    CORE_zherfb(uplo, n, k, ib, nb, A, ldA, T, ldT, C, ldC, WORK, ldWORK);
+    TCORE_zherfb( uplo, n, k, ib, nb, tileA, tileT, tileC, tileW->mat, ldW );
 }
 
 #if defined(CHAMELEON_USE_CUDA)
 static void cl_zherfb_cuda_func(void *descr[], void *cl_arg)
 {
     cham_uplo_t uplo;
-    int n;
-    int k;
-    int ib;
-    int nb;
-    const cuDoubleComplex *A;
-    int ldA;
-    const cuDoubleComplex *T;
-    int ldT;
-    cuDoubleComplex *C;
-    int ldC;
-    cuDoubleComplex *WORK;
-    int ldWORK;
+    int n, k, ib, nb;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileC;
+    CHAM_tile_t *tileW;
+    int ldW;
 
     RUNTIME_getStream(stream);
 
-    A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldT = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
+    tileA = cti_interface_get(descr[0]);
+    tileT = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+    tileW = cti_interface_get(descr[3]); /* ib * nb */
 
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &k, &ib, &nb, &ldWORK);
+    starpu_codelet_unpack_args( cl_arg, &uplo, &n, &k, &ib, &nb, &ldW );
 
-    CUDA_zherfb( uplo, n, k, ib, nb, A, ldA, T, ldT, C, ldC, WORK, ldWORK, stream );
+    CUDA_zherfb( uplo, n, k, ib, nb,
+                 tileA->mat, tileA->ld,
+                 tileT->mat, tileT->ld,
+                 tileC->mat, tileC->ld,
+                 tileW->mat, ldW, stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
@@ -104,9 +88,9 @@ CODELETS(zherfb, 4, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC)
 void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
                        cham_uplo_t uplo,
                        int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int ldA,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldT,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldC)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *T, int Tm, int Tn,
+                       const CHAM_desc_t *C, int Cm, int Cn)
 {
     struct starpu_codelet *codelet = &cl_zherfb;
     void (*callback)(void*) = options->profiling ? cl_zherfb_callback : NULL;
@@ -124,18 +108,15 @@ void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
         STARPU_VALUE,    &k,                 sizeof(int),
         STARPU_VALUE,    &ib,                sizeof(int),
         STARPU_VALUE,    &nb,                sizeof(int),
+        STARPU_VALUE,    &nb,                sizeof(int), /* ldw */
         STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
         STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_SCRATCH,   options->ws_worker,
-        STARPU_VALUE,    &nb,                sizeof(int),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
         STARPU_NAME, "zherfb",
 #endif
         0);
-    (void)ldC;
-    (void)ldT;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index 89ac73aa3f4afb6e5334d68621c73d2c5770efc7..bd6131b6b4103ebb1150acb604e3a094a572eef3 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -35,23 +35,18 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg)
     int n;
     int k;
     double alpha;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     double beta;
-    CHAMELEON_Complex64_t *C;
-    int ldC;
+    CHAM_tile_t *tileC;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldC = STARPU_MATRIX_GET_LD( descr[1] );
+    tileA = cti_interface_get(descr[0]);
+    tileC = cti_interface_get(descr[1]);
 
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
-    CORE_zherk(uplo, trans,
+    TCORE_zherk(uplo, trans,
         n, k,
-        alpha, A, ldA,
-        beta, C, ldC);
+        alpha, tileA,
+        beta, tileC);
 }
 
 #ifdef CHAMELEON_USE_CUDA
@@ -62,27 +57,21 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
     int n;
     int k;
     double alpha;
-    const cuDoubleComplex *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     double beta;
-    cuDoubleComplex *C;
-    int ldC;
-
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
+    CHAM_tile_t *tileC;
 
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldC = STARPU_MATRIX_GET_LD( descr[1] );
+    tileA = cti_interface_get(descr[0]);
+    tileC = cti_interface_get(descr[1]);
 
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
 
     RUNTIME_getStream(stream);
 
     CUDA_zherk(
-        uplo, trans,
-        n, k,
-        &alpha, A, ldA,
-        &beta, C, ldC,
+        uplo, trans, n, k,
+        &alpha, tileA->mat, tileA->ld,
+        &beta,  tileC->mat, tileC->ld,
         stream);
 
 #ifndef STARPU_CUDA_ASYNC
@@ -107,8 +96,8 @@ CODELETS(zherk, 2, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC)
 void INSERT_TASK_zherk(const RUNTIME_option_t *options,
                       cham_uplo_t uplo, cham_trans_t trans,
                       int n, int k, int nb,
-                      double alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
-                      double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC)
+                      double alpha, const CHAM_desc_t *A, int Am, int An,
+                      double beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zherk;
@@ -135,6 +124,4 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options,
         STARPU_NAME, "zherk",
 #endif
         0);
-    (void)ldC;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zhessq.c b/runtime/starpu/codelets/codelet_zhessq.c
index 884f02aa1e5ff7999ece8d94c25568dae98d4c38..cb24c4e6925ac784b5ed8561ccd17556c956c181 100644
--- a/runtime/starpu/codelets/codelet_zhessq.c
+++ b/runtime/starpu/codelets/codelet_zhessq.c
@@ -24,10 +24,10 @@
 
 void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int n,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     INSERT_TASK_zsyssq( options, storev, uplo, n,
-                        A, Am, An, ldA,
+                        A, Am, An,
                         SCALESUMSQ, SCALESUMSQm, SCALESUMSQn );
 }
diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c
index 6c07e6586fc65d1091a2ac31e6fb980bf78ec45a..d13e6ec088d0a5808354770ef77e894301c04308 100644
--- a/runtime/starpu/codelets/codelet_zlacpy.c
+++ b/runtime/starpu/codelets/codelet_zlacpy.c
@@ -35,19 +35,22 @@ static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
     int N;
     int displA;
     int displB;
-    const CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAMELEON_Complex64_t *A;
     CHAMELEON_Complex64_t *B;
-    int ldB;
 
-    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
 
     starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &displB);
-    CORE_zlacpy(uplo, M, N, A + displA, ldA, B + displB, ldB);
+
+    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
+    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
+
+    A = tileA->mat;
+    B = tileB->mat;
+    CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -56,15 +59,10 @@ static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zlacpy, 2, cl_zlacpy_cpu_func)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int m, int n, int nb,
-                          int displA, const CHAM_desc_t *A, int Am, int An, int ldA,
-                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldB )
+                          int displA, const CHAM_desc_t *A, int Am, int An,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zlacpy;
@@ -90,16 +88,14 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
         STARPU_NAME, "zlacpy",
 #endif
         0);
-    (void)ldA;
-    (void)ldA;
 }
 
 void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int m, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldB )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                         0, A, Am, An, ldA,
-                         0, B, Bm, Bn, ldB );
+                         0, A, Am, An,
+                         0, B, Bm, Bn );
 }
diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c
index 21823b861f9e51a5377fc3bc0a8cfc2daeda346e..89b36aa0c00d2f2e4fffb548927a7053d8c92eeb 100644
--- a/runtime/starpu/codelets/codelet_zlag2c.c
+++ b/runtime/starpu/codelets/codelet_zlag2c.c
@@ -30,19 +30,15 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
 {
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex32_t *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
 
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
 
     starpu_codelet_unpack_args(cl_arg, &m, &n);
-    CORE_zlag2c( m, n, A, ldA, B, ldB);
+    TCORE_zlag2c( m, n, tileA, tileB);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -58,8 +54,8 @@ CODELETS_CPU(zlag2c, 1, cl_zlag2c_cpu_func)
  */
 void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
                        int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int ldA,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldB)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zlag2c;
@@ -82,10 +78,6 @@ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
         STARPU_NAME, "zlag2c",
 #endif
         0);
-    (void)ldB;
-    (void)ldA;
-    (void)ldB;
-    (void)ldA;
 }
 
 #if !defined(CHAMELEON_SIMULATION)
@@ -93,19 +85,15 @@ static void cl_clag2z_cpu_func(void *descr[], void *cl_arg)
 {
     int m;
     int n;
-    CHAMELEON_Complex32_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
-    A = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
 
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
 
     starpu_codelet_unpack_args(cl_arg, &m, &n);
-    CORE_clag2z( m, n, A, ldA, B, ldB);
+    TCORE_clag2z( m, n, tileA, tileB);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -116,8 +104,8 @@ CODELETS_CPU(clag2z, 2, cl_clag2z_cpu_func)
 
 void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
                        int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int ldA,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldB)
+                       const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_clag2z;
@@ -140,8 +128,4 @@ void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
         STARPU_NAME, "clag2z",
 #endif
         0);
-    (void)ldB;
-    (void)ldA;
-    (void)ldB;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c
index 7ed0446384828fda3f00b639f96a1a5ce4d1f0d1..477f364dff7caf95572c7c13ca79a763f82ebc0d 100644
--- a/runtime/starpu/codelets/codelet_zlange.c
+++ b/runtime/starpu/codelets/codelet_zlange.c
@@ -28,22 +28,19 @@
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zlange_cpu_func(void *descr[], void *cl_arg)
 {
-    double *normA;
     cham_normtype_t norm;
     int M;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    double *work;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tilework;
+    CHAM_tile_t *tilenormA;
 
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
+    tileA     = cti_interface_get(descr[0]);
+    tilework  = cti_interface_get(descr[1]);
+    tilenormA = cti_interface_get(descr[2]);
 
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-
-    starpu_codelet_unpack_args(cl_arg, &norm, &M, &N);
-    CORE_zlange( norm, M, N, A, ldA, work, normA );
+    starpu_codelet_unpack_args( cl_arg, &norm, &M, &N );
+    TCORE_zlange( norm, M, N, tileA, tilework->mat, tilenormA->mat );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -54,7 +51,7 @@ CODELETS_CPU(zlange, 3, cl_zlange_cpu_func)
 
 void INSERT_TASK_zlange( const RUNTIME_option_t *options,
                          cham_normtype_t norm, int M, int N, int NB,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn )
 {
     (void)NB;
@@ -68,7 +65,7 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options,
 
     starpu_insert_task(
         starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &norm,              sizeof(int),
+        STARPU_VALUE,    &norm,              sizeof(cham_normtype_t),
         STARPU_VALUE,    &M,                 sizeof(int),
         STARPU_VALUE,    &N,                 sizeof(int),
         STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
@@ -80,20 +77,23 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options,
         STARPU_NAME, "zlange",
 #endif
         0);
-    (void)ldA;
 }
 
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg)
 {
-    double *A;
-    double *B;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileNorm;
+    double *A, *norm;
+
+    tileA    = cti_interface_get(descr[0]);
+    tileNorm = cti_interface_get(descr[1]);
 
-    A = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    A    = tileA->mat;
+    norm = tileNorm->mat;
 
-    if ( *A > *B ) {
-        *B = *A;
+    if ( A[0] > *norm ) {
+        *norm = A[0];
     }
     (void)cl_arg;
 }
diff --git a/runtime/starpu/codelets/codelet_zlanhe.c b/runtime/starpu/codelets/codelet_zlanhe.c
index 3428c08faf9dc2202e1ea73914a82b8525b1409b..8b7854127db7349a2336b2a8b8e214aafe4391c3 100644
--- a/runtime/starpu/codelets/codelet_zlanhe.c
+++ b/runtime/starpu/codelets/codelet_zlanhe.c
@@ -28,22 +28,19 @@
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zlanhe_cpu_func(void *descr[], void *cl_arg)
 {
-    double *normA;
+    CHAM_tile_t *tilenormA;
     cham_normtype_t norm;
     cham_uplo_t uplo;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    double *work;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tilework;
 
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA     = cti_interface_get(descr[0]);
+    tilework  = cti_interface_get(descr[1]);
+    tilenormA = cti_interface_get(descr[2]);
 
     starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N);
-    CORE_zlanhe( norm, uplo, N, A, ldA, work, normA);
+    TCORE_zlanhe( norm, uplo, N, tileA, tilework->mat, tilenormA->mat );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -54,7 +51,7 @@ CODELETS_CPU(zlanhe, 3, cl_zlanhe_cpu_func)
 
 void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
                        cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int ldA,
+                       const CHAM_desc_t *A, int Am, int An,
                        const CHAM_desc_t *B, int Bm, int Bn)
 {
     struct starpu_codelet *codelet = &cl_zlanhe;
@@ -79,7 +76,6 @@ void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
         STARPU_NAME, "zlanhe",
 #endif
         0);
-    (void)ldA;
 
     (void)NB;
 }
diff --git a/runtime/starpu/codelets/codelet_zlansy.c b/runtime/starpu/codelets/codelet_zlansy.c
index 42a4a4ee47bc53713f8a1aa2cf45214b28f86d05..2345339cebd9290d2b77fa31b13567659f7de5f6 100644
--- a/runtime/starpu/codelets/codelet_zlansy.c
+++ b/runtime/starpu/codelets/codelet_zlansy.c
@@ -28,21 +28,19 @@
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zlansy_cpu_func(void *descr[], void *cl_arg)
 {
-    double *normA;
+    CHAM_tile_t *tilenormA;
     cham_normtype_t norm;
     cham_uplo_t uplo;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    double *work;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tilework;
 
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA     = cti_interface_get(descr[0]);
+    tilework  = cti_interface_get(descr[1]);
+    tilenormA = cti_interface_get(descr[2]);
 
     starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N);
-    CORE_zlansy( norm, uplo, N, A, ldA, work, normA);
+    TCORE_zlansy( norm, uplo, N, tileA, tilework->mat, tilenormA->mat );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -53,7 +51,7 @@ CODELETS_CPU(zlansy, 3, cl_zlansy_cpu_func)
 
 void INSERT_TASK_zlansy( const RUNTIME_option_t *options,
                          cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn )
 {
     (void)NB;
@@ -79,5 +77,4 @@ void INSERT_TASK_zlansy( const RUNTIME_option_t *options,
         STARPU_NAME, "zlansy",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zlantr.c b/runtime/starpu/codelets/codelet_zlantr.c
index 1154bfd47d89217d5aaf3cf39d2ffba8b57dace1..4d4020d67e6d41f5b7f77613f94b6ed84bfd03c1 100644
--- a/runtime/starpu/codelets/codelet_zlantr.c
+++ b/runtime/starpu/codelets/codelet_zlantr.c
@@ -25,20 +25,18 @@
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zlantr_cpu_func(void *descr[], void *cl_arg)
 {
-    double *normA;
+    CHAM_tile_t *tilenormA;
     cham_normtype_t norm, uplo, diag;
     int M, N;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    double *work;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tilework;
 
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA     = cti_interface_get(descr[0]);
+    tilework  = cti_interface_get(descr[1]);
+    tilenormA = cti_interface_get(descr[2]);
 
     starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &diag, &M, &N);
-    CORE_zlantr( norm, uplo, diag, M, N, A, ldA, work, normA);
+    TCORE_zlantr( norm, uplo, diag, M, N, tileA, tilework->mat, tilenormA->mat );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -50,7 +48,7 @@ CODELETS_CPU(zlantr, 3, cl_zlantr_cpu_func)
 void INSERT_TASK_zlantr( const RUNTIME_option_t *options,
                          cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
                          int M, int N, int NB,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_zlantr;
@@ -77,7 +75,6 @@ void INSERT_TASK_zlantr( const RUNTIME_option_t *options,
         STARPU_NAME, "zlantr",
 #endif
         0);
-    (void)ldA;
 
     (void)NB;
 }
diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c
index c454234a5b8eafd5556536f1bdfe04cd94bcc49e..58b4d3ad2871f57aef7631818ba269d85ebbc16c 100644
--- a/runtime/starpu/codelets/codelet_zlascal.c
+++ b/runtime/starpu/codelets/codelet_zlascal.c
@@ -30,14 +30,12 @@ static void cl_zlascal_cpu_func(void *descr[], void *cl_arg)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
 
     starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha);
-    CORE_zlascal(uplo, M, N, alpha, A, ldA);
+    TCORE_zlascal(uplo, M, N, alpha, tileA);
     return;
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
@@ -47,42 +45,11 @@ static void cl_zlascal_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zlascal adds to matrices together.
- *
- *       A <- alpha * A
- *
- *******************************************************************************
- *
- * @param[in] M
- *          Number of rows of the matrices A and B.
- *
- * @param[in] N
- *          Number of columns of the matrices A and B.
- *
- * @param[in] alpha
- *          Scalar factor of A.
- *
- * @param[in] A
- *          Matrix of size ldA-by-N.
- *
- * @param[in] ldA
- *          Leading dimension of the array A. ldA >= max(1,M)
- *
- *******************************************************************************
- *
- *          @retval CHAMELEON_SUCCESS successful exit
- *          @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
                         cham_uplo_t uplo,
                         int m, int n, int nb,
                         CHAMELEON_Complex64_t alpha,
-                        const CHAM_desc_t *A, int Am, int An, int ldA)
+                        const CHAM_desc_t *A, int Am, int An)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zlascal;
@@ -105,5 +72,4 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
         STARPU_NAME, "zlascal",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c
index ba27e1925dd038b016ec1b9f76a5d076d6063545..5aecc3c3a1fade54184a35093e7b99c9c5eaaf7b 100644
--- a/runtime/starpu/codelets/codelet_zlaset.c
+++ b/runtime/starpu/codelets/codelet_zlaset.c
@@ -34,14 +34,12 @@ static void cl_zlaset_cpu_func(void *descr[], void *cl_arg)
     int N;
     CHAMELEON_Complex64_t alpha;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
 
     starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta);
-    CORE_zlaset(uplo, M, N, alpha, beta, A, ldA);
+    TCORE_zlaset(uplo, M, N, alpha, beta, tileA);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -50,45 +48,10 @@ static void cl_zlaset_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zlaset, 1, cl_zlaset_cpu_func)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zlaset - Sets the elements of the matrix A on the diagonal
- *  to beta and on the off-diagonals to alpha
- *
- *******************************************************************************
- *
- * @param[in] uplo
- *          Specifies which elements of the matrix are to be set
- *          = ChamUpper: Upper part of A is set;
- *          = ChamLower: Lower part of A is set;
- *          = ChamUpperLower: ALL elements of A are set.
- *
- * @param[in] M
- *          The number of rows of the matrix A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the matrix A.  N >= 0.
- *
- * @param[in] alpha
- *         The constant to which the off-diagonal elements are to be set.
- *
- * @param[in] beta
- *         The constant to which the diagonal elements are to be set.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, A has been set accordingly.
- *
- * @param[in] ldA
- *         The leading dimension of the array A.  ldA >= max(1,M).
- *
- */
 void INSERT_TASK_zlaset(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int M, int N,
                        CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
-                       const CHAM_desc_t *A, int Am, int An, int ldA)
+                       const CHAM_desc_t *A, int Am, int An)
 {
 
     struct starpu_codelet *codelet = &cl_zlaset;
@@ -112,5 +75,4 @@ void INSERT_TASK_zlaset(const RUNTIME_option_t *options,
         STARPU_NAME, "zlaset",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zlaset2.c b/runtime/starpu/codelets/codelet_zlaset2.c
index c7efc2a9c7b5b951f0ad11786df39fc27ea516b4..f9344fc291f26c95a1c1c49069e2d929ea9f3b70 100644
--- a/runtime/starpu/codelets/codelet_zlaset2.c
+++ b/runtime/starpu/codelets/codelet_zlaset2.c
@@ -33,13 +33,11 @@ static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha);
-    CORE_zlaset2(uplo, M, N, alpha, A, ldA);
+    TCORE_zlaset2(uplo, M, N, alpha, tileA);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -48,42 +46,9 @@ static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zlaset2, 1, cl_zlaset2_cpu_func)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zlaset2 - Sets the elements of the matrix A to alpha.
- *  Not LAPACK compliant! Read below.
- *
- *******************************************************************************
- *
- * @param[in] uplo
- *          Specifies which elements of the matrix are to be set
- *          = ChamUpper: STRICT Upper part of A is set to alpha;
- *          = ChamLower: STRICT Lower part of A is set to alpha;
- *          = ChamUpperLower: ALL elements of A are set to alpha.
- *          Not LAPACK Compliant.
- *
- * @param[in] M
- *          The number of rows of the matrix A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the matrix A.  N >= 0.
- *
- * @param[in] alpha
- *         The constant to which the elements are to be set.
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile A.
- *         On exit, A has been set to alpha accordingly.
- *
- * @param[in] ldA
- *         The leading dimension of the array A.  ldA >= max(1,M).
- *
- */
 void INSERT_TASK_zlaset2(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int M, int N,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA)
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An)
 {
 
     struct starpu_codelet *codelet = &cl_zlaset2;
@@ -106,5 +71,4 @@ void INSERT_TASK_zlaset2(const RUNTIME_option_t *options,
         STARPU_NAME, "zlaset2",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c
index f2a1b07797af1a2eed1ae385d99dca7c57e8823e..d3d6a18dc3c614a440c3334c63c76048fd2715af 100644
--- a/runtime/starpu/codelets/codelet_zlatro.c
+++ b/runtime/starpu/codelets/codelet_zlatro.c
@@ -34,18 +34,14 @@ static void cl_zlatro_cpu_func(void *descr[], void *cl_arg)
     cham_trans_t trans;
     int M;
     int N;
-    const CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
-    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
 
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N);
-    CORE_zlatro(uplo, trans, M, N, A, ldA, B, ldB);
+    TCORE_zlatro(uplo, trans, M, N, tileA, tileB);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -62,8 +58,8 @@ CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func)
 void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans,
                          int m, int n, int mb,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldB )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_zlatro;
     void (*callback)(void*) = NULL;
@@ -87,7 +83,5 @@ void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
         STARPU_NAME, "zlatro",
 #endif
         0);
-    (void)ldA;
-    (void)ldB;
     (void)mb;
 }
diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c
index 31f742e60e2bc188e50b276fab7b5b88c67c21c9..db67785550c6c6c0ca93d37a1d101e7461774a86 100644
--- a/runtime/starpu/codelets/codelet_zlauum.c
+++ b/runtime/starpu/codelets/codelet_zlauum.c
@@ -32,14 +32,12 @@ static void cl_zlauum_cpu_func(void *descr[], void *cl_arg)
 {
     cham_uplo_t uplo;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
 
     starpu_codelet_unpack_args(cl_arg, &uplo, &N);
-    CORE_zlauum(uplo, N, A, ldA);
+    TCORE_zlauum(uplo, N, tileA);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -55,7 +53,7 @@ CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func)
  */
 void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int ldA )
+                         const CHAM_desc_t *A, int Am, int An )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zlauum;
@@ -77,5 +75,4 @@ void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
 #endif
         0);
 
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c
index fc3a062f7bb5d9daacae243e80cb23a0286853cd..20da9d0ab75e78e6e0d6379354b3597493302fb7 100644
--- a/runtime/starpu/codelets/codelet_zplghe.c
+++ b/runtime/starpu/codelets/codelet_zplghe.c
@@ -35,18 +35,16 @@ static void cl_zplghe_cpu_func(void *descr[], void *cl_arg)
     double bump;
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     int bigM;
     int m0;
     int n0;
     unsigned long long int seed;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
 
     starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &bigM, &m0, &n0, &seed );
-    CORE_zplghe( bump, m, n, A, ldA, bigM, m0, n0, seed );
+    TCORE_zplghe( bump, m, n, tileA, bigM, m0, n0, seed );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -56,7 +54,7 @@ static void cl_zplghe_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func)
 
 void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
-                         double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int ldA,
+                         double bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
                          int bigM, int m0, int n0, unsigned long long int seed )
 {
     struct starpu_codelet *codelet = &cl_zplghe;
@@ -82,5 +80,4 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
         STARPU_NAME, "zplghe",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c
index e98a40ea03fe04a7ef94b116bab302308f9f9b03..4fbc00d7d6f5a4b8968721bd7a08a3e1408b47c5 100644
--- a/runtime/starpu/codelets/codelet_zplgsy.c
+++ b/runtime/starpu/codelets/codelet_zplgsy.c
@@ -35,18 +35,16 @@ static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg)
     CHAMELEON_Complex64_t bump;
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     int bigM;
     int m0;
     int n0;
     unsigned long long int seed;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
 
     starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &bigM, &m0, &n0, &seed );
-    CORE_zplgsy( bump, m, n, A, ldA, bigM, m0, n0, seed );
+    TCORE_zplgsy( bump, m, n, tileA, bigM, m0, n0, seed );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -56,7 +54,7 @@ static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU(zplgsy, 1, cl_zplgsy_cpu_func)
 
 void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
-                        CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int ldA,
+                        CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
                         int bigM, int m0, int n0, unsigned long long int seed )
 {
 
@@ -83,5 +81,4 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
         STARPU_NAME, "zplgsy",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c
index 8ec8b960985c5af8d22bd69354907caff847cb7e..28b82334c073a28e9d4cfedbeae16529f979e822 100644
--- a/runtime/starpu/codelets/codelet_zplrnt.c
+++ b/runtime/starpu/codelets/codelet_zplrnt.c
@@ -32,18 +32,16 @@ static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg)
 {
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     int bigM;
     int m0;
     int n0;
     unsigned long long int seed;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
 
     starpu_codelet_unpack_args(cl_arg, &m, &n, &bigM, &m0, &n0, &seed );
-    CORE_zplrnt( m, n, A, ldA, bigM, m0, n0, seed );
+    TCORE_zplrnt( m, n, tileA, bigM, m0, n0, seed );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -53,7 +51,7 @@ static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg)
 CODELETS_CPU(zplrnt, 1, cl_zplrnt_cpu_func)
 
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
-                         int m, int n, const CHAM_desc_t *A, int Am, int An, int ldA,
+                         int m, int n, const CHAM_desc_t *A, int Am, int An,
                          int bigM, int m0, int n0, unsigned long long int seed )
 {
 
@@ -79,5 +77,4 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
         STARPU_NAME, "zplrnt",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zplssq.c b/runtime/starpu/codelets/codelet_zplssq.c
index 96a41eb75bf9e30d9ad939acd8c29f89c71cc7c0..7da4758d7366c3e32a93e550d2a17ddc8793ce1d 100644
--- a/runtime/starpu/codelets/codelet_zplssq.c
+++ b/runtime/starpu/codelets/codelet_zplssq.c
@@ -29,14 +29,17 @@ static void cl_zplssq_cpu_func(void *descr[], void *cl_arg)
     cham_store_t storev;
     int M;
     int N;
-    double *SCLSSQ_IN;
-    double *SCLSSQ_OUT;
+    CHAM_tile_t *tileIN;
+    CHAM_tile_t *tileOUT;
 
-    starpu_codelet_unpack_args(cl_arg, &storev, &M, &N);
-    SCLSSQ_IN  = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCLSSQ_OUT = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args( cl_arg, &storev, &M, &N );
+    tileIN  = cti_interface_get(descr[0]);
+    tileOUT = cti_interface_get(descr[1]);
 
-    CORE_zplssq(storev, M, N, SCLSSQ_IN, SCLSSQ_OUT);
+    assert( tileIN->format  & CHAMELEON_TILE_FULLRANK );
+    assert( tileOUT->format & CHAMELEON_TILE_FULLRANK );
+
+    CORE_zplssq( storev, M, N, tileIN->mat, tileOUT->mat );
 
     (void)cl_arg;
 }
@@ -49,15 +52,15 @@ CODELETS_CPU(zplssq, 2, cl_zplssq_cpu_func)
 
 void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
                          cham_store_t storev, int M, int N,
-                         const CHAM_desc_t *SCLSSQ_IN,  int SCLSSQ_INm,  int SCLSSQ_INn,
-                         const CHAM_desc_t *SCLSSQ_OUT, int SCLSSQ_OUTm, int SCLSSQ_OUTn )
+                         const CHAM_desc_t *IN,  int INm,  int INn,
+                         const CHAM_desc_t *OUT, int OUTm, int OUTn )
 {
     struct starpu_codelet *codelet = &cl_zplssq;
     void (*callback)(void*) = options->profiling ? cl_zplssq_callback : NULL;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(  SCLSSQ_IN,  SCLSSQ_INm,  SCLSSQ_INn  );
-    CHAMELEON_ACCESS_RW( SCLSSQ_OUT, SCLSSQ_OUTm, SCLSSQ_OUTn );
+    CHAMELEON_ACCESS_R(  IN,  INm,  INn  );
+    CHAMELEON_ACCESS_RW( OUT, OUTm, OUTn );
     CHAMELEON_END_ACCESS_DECLARATION;
 
     starpu_insert_task(
@@ -65,8 +68,8 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
         STARPU_VALUE,    &storev,            sizeof(int),
         STARPU_VALUE,    &M,                 sizeof(int),
         STARPU_VALUE,    &N,                 sizeof(int),
-        STARPU_R,  RTBLKADDR( SCLSSQ_IN,  double, SCLSSQ_INm,  SCLSSQ_INn  ),
-        STARPU_RW, RTBLKADDR( SCLSSQ_OUT, double, SCLSSQ_OUTm, SCLSSQ_OUTn ),
+        STARPU_R,  RTBLKADDR( IN,  double, INm,  INn  ),
+        STARPU_RW, RTBLKADDR( OUT, double, OUTm, OUTn ),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
@@ -79,12 +82,14 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
 static void cl_zplssq2_cpu_func(void *descr[], void *cl_arg)
 {
     int N;
-    double *RESULT;
+    CHAM_tile_t *tileRESULT;
 
     starpu_codelet_unpack_args(cl_arg, &N);
-    RESULT = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
+    tileRESULT = cti_interface_get(descr[0]);
+
+    assert( tileRESULT->format  & CHAMELEON_TILE_FULLRANK );
 
-    CORE_zplssq2(N, RESULT);
+    CORE_zplssq2( N, tileRESULT->mat );
 
     (void)cl_arg;
 }
diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c
index c0bead183bc7223ae5acfb3e3f50156886f0798e..01ef85d594e64be2db165fac4e9a3f697fe07088 100644
--- a/runtime/starpu/codelets/codelet_zpotrf.c
+++ b/runtime/starpu/codelets/codelet_zpotrf.c
@@ -32,18 +32,16 @@ static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
 {
     cham_uplo_t uplo;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     int iinfo;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
     int info = 0;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
 
     starpu_codelet_unpack_args(cl_arg, &uplo, &n, &iinfo, &sequence, &request);
-    CORE_zpotrf(uplo, n, A, ldA, &info);
+    TCORE_zpotrf(uplo, n, tileA, &info);
 
     if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
         RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
@@ -63,7 +61,7 @@ CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func)
  */
 void INSERT_TASK_zpotrf(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int ldA,
+                       const CHAM_desc_t *A, int Am, int An,
                        int iinfo)
 {
     (void)nb;
@@ -89,5 +87,4 @@ void INSERT_TASK_zpotrf(const RUNTIME_option_t *options,
         STARPU_NAME, "zpotrf",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c
index a46520512775908bddd6453e3595f7f64f43d1a6..78275624e8a67c0d6cc6d50ee50622159aa40037 100644
--- a/runtime/starpu/codelets/codelet_zssssm.c
+++ b/runtime/starpu/codelets/codelet_zssssm.c
@@ -36,26 +36,18 @@ static void cl_zssssm_cpu_func(void *descr[], void *cl_arg)
     int n2;
     int k;
     int ib;
-    CHAMELEON_Complex64_t *A1;
-    int ldA1;
-    CHAMELEON_Complex64_t *A2;
-    int ldA2;
-    CHAMELEON_Complex64_t *L1;
-    int ldL1;
-    CHAMELEON_Complex64_t *L2;
-    int ldL2;
+    CHAM_tile_t *tileA1;
+    CHAM_tile_t *tileA2;
+    CHAM_tile_t *tileL1;
+    CHAM_tile_t *tileL2;
     int *IPIV;
 
-    A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    L1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    L2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-    ldA1 = STARPU_MATRIX_GET_LD( descr[0] );
-    ldA2 = STARPU_MATRIX_GET_LD( descr[1] );
-    ldL1 = STARPU_MATRIX_GET_LD( descr[2] );
-    ldL2 = STARPU_MATRIX_GET_LD( descr[3] );
+    tileA1 = cti_interface_get(descr[0]);
+    tileA2 = cti_interface_get(descr[1]);
+    tileL1 = cti_interface_get(descr[2]);
+    tileL2 = cti_interface_get(descr[3]);
     starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &IPIV);
-    CORE_zssssm(m1, n1, m2, n2, k, ib, A1, ldA1, A2, ldA2, L1, ldL1, L2, ldL2, IPIV);
+    TCORE_zssssm(m1, n1, m2, n2, k, ib, tileA1, tileA2, tileL1, tileL2, IPIV);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -64,81 +56,12 @@ static void cl_zssssm_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zssssm applies the LU factorization update from a complex
- *  matrix formed by a lower triangular IB-by-K tile L1 on top of a
- *  M2-by-K tile L2 to a second complex matrix formed by a M1-by-N1
- *  tile A1 on top of a M2-by-N2 tile A2 (N1 == N2).
- *
- *  This is the right-looking Level 2.5 BLAS version of the algorithm.
- *
- *******************************************************************************
- *
- * @param[in] M1
- *         The number of rows of the tile A1.  M1 >= 0.
- *
- * @param[in] N1
- *         The number of columns of the tile A1.  N1 >= 0.
- *
- * @param[in] M2
- *         The number of rows of the tile A2 and of the tile L2.
- *         M2 >= 0.
- *
- * @param[in] N2
- *         The number of columns of the tile A2.  N2 >= 0.
- *
- * @param[in] K
- *         The number of columns of the tiles L1 and L2.  K >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in,out] A1
- *         On entry, the M1-by-N1 tile A1.
- *         On exit, A1 is updated by the application of L (L1 L2).
- *
- * @param[in] ldA1
- *         The leading dimension of the array A1.  ldA1 >= max(1,M1).
- *
- * @param[in,out] A2
- *         On entry, the M2-by-N2 tile A2.
- *         On exit, A2 is updated by the application of L (L1 L2).
- *
- * @param[in] ldA2
- *         The leading dimension of the array A2.  ldA2 >= max(1,M2).
- *
- * @param[in] L1
- *         The IB-by-K lower triangular tile as returned by
- *         CORE_ztstrf.
- *
- * @param[in] ldL1
- *         The leading dimension of the array L1.  ldL1 >= max(1,IB).
- *
- * @param[in] L2
- *         The M2-by-K tile as returned by CORE_ztstrf.
- *
- * @param[in] ldL2
- *         The leading dimension of the array L2.  ldL2 >= max(1,M2).
- *
- * @param[in] IPIV
- *         The pivot indices array of size K as returned by
- *         CORE_ztstrf.
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- *
- */
 void INSERT_TASK_zssssm( const RUNTIME_option_t *options,
                          int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                         const CHAM_desc_t *A1, int A1m, int A1n, int ldA1,
-                         const CHAM_desc_t *A2, int A2m, int A2n, int ldA2,
-                         const CHAM_desc_t *L1, int L1m, int L1n, int ldL1,
-                         const CHAM_desc_t *L2, int L2m, int L2n, int ldL2,
+                         const CHAM_desc_t *A1, int A1m, int A1n,
+                         const CHAM_desc_t *A2, int A2m, int A2n,
+                         const CHAM_desc_t *L1, int L1m, int L1n,
+                         const CHAM_desc_t *L2, int L2m, int L2n,
                          const int *IPIV )
 {
     (void)nb;
diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c
index fc22d08f905b71bbce52a68e00637eb5f5105660..844347ac3b4f75f8c2338c0dbc7c41a239667340 100644
--- a/runtime/starpu/codelets/codelet_zsymm.c
+++ b/runtime/starpu/codelets/codelet_zsymm.c
@@ -35,26 +35,21 @@ static void cl_zsymm_cpu_func(void *descr[], void *cl_arg)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *C;
-    int ldC;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &beta);
-    CORE_zsymm(side, uplo,
+    TCORE_zsymm(side, uplo,
         M, N,
-        alpha, A, ldA,
-        B, ldB,
-        beta, C, ldC);
+        alpha, tileA,
+        tileB,
+        beta, tileC);
 }
 
 #ifdef CHAMELEON_USE_CUDA
@@ -65,20 +60,15 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
     int M;
     int N;
     cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int ldA;
-    const cuDoubleComplex *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     cuDoubleComplex beta;
-    cuDoubleComplex *C;
-    int ldC;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
 
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &M, &N, &alpha, &beta);
 
     RUNTIME_getStream(stream);
@@ -86,9 +76,9 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
     CUDA_zsymm(
         side, uplo,
         M, N,
-        &alpha, A, ldA,
-        B, ldB,
-        &beta, C, ldC,
+        &alpha, tileA->mat, tileA->ld,
+                tileB->mat, tileB->ld,
+        &beta,  tileC->mat, tileC->ld,
         stream);
 
 #ifndef STARPU_CUDA_ASYNC
@@ -113,9 +103,9 @@ CODELETS(zsymm, 3, cl_zsymm_cpu_func, cl_zsymm_cuda_func, STARPU_CUDA_ASYNC)
 void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldB,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zsymm;
@@ -144,7 +134,4 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
         STARPU_NAME, "zsymm",
 #endif
         0);
-    (void)ldC;
-    (void)ldB;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c
index 45b5377a7999ff0f93816d255205e795efb20cb4..95f5f28a91ed292a79e1e7519580cf325c7d906f 100644
--- a/runtime/starpu/codelets/codelet_zsyr2k.c
+++ b/runtime/starpu/codelets/codelet_zsyr2k.c
@@ -35,23 +35,18 @@ static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg)
     int n;
     int k;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *C;
-    int ldC;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
-    CORE_zsyr2k(uplo, trans,
-                 n, k, alpha, A, ldA, B, ldB, beta, C, ldC);
+    TCORE_zsyr2k(uplo, trans,
+                 n, k, alpha, tileA, tileB, beta, tileC);
 }
 
 #ifdef CHAMELEON_USE_CUDA
@@ -62,26 +57,24 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
     int n;
     int k;
     cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int ldA;
-    const cuDoubleComplex *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
     cuDoubleComplex beta;
-    cuDoubleComplex *C;
-    int ldC;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
 
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
 
     RUNTIME_getStream(stream);
 
     CUDA_zsyr2k( uplo, trans,
-                 n, k, &alpha, A, ldA, B, ldB, &beta, C, ldC,
+                 n, k,
+                 &alpha, tileA->mat, tileA->ld,
+                         tileB->mat, tileB->ld,
+                 &beta,  tileC->mat, tileC->ld,
                  stream);
 
 #ifndef STARPU_CUDA_ASYNC
@@ -106,9 +99,9 @@ CODELETS(zsyr2k, 3, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC)
 void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, cham_trans_t trans,
                        int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldB,
-                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC)
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                       const CHAM_desc_t *B, int Bm, int Bn,
+                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zsyr2k;
@@ -137,7 +130,4 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
         STARPU_NAME, "zsyr2k",
 #endif
         0);
-    (void)ldC;
-    (void)ldB;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index 76faf111d8419347a345255c18d5fffc89c72553..a9dd529de0e0879e42b794feee8ea02c87df26db 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -35,22 +35,18 @@ static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg)
     int n;
     int k;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *C;
-    int ldC;
+    CHAM_tile_t *tileC;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    C = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    tileA = cti_interface_get(descr[0]);
+    tileC = cti_interface_get(descr[1]);
 
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldC = STARPU_MATRIX_GET_LD( descr[1] );
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
-    CORE_zsyrk(uplo, trans,
+    TCORE_zsyrk(uplo, trans,
         n, k,
-        alpha, A, ldA,
-        beta, C, ldC);
+        alpha, tileA,
+        beta, tileC);
 }
 
 #ifdef CHAMELEON_USE_CUDA
@@ -61,25 +57,21 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
     int n;
     int k;
     cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     cuDoubleComplex beta;
-    cuDoubleComplex *C;
-    int ldC;
+    CHAM_tile_t *tileC;
+
+    tileA = cti_interface_get(descr[0]);
+    tileC = cti_interface_get(descr[1]);
 
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    C = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldC = STARPU_MATRIX_GET_LD( descr[1] );
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
 
     RUNTIME_getStream(stream);
 
     CUDA_zsyrk(
-        uplo, trans,
-        n, k,
-        &alpha, A, ldA,
-        &beta, C, ldC,
+        uplo, trans, n, k,
+        &alpha, tileA->mat, tileA->ld,
+        &beta,  tileC->mat, tileC->ld,
         stream);
 
 #ifndef STARPU_CUDA_ASYNC
@@ -104,8 +96,8 @@ CODELETS(zsyrk, 2, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC)
 void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
                       cham_uplo_t uplo, cham_trans_t trans,
                       int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldC)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zsyrk;
@@ -132,6 +124,4 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
         STARPU_NAME, "zsyrk",
 #endif
         0);
-    (void)ldC;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zsyssq.c b/runtime/starpu/codelets/codelet_zsyssq.c
index 678fb8d024522f5d669181fb3fb866a0329a2202..c2d6c8edb81de46be3b9e7ee9efe2583ffce0b79 100644
--- a/runtime/starpu/codelets/codelet_zsyssq.c
+++ b/runtime/starpu/codelets/codelet_zsyssq.c
@@ -28,15 +28,13 @@ static void cl_zsyssq_cpu_func(void *descr[], void *cl_arg)
     cham_store_t storev;
     cham_uplo_t uplo;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    double *SCALESUMSQ;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileW;
 
-    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &n);
-    CORE_zsyssq( storev, uplo, n, A, ldA, SCALESUMSQ );
+    tileA = cti_interface_get(descr[0]);
+    tileW = cti_interface_get(descr[1]);
+    starpu_codelet_unpack_args( cl_arg, &storev, &uplo, &n );
+    TCORE_zsyssq( storev, uplo, n, tileA, tileW );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -47,7 +45,7 @@ CODELETS_CPU(zsyssq, 2, cl_zsyssq_cpu_func)
 
 void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int n,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     struct starpu_codelet *codelet = &cl_zsyssq;
@@ -71,5 +69,4 @@ void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
         STARPU_NAME, "zsyssq",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
index 3f4d7f1070ad37f2e0043292188be3c934edac12..92e0b60b08704c5c2f1f0b0534dc84ca19d15bca 100644
--- a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
@@ -32,14 +32,12 @@ static void cl_zsytrf_nopiv_cpu_func(void *descr[], void *cl_arg)
 {
     cham_uplo_t uplo;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     int iinfo;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &n, &iinfo);
-    CORE_zsytf2_nopiv(uplo, n, A, ldA);
+    TCORE_zsytf2_nopiv(uplo, n, tileA);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -50,7 +48,7 @@ CODELETS_CPU(zsytrf_nopiv, 1, cl_zsytrf_nopiv_cpu_func)
 
 void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options,
                               cham_uplo_t uplo, int n, int nb,
-                               const CHAM_desc_t *A, int Am, int An, int ldA,
+                               const CHAM_desc_t *A, int Am, int An,
                                int iinfo )
 {
     (void)nb;
@@ -74,5 +72,4 @@ void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options,
         STARPU_NAME, "zsytrf_nopiv",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c
index 3809706d50485c4bc3ddb917a4186f995bdefc81..04aca83f3f54d29e3ae5c21e4323d2132bff0749 100644
--- a/runtime/starpu/codelets/codelet_ztplqt.c
+++ b/runtime/starpu/codelets/codelet_ztplqt.c
@@ -28,26 +28,20 @@ static void cl_ztplqt_cpu_func(void *descr[], void *cl_arg)
     int N;
     int L;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
-    CHAMELEON_Complex64_t *T;
-    int ldT;
-    CHAMELEON_Complex64_t *WORK;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileWORK;
 
-    A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    T    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    ldT = STARPU_MATRIX_GET_LD( descr[2] );
+    tileA    = cti_interface_get(descr[0]);
+    tileB    = cti_interface_get(descr[1]);
+    tileT    = cti_interface_get(descr[2]);
+    tileWORK = cti_interface_get(descr[3]); /* ib * nb */
     starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib );
 
-    CORE_zlaset( ChamUpperLower, ib, M, 0., 0., T, ldT );
-    CORE_ztplqt( M, N, L, ib,
-                 A, ldA, B, ldB, T, ldT, WORK );
+    TCORE_zlaset( ChamUpperLower, ib, M, 0., 0., tileT );
+    TCORE_ztplqt( M, N, L, ib,
+                 tileA, tileB, tileT, tileWORK->mat );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -58,9 +52,9 @@ CODELETS_CPU(ztplqt, 4, cl_ztplqt_cpu_func)
 
 void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
                          int M, int N, int L, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldB,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldT )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         const CHAM_desc_t *T, int Tm, int Tn )
 {
     struct starpu_codelet *codelet = &cl_ztplqt;
     void (*callback)(void*) = options->profiling ? cl_ztplqt_callback : NULL;
@@ -91,8 +85,6 @@ void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
         STARPU_NAME, (L == 0) ? "ztplqs" : "ztplqt",
 #endif
         0);
-    (void)ldB;
-    (void)ldA;
 
     (void)ib; (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c
index 15f9be5a2ae9c8d2333a8bd8f547fdbcd7f7578f..32cefc983b2db4f5d256657bfb68bbc14c0ec633 100644
--- a/runtime/starpu/codelets/codelet_ztpmlqt.c
+++ b/runtime/starpu/codelets/codelet_ztpmlqt.c
@@ -29,32 +29,22 @@ static void cl_ztpmlqt_cpu_func(void *descr[], void *cl_arg)
     int K;
     int L;
     int ib;
-    const CHAMELEON_Complex64_t *V;
-    int ldV;
-    const CHAMELEON_Complex64_t *T;
-    int ldT;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
-    CHAMELEON_Complex64_t *WORK;
     size_t lwork;
+    CHAM_tile_t *tileV;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileW;
 
-    V    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    B    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
-    ldV = STARPU_MATRIX_GET_LD( descr[0] );
-    ldT = STARPU_MATRIX_GET_LD( descr[1] );
-    ldA = STARPU_MATRIX_GET_LD( descr[2] );
-    ldB = STARPU_MATRIX_GET_LD( descr[3] );
+    tileV = cti_interface_get(descr[0]);
+    tileT = cti_interface_get(descr[1]);
+    tileA = cti_interface_get(descr[2]);
+    tileB = cti_interface_get(descr[3]);
+    tileW = cti_interface_get(descr[4]); /* ib * nb */
     starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, &lwork );
 
-    CORE_ztpmlqt( side, trans, M, N, K, L, ib,
-                  V, ldV, T, ldT, A, ldA, B, ldB, WORK );
-
-    (void)lwork;
+    TCORE_ztpmlqt( side, trans, M, N, K, L, ib,
+                   tileV, tileT, tileA, tileB, tileW->mat );
 }
 
 #if defined(CHAMELEON_USE_CUDA)
@@ -67,26 +57,18 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg)
     int K;
     int L;
     int ib;
-    const cuDoubleComplex *V;
-    int ldV;
-    const cuDoubleComplex *T;
-    int ldT;
-    cuDoubleComplex *A;
-    int ldA;
-    cuDoubleComplex *B;
-    int ldB;
-    cuDoubleComplex *W;
     size_t lwork;
+    CHAM_tile_t *tileV;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileW;
 
-    V = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
-    W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 2*ib*nb */
-    ldV = STARPU_MATRIX_GET_LD( descr[0] );
-    ldT = STARPU_MATRIX_GET_LD( descr[1] );
-    ldA = STARPU_MATRIX_GET_LD( descr[2] );
-    ldB = STARPU_MATRIX_GET_LD( descr[3] );
+    tileV = cti_interface_get(descr[0]);
+    tileT = cti_interface_get(descr[1]);
+    tileA = cti_interface_get(descr[2]);
+    tileB = cti_interface_get(descr[3]);
+    tileW = cti_interface_get(descr[4]); /* 3*ib*nb */
 
     starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, &lwork );
 
@@ -94,8 +76,11 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg)
 
     CUDA_ztpmlqt(
             side, trans, M, N, K, L, ib,
-            V, ldV, T, ldT, A, ldA, B, ldB,
-            W, lwork, stream );
+            tileV->mat, tileV->ld,
+            tileT->mat, tileT->ld,
+            tileA->mat, tileA->ld,
+            tileB->mat, tileB->ld,
+            tileW->mat, lwork, stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
@@ -112,10 +97,10 @@ CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYN
 void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
                           cham_side_t side, cham_trans_t trans,
                           int M, int N, int K, int L, int ib, int nb,
-                          const CHAM_desc_t *V, int Vm, int Vn, int ldV,
-                          const CHAM_desc_t *T, int Tm, int Tn, int ldT,
-                          const CHAM_desc_t *A, int Am, int An, int ldA,
-                          const CHAM_desc_t *B, int Bm, int Bn, int ldB )
+                          const CHAM_desc_t *V, int Vm, int Vn,
+                          const CHAM_desc_t *T, int Tm, int Tn,
+                          const CHAM_desc_t *A, int Am, int An,
+                          const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_ztpmlqt;
     void (*callback)(void*) = options->profiling ? cl_ztpmlqt_callback : NULL;
@@ -136,11 +121,11 @@ void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
         STARPU_VALUE, &K,     sizeof(int),
         STARPU_VALUE, &L,     sizeof(int),
         STARPU_VALUE, &ib,     sizeof(int),
+        STARPU_VALUE, &(options->ws_wsize), sizeof(size_t),
         STARPU_R,      RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),
         STARPU_R,      RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
         STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_RW,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE, &(options->ws_wsize), sizeof(size_t),
         /* Other options */
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_PRIORITY,  options->priority,
@@ -152,9 +137,6 @@ void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
         STARPU_NAME, (( L == 0 ) ? "ztsmlq" : "ztpmlqt"),
 #endif
         0);
-    (void)ldA;
-    (void)ldT;
-    (void)ldV;
 
     (void)ib; (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c
index ff225663d9c1f298bf8b16ad5eb7c342e8640165..2f921000d506c73baafa5f48ab5526c880a42986 100644
--- a/runtime/starpu/codelets/codelet_ztpmqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpmqrt.c
@@ -29,35 +29,24 @@ static void cl_ztpmqrt_cpu_func(void *descr[], void *cl_arg)
     int K;
     int L;
     int ib;
-    const CHAMELEON_Complex64_t *V;
-    int ldV;
-    const CHAMELEON_Complex64_t *T;
-    int ldT;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
-    CHAMELEON_Complex64_t *WORK;
     size_t lwork;
+    CHAM_tile_t *tileV;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileW;
 
-    V    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    B    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
-    ldV = STARPU_MATRIX_GET_LD( descr[0] );
-    ldT = STARPU_MATRIX_GET_LD( descr[1] );
-    ldA = STARPU_MATRIX_GET_LD( descr[2] );
-    ldB = STARPU_MATRIX_GET_LD( descr[3] );
+    tileV = cti_interface_get(descr[0]);
+    tileT = cti_interface_get(descr[1]);
+    tileA = cti_interface_get(descr[2]);
+    tileB = cti_interface_get(descr[3]);
+    tileW = cti_interface_get(descr[4]); /* ib * nb */
     starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, &lwork );
 
-    CORE_ztpmqrt( side, trans, M, N, K, L, ib,
-                  V, ldV, T, ldT, A, ldA, B, ldB, WORK );
-
-    (void)lwork;
+    TCORE_ztpmqrt( side, trans, M, N, K, L, ib,
+                   tileV, tileT, tileA, tileB, tileW->mat );
 }
 
-
 #if defined(CHAMELEON_USE_CUDA)
 static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
 {
@@ -68,34 +57,30 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
     int K;
     int L;
     int ib;
-    const cuDoubleComplex *V;
-    int ldV;
-    const cuDoubleComplex *T;
-    int ldT;
-    cuDoubleComplex *A;
-    int ldA;
-    cuDoubleComplex *B;
-    int ldB;
-    cuDoubleComplex *W;
     size_t lwork;
+    CHAM_tile_t *tileV;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileW;
+
+    tileV = cti_interface_get(descr[0]);
+    tileT = cti_interface_get(descr[1]);
+    tileA = cti_interface_get(descr[2]);
+    tileB = cti_interface_get(descr[3]);
+    tileW = cti_interface_get(descr[4]); /* 3*ib*nb */
 
-    V = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    A = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]);
-    W = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[4]); /* 3*ib*nb */
-    ldV = STARPU_MATRIX_GET_LD( descr[0] );
-    ldT = STARPU_MATRIX_GET_LD( descr[1] );
-    ldA = STARPU_MATRIX_GET_LD( descr[2] );
-    ldB = STARPU_MATRIX_GET_LD( descr[3] );
     starpu_codelet_unpack_args( cl_arg, &side, &trans, &M, &N, &K, &L, &ib, &lwork );
 
     RUNTIME_getStream(stream);
 
     CUDA_ztpmqrt(
             side, trans, M, N, K, L, ib,
-            V, ldV, T, ldT, A, ldA, B, ldB,
-            W, lwork, stream );
+            tileV->mat, tileV->ld,
+            tileT->mat, tileT->ld,
+            tileA->mat, tileA->ld,
+            tileB->mat, tileB->ld,
+            tileW->mat, lwork, stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
@@ -112,10 +97,10 @@ CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYN
 void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
                           cham_side_t side, cham_trans_t trans,
                           int M, int N, int K, int L, int ib, int nb,
-                          const CHAM_desc_t *V, int Vm, int Vn, int ldV,
-                          const CHAM_desc_t *T, int Tm, int Tn, int ldT,
-                          const CHAM_desc_t *A, int Am, int An, int ldA,
-                          const CHAM_desc_t *B, int Bm, int Bn, int ldB )
+                          const CHAM_desc_t *V, int Vm, int Vn,
+                          const CHAM_desc_t *T, int Tm, int Tn,
+                          const CHAM_desc_t *A, int Am, int An,
+                          const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_ztpmqrt;
     void (*callback)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL;
@@ -136,11 +121,11 @@ void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
         STARPU_VALUE, &K,     sizeof(int),
         STARPU_VALUE, &L,     sizeof(int),
         STARPU_VALUE, &ib,     sizeof(int),
+        STARPU_VALUE, &(options->ws_wsize), sizeof(size_t),
         STARPU_R,      RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),
         STARPU_R,      RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
         STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_RW,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE, &(options->ws_wsize), sizeof(size_t),
         /* Other options */
         STARPU_SCRATCH,   options->ws_worker,
         STARPU_PRIORITY,  options->priority,
@@ -152,9 +137,6 @@ void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
         STARPU_NAME, (( L == 0 ) ? "ztsmqr" : "ztpmqrt"),
 #endif
         0);
-    (void)ldA;
-    (void)ldT;
-    (void)ldV;
 
     (void)ib; (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c
index 26962b5cb0189ea53a8200be56a5b1127cb54d5c..c806232a2ad3b09f0588b672cf1c59289e58c733 100644
--- a/runtime/starpu/codelets/codelet_ztpqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpqrt.c
@@ -27,26 +27,20 @@ static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg)
     int N;
     int L;
     int ib;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
-    CHAMELEON_Complex64_t *T;
-    int ldT;
-    CHAMELEON_Complex64_t *WORK;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileWORK;
 
-    A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    T    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
-    ldT = STARPU_MATRIX_GET_LD( descr[2] );
+    tileA    = cti_interface_get(descr[0]);
+    tileB    = cti_interface_get(descr[1]);
+    tileT    = cti_interface_get(descr[2]);
+    tileWORK = cti_interface_get(descr[3]); /* ib * nb */
     starpu_codelet_unpack_args( cl_arg, &M, &N, &L, &ib );
 
-    CORE_zlaset( ChamUpperLower, ib, N, 0., 0., T, ldT );
-    CORE_ztpqrt( M, N, L, ib,
-                 A, ldA, B, ldB, T, ldT, WORK );
+    TCORE_zlaset( ChamUpperLower, ib, N, 0., 0., tileT );
+    TCORE_ztpqrt( M, N, L, ib,
+                  tileA, tileB, tileT, tileWORK->mat );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -57,9 +51,9 @@ CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func)
 
 void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
                          int M, int N, int L, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldB,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldT )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn,
+                         const CHAM_desc_t *T, int Tm, int Tn )
 {
     struct starpu_codelet *codelet = &cl_ztpqrt;
     void (*callback)(void*) = options->profiling ? cl_ztpqrt_callback : NULL;
@@ -90,8 +84,6 @@ void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
         STARPU_NAME, "ztpqrt",
 #endif
         0);
-    (void)ldB;
-    (void)ldA;
 
     (void)ib; (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c
index ada8343d759bd8c8972d6da1c93def0dd2793dff..d7799dc408dddac73b544e95cfcb2e4ef6abb158 100644
--- a/runtime/starpu/codelets/codelet_ztradd.c
+++ b/runtime/starpu/codelets/codelet_ztradd.c
@@ -31,18 +31,14 @@ static void cl_ztradd_cpu_func(void *descr[], void *cl_arg)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
+    CHAM_tile_t *tileB;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &beta);
-    CORE_ztradd(uplo, trans, M, N, alpha, A, ldA, beta, B, ldB);
+    TCORE_ztradd(uplo, trans, M, N, alpha, tileA, beta, tileB);
     return;
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
@@ -113,8 +109,8 @@ CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func)
  */
 void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
-                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldB )
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_ztradd;
     void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
@@ -140,7 +136,6 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
         STARPU_NAME, "ztradd",
 #endif
         0);
-    (void)ldA;
 
     (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_ztrasm.c b/runtime/starpu/codelets/codelet_ztrasm.c
index a616b14c8efd0e47803a0d5115bfbe4863ab2938..1062237b4e9055a189b3bfd23e04719887c506e2 100644
--- a/runtime/starpu/codelets/codelet_ztrasm.c
+++ b/runtime/starpu/codelets/codelet_ztrasm.c
@@ -31,15 +31,13 @@ static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg)
     cham_diag_t diag;
     int M;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    double *work;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileW;
 
-    A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
+    tileW = cti_interface_get(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N);
-    CORE_ztrasm(storev, uplo, diag, M, N, A, ldA, work);
+    TCORE_ztrasm(storev, uplo, diag, M, N, tileA, tileW->mat );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -50,7 +48,7 @@ CODELETS_CPU(ztrasm, 2, cl_ztrasm_cpu_func)
 
 void INSERT_TASK_ztrasm( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_ztrasm;
@@ -76,5 +74,4 @@ void INSERT_TASK_ztrasm( const RUNTIME_option_t *options,
         STARPU_NAME, "ztrasm",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index 322c2326c554dd6ecebb36e2e3b30568d9164a20..9ae9cdf796356e5b5438746d825dda5bd7a765a3 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -37,22 +37,18 @@ static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg)
     int M;
     int N;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
 
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha);
-    CORE_ztrmm(side, uplo,
+    TCORE_ztrmm(side, uplo,
         transA, diag,
         M, N,
-        alpha, A, ldA,
-        B, ldB);
+        alpha, tileA,
+        tileB);
 }
 
 #ifdef CHAMELEON_USE_CUDA
@@ -65,26 +61,20 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
     int M;
     int N;
     cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int ldA;
-    cuDoubleComplex *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha);
 
     RUNTIME_getStream(stream);
 
     CUDA_ztrmm(
-        side, uplo,
-        transA, diag,
-        M, N,
-        &alpha, A, ldA,
-        B, ldB,
-        stream);
+        side, uplo, transA, diag, M, N, &alpha,
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
@@ -109,8 +99,8 @@ CODELETS(ztrmm, 2, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC)
 void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldB)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_ztrmm;
@@ -138,6 +128,4 @@ void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
         STARPU_NAME, "ztrmm",
 #endif
         0);
-    (void)ldB;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index 1d3281bdb93e65cbc6948a7486643bccb950aad9..6155433fac2fd0cae8e3cc324dfc00f4d6a6e7ae 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -37,21 +37,17 @@ static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg)
     int m;
     int n;
     CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha);
-    CORE_ztrsm(side, uplo,
+    TCORE_ztrsm(side, uplo,
         transA, diag,
         m, n,
-        alpha, A, ldA,
-        B, ldB);
+        alpha, tileA,
+        tileB);
 }
 
 #ifdef CHAMELEON_USE_CUDA
@@ -64,25 +60,20 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
     int m;
     int n;
     cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int ldA;
-    cuDoubleComplex *B;
-    int ldB;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldB = STARPU_MATRIX_GET_LD( descr[1] );
+    tileA = cti_interface_get(descr[0]);
+    tileB = cti_interface_get(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha);
 
     RUNTIME_getStream(stream);
 
     CUDA_ztrsm(
-        side, uplo, transA, diag,
-        m, n,
-        &alpha, A, ldA,
-        B, ldB,
-        stream);
+        side, uplo, transA, diag, m, n, &alpha,
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
@@ -106,8 +97,8 @@ CODELETS(ztrsm, 2, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC)
 void INSERT_TASK_ztrsm(const RUNTIME_option_t *options,
                       cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
                       int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int ldA,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldB)
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                      const CHAM_desc_t *B, int Bm, int Bn)
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_ztrsm;
@@ -135,6 +126,4 @@ void INSERT_TASK_ztrsm(const RUNTIME_option_t *options,
         STARPU_NAME, "ztrsm",
 #endif
         0);
-    (void)ldB;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_ztrssq.c b/runtime/starpu/codelets/codelet_ztrssq.c
index 4374a51f487e67885dcb2a27a2e1573e44b56cbe..2ce632d23adb39bdabca34e50b3bd0151141a9b1 100644
--- a/runtime/starpu/codelets/codelet_ztrssq.c
+++ b/runtime/starpu/codelets/codelet_ztrssq.c
@@ -29,15 +29,13 @@ static void cl_ztrssq_cpu_func(void *descr[], void *cl_arg)
     cham_diag_t diag;
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    double *SCALESUMSQ;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileW;
 
-    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &m, &n);
-    CORE_ztrssq( uplo, diag, m, n, A, ldA, &SCALESUMSQ[0], &SCALESUMSQ[1]);
+    tileA = cti_interface_get(descr[0]);
+    tileW = cti_interface_get(descr[1]);
+    starpu_codelet_unpack_args( cl_arg, &uplo, &diag, &m, &n );
+    TCORE_ztrssq( uplo, diag, m, n, tileA, tileW );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -49,7 +47,7 @@ CODELETS_CPU(ztrssq, 2, cl_ztrssq_cpu_func)
 void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_diag_t diag,
                          int m, int n,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
+                         const CHAM_desc_t *A, int Am, int An,
                          const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     struct starpu_codelet *codelet = &cl_ztrssq;
@@ -74,5 +72,4 @@ void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
         STARPU_NAME, "ztrssq",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c
index 50b4921064ed6c3dec73ee45204a50076ea6dd39..aac4c7a04230b603c5a53d2feb7705f56a4a9a32 100644
--- a/runtime/starpu/codelets/codelet_ztrtri.c
+++ b/runtime/starpu/codelets/codelet_ztrtri.c
@@ -33,17 +33,15 @@ static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg)
     cham_uplo_t uplo;
     cham_diag_t diag;
     int N;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
+    CHAM_tile_t *tileA;
     int iinfo;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
     int info = 0;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
+    tileA = cti_interface_get(descr[0]);
     starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &iinfo, &sequence, &request);
-    CORE_ztrtri(uplo, diag, N, A, ldA, &info);
+    TCORE_ztrtri(uplo, diag, N, tileA, &info);
 
     if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
         RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
@@ -64,7 +62,7 @@ CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func)
 void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_diag_t diag,
                          int n, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
+                         const CHAM_desc_t *A, int Am, int An,
                          int iinfo )
 {
     (void)nb;
@@ -90,5 +88,4 @@ void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
         STARPU_NAME, "ztrtri",
 #endif
         0);
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
index 9056a098b17d1b38c83b4005de3387f4d0721c26..bca185baada20eb253a5029a7b61ab53b0e46e4c 100644
--- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
@@ -34,31 +34,22 @@ static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg)
     int n2;
     int k;
     int ib;
-    int nb;
-    CHAMELEON_Complex64_t *A1;
-    int ldA1;
-    CHAMELEON_Complex64_t *A2;
-    int ldA2;
-    CHAMELEON_Complex64_t *V;
-    int ldV;
-    CHAMELEON_Complex64_t *T;
-    int ldT;
+    CHAM_tile_t *tileA1;
+    CHAM_tile_t *tileA2;
+    CHAM_tile_t *tileV;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileW;
+    int ldW;
 
-    CHAMELEON_Complex64_t *WORK;
-    int ldWORK;
+    tileA1 = cti_interface_get(descr[0]);
+    tileA2 = cti_interface_get(descr[1]);
+    tileV  = cti_interface_get(descr[2]);
+    tileT  = cti_interface_get(descr[3]);
+    tileW  = cti_interface_get(descr[4]); /* ib * nb */
 
-    A1    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    A2    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    V     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    T     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-    WORK  = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
-    ldA1 = STARPU_MATRIX_GET_LD( descr[0] );
-    ldA2 = STARPU_MATRIX_GET_LD( descr[1] );
-    ldV = STARPU_MATRIX_GET_LD( descr[2] );
-    ldT = STARPU_MATRIX_GET_LD( descr[3] );
-    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, &nb, &ldWORK);
-    CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k,
-                       ib, A1, ldA1, A2, ldA2, V, ldV, T, ldT, WORK, ldWORK);
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, &ldW );
+    TCORE_ztsmlq_hetra1( side, trans, m1, n1, m2, n2, k, ib,
+                         tileA1, tileA2, tileV, tileT, tileW->mat, ldW );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -75,10 +66,10 @@ CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func)
 void INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options,
                                 cham_side_t side, cham_trans_t trans,
                                 int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                                const CHAM_desc_t *A1, int A1m, int A1n, int ldA1,
-                                const CHAM_desc_t *A2, int A2m, int A2n, int ldA2,
-                                const CHAM_desc_t *V,  int Vm,  int Vn,  int ldV,
-                                const CHAM_desc_t *T,  int Tm,  int Tn,  int ldT )
+                                const CHAM_desc_t *A1, int A1m, int A1n,
+                                const CHAM_desc_t *A2, int A2m, int A2n,
+                                const CHAM_desc_t *V,  int Vm,  int Vn,
+                                const CHAM_desc_t *T,  int Tm,  int Tn )
 {
     struct starpu_codelet *codelet = &cl_ztsmlq_hetra1;
     void (*callback)(void*) = options->profiling ? cl_ztsmlq_hetra1_callback : NULL;
@@ -102,13 +93,12 @@ void INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options,
         STARPU_VALUE,    &n2,                sizeof(int),
         STARPU_VALUE,    &k,                 sizeof(int),
         STARPU_VALUE,    &ib,                sizeof(int),
-        STARPU_VALUE,    &nb,                sizeof(int),
+        STARPU_VALUE,    &ldWORK,            sizeof(int),
         STARPU_RW,        RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n),
         STARPU_RW,        RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n),
         STARPU_R,         RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),
         STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
         STARPU_SCRATCH,   options->ws_worker,
-        STARPU_VALUE,    &ldWORK,            sizeof(int),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
index 9f3c2d3fbab220830105b149475f97787d6f34bc..e6e2ff53afcfb9e97fdf6843830d24d758534128 100644
--- a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
@@ -34,32 +34,22 @@ static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg)
     int n2;
     int k;
     int ib;
-    CHAMELEON_Complex64_t *A1;
-    int ldA1;
-    CHAMELEON_Complex64_t *A2;
-    int ldA2;
-    CHAMELEON_Complex64_t *V;
-    int ldV;
-    CHAMELEON_Complex64_t *T;
-    int ldT;
+    CHAM_tile_t *tileA1;
+    CHAM_tile_t *tileA2;
+    CHAM_tile_t *tileV;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileW;
+    int ldW;
 
-    /* TODO: manage workspace */
-    CHAMELEON_Complex64_t *WORK;
-    int ldWORK;
+    tileA1 = cti_interface_get(descr[0]);
+    tileA2 = cti_interface_get(descr[1]);
+    tileV  = cti_interface_get(descr[2]);
+    tileT  = cti_interface_get(descr[3]);
+    tileW  = cti_interface_get(descr[4]);
 
-    A1    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    A2    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    V     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    T     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-    WORK  = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]);
-    ldA1 = STARPU_MATRIX_GET_LD( descr[0] );
-    ldA2 = STARPU_MATRIX_GET_LD( descr[1] );
-    ldV = STARPU_MATRIX_GET_LD( descr[2] );
-    ldT = STARPU_MATRIX_GET_LD( descr[3] );
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k,
-                               &ib, &ldWORK);
-    CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k,
-                       ib, A1, ldA1, A2, ldA2, V, ldV, T, ldT, WORK, ldWORK);
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k, &ib, &ldW );
+    TCORE_ztsmqr_hetra1( side, trans, m1, n1, m2, n2, k, ib,
+                         tileA1, tileA2, tileV, tileT, tileW->mat, ldW );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
@@ -76,10 +66,10 @@ CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func)
 void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options,
                                 cham_side_t side, cham_trans_t trans,
                                 int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                                const CHAM_desc_t *A1, int A1m, int A1n, int ldA1,
-                                const CHAM_desc_t *A2, int A2m, int A2n, int ldA2,
-                                const CHAM_desc_t *V,  int Vm,  int Vn,  int ldV,
-                                const CHAM_desc_t *T,  int Tm,  int Tn,  int ldT )
+                                const CHAM_desc_t *A1, int A1m, int A1n,
+                                const CHAM_desc_t *A2, int A2m, int A2n,
+                                const CHAM_desc_t *V,  int Vm,  int Vn,
+                                const CHAM_desc_t *T,  int Tm,  int Tn )
 {
     struct starpu_codelet *codelet = &cl_ztsmqr_hetra1;
     void (*callback)(void*) = options->profiling ? cl_ztsmqr_hetra1_callback : NULL;
@@ -103,12 +93,12 @@ void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options,
         STARPU_VALUE,    &n2,                sizeof(int),
         STARPU_VALUE,    &k,                 sizeof(int),
         STARPU_VALUE,    &ib,                sizeof(int),
+        STARPU_VALUE,    &ldWORK,            sizeof(int),
         STARPU_RW,        RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n),
         STARPU_RW,        RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n),
         STARPU_R,         RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),
         STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
         STARPU_SCRATCH,   options->ws_worker,
-        STARPU_VALUE,    &ldWORK,            sizeof(int),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c
index a711810e9aa29c211e60f24d42ccc5a3c3033690..e8115be4f146e757539dadd79cbb34f1089e813d 100644
--- a/runtime/starpu/codelets/codelet_ztstrf.c
+++ b/runtime/starpu/codelets/codelet_ztstrf.c
@@ -35,33 +35,28 @@ static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg)
     int n;
     int ib;
     int nb;
-    CHAMELEON_Complex64_t *U;
-    int ldU;
-    CHAMELEON_Complex64_t *A;
-    int ldA;
-    CHAMELEON_Complex64_t *L;
-    int ldL;
+    CHAM_tile_t *tileU;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileL;
     int *IPIV;
-    CHAMELEON_Complex64_t *WORK;
-    int ldWORK;
+    CHAM_tile_t *tileW;
+    int ldW;
     cham_bool_t check_info;
     int iinfo;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
     int info = 0;
 
-    U = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    L = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-    ldU = STARPU_MATRIX_GET_LD( descr[0] );
-    ldA = STARPU_MATRIX_GET_LD( descr[1] );
-    ldL = STARPU_MATRIX_GET_LD( descr[2] );
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb,
-                               &IPIV, &d_work, &ldWORK, &check_info, &iinfo,
-                               &sequence, &request);
+    tileU = cti_interface_get(descr[0]);
+    tileA = cti_interface_get(descr[1]);
+    tileL = cti_interface_get(descr[2]);
+    tileW = cti_interface_get(descr[3]);
 
-    CORE_ztstrf(m, n, ib, nb, U, ldU, A, ldA, L, ldL, IPIV, WORK, ldWORK, &info);
+    starpu_codelet_unpack_args( cl_arg, &m, &n, &ib, &nb,
+                                &IPIV, &d_work, &ldW, &check_info, &iinfo,
+                                &sequence, &request );
+
+    TCORE_ztstrf(m, n, ib, nb, tileU, tileA, tileL, IPIV, tileW->mat, ldW, &info);
 
     if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
         RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
@@ -74,76 +69,11 @@ static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_ztstrf computes an LU factorization of a complex matrix formed
- *  by an upper triangular NB-by-N tile U on top of a M-by-N tile A
- *  using partial pivoting with row interchanges.
- *
- *  This is the right-looking Level 2.5 BLAS version of the algorithm.
- *
- *******************************************************************************
- *
- * @param[in] M
- *         The number of rows of the tile A.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile A.  N >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in] NB
- *
- * @param[in,out] U
- *         On entry, the NB-by-N upper triangular tile.
- *         On exit, the new factor U from the factorization
- *
- * @param[in] ldU
- *         The leading dimension of the array U.  ldU >= max(1,NB).
- *
- * @param[in,out] A
- *         On entry, the M-by-N tile to be factored.
- *         On exit, the factor L from the factorization
- *
- * @param[in] ldA
- *         The leading dimension of the array A.  ldA >= max(1,M).
- *
- * @param[in,out] L
- *         On entry, the IB-by-N lower triangular tile.
- *         On exit, the interchanged rows form the tile A in case of pivoting.
- *
- * @param[in] ldL
- *         The leading dimension of the array L.  ldL >= max(1,IB).
- *
- * @param[out] IPIV
- *         The pivot indices; for 1 <= i <= min(M,N), row i of the
- *         tile U was interchanged with row IPIV(i) of the tile A.
- *
- * @param[in,out] WORK
- *
- * @param[in] ldWORK
- *         The dimension of the array WORK.
- *
- * @param[out] INFO
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if INFO = -k, the k-th argument had an illegal value
- * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
- *              has been completed, but the factor U is exactly
- *              singular, and division by zero will occur if it is used
- *              to solve a system of equations.
- *
- */
 void INSERT_TASK_ztstrf( const RUNTIME_option_t *options,
                          int m, int n, int ib, int nb,
-                         const CHAM_desc_t *U, int Um, int Un, int ldU,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
-                         const CHAM_desc_t *L, int Lm, int Ln, int ldL,
+                         const CHAM_desc_t *U, int Um, int Un,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *L, int Lm, int Ln,
                          int *IPIV,
                          cham_bool_t check_info, int iinfo )
 {
@@ -181,7 +111,4 @@ void INSERT_TASK_ztstrf( const RUNTIME_option_t *options,
         STARPU_NAME, "ztstrf",
 #endif
         0);
-    (void)ldL;
-    (void)ldA;
-    (void)ldU;
 }
diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c
index dd9c8d7dd58731fb3bfbf58f06e2541dab21d5b0..4769f790b7c06f944da115a9d218c1783861aa76 100644
--- a/runtime/starpu/codelets/codelet_zunmlq.c
+++ b/runtime/starpu/codelets/codelet_zunmlq.c
@@ -37,27 +37,21 @@ static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg)
     int n;
     int k;
     int ib;
-    const CHAMELEON_Complex64_t *A;
-    int ldA;
-    const CHAMELEON_Complex64_t *T;
-    int ldT;
-    CHAMELEON_Complex64_t *C;
-    int ldC;
-    CHAMELEON_Complex64_t *WORK;
-    int ldWORK;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileC;
+    CHAM_tile_t *tileW;
+    int ldW;
 
-    A    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldT = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
+    tileA = cti_interface_get(descr[0]);
+    tileT = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+    tileW = cti_interface_get(descr[3]); /* ib * nb */
 
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, &ldWORK);
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m, &n, &k, &ib, &ldW );
 
-    CORE_zunmlq(side, trans, m, n, k, ib,
-                A, ldA, T, ldT, C, ldC, WORK, ldWORK);
+    TCORE_zunmlq( side, trans, m, n, k, ib,
+                  tileA, tileT, tileC, tileW->mat, ldW );
 }
 
 #if defined(CHAMELEON_USE_CUDA)
@@ -69,25 +63,27 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg)
     int n;
     int k;
     int ib;
-    const cuDoubleComplex *A, *T;
-    cuDoubleComplex *C, *WORK;
-    int ldA, ldT, ldC, ldWORK;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileC;
+    CHAM_tile_t *tileW;
+    int ldW;
 
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, &ldWORK);
+    tileA = cti_interface_get(descr[0]);
+    tileT = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+    tileW = cti_interface_get(descr[3]); /* ib * nb */
 
-    A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldT = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m, &n, &k, &ib, &ldW );
 
     RUNTIME_getStream(stream);
 
     CUDA_zunmlqt(
             side, trans, m, n, k, ib,
-            A, ldA, T, ldT, C, ldC, WORK, ldWORK, stream );
+            tileA->mat, tileA->ld,
+            tileT->mat, tileT->ld,
+            tileC->mat, tileC->ld,
+            tileW->mat, ldW, stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
@@ -101,94 +97,12 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg)
  */
 CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zunmlq overwrites the general complex M-by-N tile C with
- *
- *                    SIDE = 'L'     SIDE = 'R'
- *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q^H * C       C * Q^H
- *
- *  where Q is a complex unitary matrix defined as the product of k
- *  elementary reflectors
- *
- *    Q = H(k) . . . H(2) H(1)
- *
- *  as returned by CORE_zgelqt. Q is of order M if SIDE = 'L' and of order N
- *  if SIDE = 'R'.
- *
- *******************************************************************************
- *
- * @param[in] side
- *         @arg ChamLeft  : apply Q or Q^H from the Left;
- *         @arg ChamRight : apply Q or Q^H from the Right.
- *
- * @param[in] trans
- *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q^H.
- *
- * @param[in] M
- *         The number of rows of the tile C.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile C.  N >= 0.
- *
- * @param[in] K
- *         The number of elementary reflectors whose product defines
- *         the matrix Q.
- *         If SIDE = ChamLeft,  M >= K >= 0;
- *         if SIDE = ChamRight, N >= K >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in] A
- *         Dimension:  (ldA,M) if SIDE = ChamLeft,
- *                     (ldA,N) if SIDE = ChamRight,
- *         The i-th row must contain the vector which defines the
- *         elementary reflector H(i), for i = 1,2,...,k, as returned by
- *         CORE_zgelqt in the first k rows of its array argument A.
- *
- * @param[in] ldA
- *         The leading dimension of the array A.  ldA >= max(1,K).
- *
- * @param[in] T
- *         The IB-by-K triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] ldT
- *         The leading dimension of the array T. ldT >= IB.
- *
- * @param[in,out] C
- *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q.
- *
- * @param[in] ldC
- *         The leading dimension of the array C. ldC >= max(1,M).
- *
- * @param[in,out] WORK
- *         On exit, if INFO = 0, WORK(1) returns the optimal ldWORK.
- *
- * @param[in] ldWORK
- *         The dimension of the array WORK.
- *         If SIDE = ChamLeft,  ldWORK >= max(1,N);
- *         if SIDE = ChamRight, ldWORK >= max(1,M).
- *
- *******************************************************************************
- *
- *          @retval CHAMELEON_SUCCESS successful exit
- *          @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_zunmlq( const RUNTIME_option_t *options,
                          cham_side_t side, cham_trans_t trans,
                          int m, int n, int k, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldT,
-                         const CHAM_desc_t *C, int Cm, int Cn, int ldC )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *T, int Tm, int Tn,
+                         const CHAM_desc_t *C, int Cm, int Cn )
 {
     struct starpu_codelet *codelet = &cl_zunmlq;
     void (*callback)(void*) = options->profiling ? cl_zunmlq_callback : NULL;
@@ -219,7 +133,4 @@ void INSERT_TASK_zunmlq( const RUNTIME_option_t *options,
         STARPU_NAME, "zunmlq",
 #endif
         0);
-
-    (void)ldT;
-    (void)ldA;
 }
diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c
index a8ef47db4d87612dd100c269cfd0159a507b099f..dafd7fbe29095308f77c78675bff57e2402a4086 100644
--- a/runtime/starpu/codelets/codelet_zunmqr.c
+++ b/runtime/starpu/codelets/codelet_zunmqr.c
@@ -36,27 +36,21 @@ static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg)
     int n;
     int k;
     int ib;
-    const CHAMELEON_Complex64_t *A;
-    int ldA;
-    const CHAMELEON_Complex64_t *T;
-    int ldT;
-    CHAMELEON_Complex64_t *C;
-    int ldC;
-    CHAMELEON_Complex64_t *WORK;
-int ldWORK;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileC;
+    CHAM_tile_t *tileW;
+    int ldW;
 
-    A    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldT = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
+    tileA = cti_interface_get(descr[0]);
+    tileT = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+    tileW = cti_interface_get(descr[3]); /* ib * nb */
 
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, &ldWORK);
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m, &n, &k, &ib, &ldW );
 
-    CORE_zunmqr(side, trans, m, n, k, ib,
-                A, ldA, T, ldT, C, ldC, WORK, ldWORK);
+    TCORE_zunmqr( side, trans, m, n, k, ib,
+                  tileA, tileT, tileC, tileW->mat, ldW );
 }
 
 #if defined(CHAMELEON_USE_CUDA)
@@ -68,25 +62,27 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg)
     int n;
     int k;
     int ib;
-    const cuDoubleComplex *A, *T;
-    cuDoubleComplex *C, *WORK;
-    int ldA, ldT, ldC, ldWORK;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileT;
+    CHAM_tile_t *tileC;
+    CHAM_tile_t *tileW;
+    int ldW;
 
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib, &ldWORK);
+    tileA = cti_interface_get(descr[0]);
+    tileT = cti_interface_get(descr[1]);
+    tileC = cti_interface_get(descr[2]);
+    tileW = cti_interface_get(descr[3]); /* ib * nb */
 
-    A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-    ldA = STARPU_MATRIX_GET_LD( descr[0] );
-    ldT = STARPU_MATRIX_GET_LD( descr[1] );
-    ldC = STARPU_MATRIX_GET_LD( descr[2] );
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m, &n, &k, &ib, &ldW );
 
     RUNTIME_getStream(stream);
 
     CUDA_zunmqrt(
             side, trans, m, n, k, ib,
-            A, ldA, T, ldT, C, ldC, WORK, ldWORK, stream );
+            tileA->mat, tileA->ld,
+            tileT->mat, tileT->ld,
+            tileC->mat, tileC->ld,
+            tileW->mat, ldW, stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
@@ -100,95 +96,12 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg)
  */
 CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  CORE_zunmqr overwrites the general complex M-by-N tile C with
- *
- *                    SIDE = 'L'     SIDE = 'R'
- *    TRANS = 'N':      Q * C          C * Q
- *    TRANS = 'C':      Q^H * C       C * Q^H
- *
- *  where Q is a complex unitary matrix defined as the product of k
- *  elementary reflectors
- *
- *    Q = H(1) H(2) . . . H(k)
- *
- *  as returned by CORE_zgeqrt. Q is of order M if SIDE = 'L' and of order N
- *  if SIDE = 'R'.
- *
- *******************************************************************************
- *
- * @param[in] side
- *         @arg ChamLeft  : apply Q or Q^H from the Left;
- *         @arg ChamRight : apply Q or Q^H from the Right.
- *
- * @param[in] trans
- *         @arg ChamNoTrans   :  No transpose, apply Q;
- *         @arg ChamConjTrans :  Transpose, apply Q^H.
- *
- * @param[in] M
- *         The number of rows of the tile C.  M >= 0.
- *
- * @param[in] N
- *         The number of columns of the tile C.  N >= 0.
- *
- * @param[in] K
- *         The number of elementary reflectors whose product defines
- *         the matrix Q.
- *         If SIDE = ChamLeft,  M >= K >= 0;
- *         if SIDE = ChamRight, N >= K >= 0.
- *
- * @param[in] IB
- *         The inner-blocking size.  IB >= 0.
- *
- * @param[in] A
- *         Dimension:  (ldA,K)
- *         The i-th column must contain the vector which defines the
- *         elementary reflector H(i), for i = 1,2,...,k, as returned by
- *         CORE_zgeqrt in the first k columns of its array argument A.
- *
- * @param[in] ldA
- *         The leading dimension of the array A.
- *         If SIDE = ChamLeft,  ldA >= max(1,M);
- *         if SIDE = ChamRight, ldA >= max(1,N).
- *
- * @param[in] T
- *         The IB-by-K triangular factor T of the block reflector.
- *         T is upper triangular by block (economic storage);
- *         The rest of the array is not referenced.
- *
- * @param[in] ldT
- *         The leading dimension of the array T. ldT >= IB.
- *
- * @param[in,out] C
- *         On entry, the M-by-N tile C.
- *         On exit, C is overwritten by Q*C or Q^T*C or C*Q^T or C*Q.
- *
- * @param[in] ldC
- *         The leading dimension of the array C. ldC >= max(1,M).
- *
- * @param[in,out] WORK
- *         On exit, if INFO = 0, WORK(1) returns the optimal ldWORK.
- *
- * @param[in] ldWORK
- *         The dimension of the array WORK.
- *         If SIDE = ChamLeft,  ldWORK >= max(1,N);
- *         if SIDE = ChamRight, ldWORK >= max(1,M).
- *
- *******************************************************************************
- *
- *          @retval CHAMELEON_SUCCESS successful exit
- *          @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_zunmqr( const RUNTIME_option_t *options,
                          cham_side_t side, cham_trans_t trans,
                          int m, int n, int k, int ib, int nb,
-                         const CHAM_desc_t *A, int Am, int An, int ldA,
-                         const CHAM_desc_t *T, int Tm, int Tn, int ldT,
-                         const CHAM_desc_t *C, int Cm, int Cn, int ldC )
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *T, int Tm, int Tn,
+                         const CHAM_desc_t *C, int Cm, int Cn )
 {
     struct starpu_codelet *codelet = &cl_zunmqr;
     void (*callback)(void*) = options->profiling ? cl_zunmqr_callback : NULL;
@@ -219,7 +132,4 @@ void INSERT_TASK_zunmqr( const RUNTIME_option_t *options,
         STARPU_NAME, "zunmqr",
 #endif
         0);
-
-    (void)ldT;
-    (void)ldA;
 }
diff --git a/runtime/starpu/control/runtime_control.c b/runtime/starpu/control/runtime_control.c
index 52c790e727c7d9d3497ce7163c3a872f3199943e..1279986bfd8f58b172c77dacb59f9fff738ccfd7 100644
--- a/runtime/starpu/control/runtime_control.c
+++ b/runtime/starpu/control/runtime_control.c
@@ -21,6 +21,9 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include "chameleon_starpu.h"
+#if defined(HAVE_STARPU_FXT_PROFILING)
+#include <starpu_fxt.h>
+#endif
 
 /**
  *
@@ -37,6 +40,10 @@ static int chameleon_starpu_init( starpu_conf_t *conf )
         MPI_Initialized( &flag );
 #  endif
 
+#if defined(HAVE_STARPU_FXT_PROFILING)
+        starpu_fxt_autostart_profiling(0);
+#endif
+
 #  ifdef HAVE_STARPU_MPI_INIT_CONF
         hres = starpu_mpi_init_conf(NULL, NULL, !flag, MPI_COMM_WORLD, conf);
 #  else
diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c
index 24ae67e66aed8aeb96a8de68771f551ac1595ffb..77ca06002ded80f3ec016aa14cc52e58161d868d 100644
--- a/runtime/starpu/control/runtime_descriptor.c
+++ b/runtime/starpu/control/runtime_descriptor.c
@@ -456,23 +456,18 @@ void *RUNTIME_data_getaddr( const CHAM_desc_t *A, int m, int n )
 
     if (*ptrtile == NULL) {
         int home_node = -1;
-        void *user_ptr = NULL;
         int myrank = A->myrank;
         int owner  = A->get_rankof( A, m, n );
-        int64_t eltsze = CHAMELEON_Element_Size(A->dtyp);
-        int tempmm = (mm == A->lmt-1) ? (A->lm - mm * A->mb) : A->mb;
-        int tempnn = (nn == A->lnt-1) ? (A->ln - nn * A->nb) : A->nb;
+        CHAM_tile_t *tile = A->get_blktile( A, m, n );
 
         if ( myrank == owner ) {
-            user_ptr = A->get_blkaddr(A, m, n);
-            if ( user_ptr != NULL ) {
+            if ( tile->mat != NULL )
+            {
                 home_node = STARPU_MAIN_RAM;
             }
         }
 
-        starpu_matrix_data_register( ptrtile, home_node, (uintptr_t) user_ptr,
-                                     BLKLDD(A, m),
-                                     tempmm, tempnn, eltsze );
+        starpu_cham_tile_register( ptrtile, home_node, tile, A->dtyp );
 
 #if defined(HAVE_STARPU_DATA_SET_OOC_FLAG)
         if ( A->ooc == 0 ) {
diff --git a/runtime/starpu/control/runtime_options.c b/runtime/starpu/control/runtime_options.c
index 221c07e1ddd7bc592d3ac30d09e82663a390d61b..32dce7c5b9c3e717a61fd702b836ad3e4721b07b 100644
--- a/runtime/starpu/control/runtime_options.c
+++ b/runtime/starpu/control/runtime_options.c
@@ -48,10 +48,16 @@ int RUNTIME_options_ws_alloc( RUNTIME_option_t *options, size_t worker_size, siz
 {
     int ret = 0;
     if ( worker_size > 0 ) {
+        CHAM_tile_t tile = { /* worker_size x 1 full-rank tile describing the scratch area; mat is NULL */
+            .format = CHAMELEON_TILE_FULLRANK,
+            .m      = worker_size,
+            .n      = 1,
+            .ld     = worker_size,
+            .mat    = NULL,
+        };
         options->ws_wsize = worker_size;
-        starpu_matrix_data_register( (starpu_data_handle_t*)(&(options->ws_worker)),
-                                     -1, (uintptr_t)NULL,
-                                     worker_size, worker_size, 1, sizeof(char));
+        starpu_cham_tile_register( (starpu_data_handle_t*)(&(options->ws_worker)),
+                                   -1, &tile, sizeof(char) ); /* NOTE(review): last parameter is declared cham_flttype_t, not a byte size -- confirm sizeof(char) selects the intended element type */
     }
     if ( host_size > 0 ) {
         options->ws_hsize = host_size;
diff --git a/runtime/starpu/include/cham_tile_interface.h b/runtime/starpu/include/cham_tile_interface.h
new file mode 100644
index 0000000000000000000000000000000000000000..2d449941d833940fb08d592eca447a5258c52bb0
--- /dev/null
+++ b/runtime/starpu/include/cham_tile_interface.h
@@ -0,0 +1,56 @@
+/**
+ *
+ * @file starpu/cham_tile_interface.h
+ *
+ * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Header to describe the Chameleon tile interface in StarPU
+ *
+ * @version 0.9.2
+ * @author Mathieu Faverge
+ * @author Gwenole Lucas
+ * @date 2019-07-23
+ *
+ */
+#ifndef _cham_tile_interface_h_
+#define _cham_tile_interface_h_
+
+extern struct starpu_data_interface_ops starpu_interface_cham_tile_ops;
+#define STARPU_CHAM_TILE_INTERFACE_ID starpu_interface_cham_tile_ops.interfaceid
+
+struct starpu_cham_tile_interface_s;
+typedef struct starpu_cham_tile_interface_s starpu_cham_tile_interface_t;
+
+/**
+ * Chameleon tile interface: StarPU data interface wrapping one CHAM_tile_t.
+ */
+struct starpu_cham_tile_interface_s
+{
+    enum starpu_data_interface_id id; /**< Identifier of the interface           */
+    uintptr_t      dev_handle;        /**< device handle of the matrix           */
+    cham_flttype_t flttype;           /**< Type of the elements of the matrix    */
+    size_t         allocsize;         /**< size currently allocated              */
+    size_t         tilesize;          /**< size of the elements of the matrix (presumably total bytes; TODO confirm) */
+    CHAM_tile_t    tile;             /**< Internal tile structure used to store
+                                           information on non memory home_node;
                                           returned by cti_interface_get()       */
+};
+
+void starpu_cham_tile_register( starpu_data_handle_t *handleptr, /**< handle to register                              */
+                                int                   home_node, /**< memory node of the data; callers pass -1 when
                                                                       no local buffer is available                     */
+                                CHAM_tile_t          *tile,      /**< tile descriptor to attach to the handle          */
+                                cham_flttype_t        flttype ); /**< element type identifier (e.g. a descriptor dtyp) */
+
+int    cti_handle_get_m        ( starpu_data_handle_t handle );
+int    cti_handle_get_n        ( starpu_data_handle_t handle );
+size_t cti_handle_get_allocsize( starpu_data_handle_t handle );
+/* Return the CHAM_tile_t embedded in a tile interface (codelets pass descr[i] here). */
+static inline CHAM_tile_t *
+cti_interface_get( starpu_cham_tile_interface_t *interface )
+{
+    return &(interface->tile);
+}
+
+#endif /* _cham_tile_interface_h_ */
diff --git a/runtime/starpu/include/chameleon_starpu.h.in b/runtime/starpu/include/chameleon_starpu.h.in
index 1a07cc8021fe0b7bfebfc38f665eaf23b4ce4ecf..a36fdc613c780ca867760d2bd43deb2fcbdc5f85 100644
--- a/runtime/starpu/include/chameleon_starpu.h.in
+++ b/runtime/starpu/include/chameleon_starpu.h.in
@@ -72,6 +72,7 @@
 #include "runtime_profiling.h"
 #include "runtime_codelet_profile.h"
 #include "runtime_workspace.h"
+#include "cham_tile_interface.h"
 
 typedef struct starpu_conf starpu_conf_t;
 
@@ -80,16 +81,18 @@ typedef struct starpu_conf starpu_conf_t;
 /*
  * MPI Redefinitions
  */
+#if defined(CHAMELEON_STARPU_SYNC)
+#define TASK_SYNCHRONOUS , STARPU_TASK_SYNCHRONOUS, 1
+#else /* !defined(CHAMELEON_STARPU_SYNC): expands to nothing */
+#define TASK_SYNCHRONOUS
+#endif /* defined(CHAMELEON_STARPU_SYNC) */
+/* starpu_mpi_codelet() below appends TASK_SYNCHRONOUS right after the codelet argument. */
 #if defined(CHAMELEON_USE_MPI)
 #undef STARPU_REDUX
-//#define starpu_insert_task(...) starpu_mpi_insert_task(MPI_COMM_WORLD, __VA_ARGS__)
 #define starpu_insert_task starpu_mpi_insert_task
-#define starpu_mpi_codelet(_codelet_) MPI_COMM_WORLD, _codelet_
-
+#define starpu_mpi_codelet(_codelet_) MPI_COMM_WORLD, _codelet_ TASK_SYNCHRONOUS
 #else
-
-#define starpu_mpi_codelet(_codelet_) _codelet_
-
+#define starpu_mpi_codelet(_codelet_) _codelet_ TASK_SYNCHRONOUS
 #endif
 
 /*
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index f416b632032ece52beadc25f2a64c786701ffaf0..3e957e69de2c13cf65bf39bd6478d8fad74d88c5 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -28,6 +28,7 @@
 #include "chameleon/tasks_z.h"
 #if !defined(CHAMELEON_SIMULATION)
 #include "coreblas/coreblas_z.h"
+#include "coreblas/coreblas_ztile.h"
 #if defined(CHAMELEON_USE_CUDA)
 #include "cudablas.h"
 #endif
diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h
index a9a1b9b775d7218c25b8432c62b9fb27b9f83997..026e754c4f72b0e2cf221dcd9611773041cd3331 100644
--- a/runtime/starpu/include/runtime_codelets.h
+++ b/runtime/starpu/include/runtime_codelets.h
@@ -31,61 +31,60 @@
 #define CODELET_CUDA_FLAGS(flags)
 #endif
 
-#define CODELETS_ALL(cl_name, _nbuffers, cpu_func_name, cuda_func_name, _original_location_, cuda_flags)	\
-    struct starpu_perfmodel cl_##cl_name##_fake = {                           \
-        .type   = STARPU_HISTORY_BASED,                                       \
-        .symbol = "fake_"#cl_name                                             \
-    };                                                                        \
-                                                                              \
-    struct starpu_perfmodel cl_##cl_name##_model = {                          \
-        .type   = STARPU_HISTORY_BASED,                                       \
-        .symbol = ""#cl_name                                                  \
-    };                                                                        \
-                                                                              \
-    struct starpu_codelet cl_##cl_name = {                                    \
-        .where     = (_original_location_),                                   \
-        .cpu_func  = ((cpu_func_name)),                                       \
-        CODELET_CUDA_FLAGS(cuda_flags)                                        \
-        .cuda_func = ((cuda_func_name)),                                      \
-        .nbuffers  = ((_nbuffers)),                                           \
-        .model     = &cl_##cl_name##_model,                                   \
-        .name      = #cl_name                                                 \
-    };                                                                        \
-                                                                              \
-    void cl_##cl_name##_restrict_where(uint32_t where)                        \
-    {                                                                         \
-      if ( cl_##cl_name.where & where )                                       \
-        cl_##cl_name.where = (cl_##cl_name.where & where);                    \
-    }                                                                         \
-                                                                              \
-    void cl_##cl_name##_restore_where(void)                                   \
-    {                                                                         \
-        cl_##cl_name.where = (_original_location_);                           \
-    }                                                                         \
-                                                                              \
-    void cl_##cl_name##_restore_model(void)                                   \
-    {                                                                         \
-        cl_##cl_name.model = &cl_##cl_name##_model;                           \
+#define CODELETS_ALL(cl_name, _nbuffers, cpu_func_name, cuda_func_name, _original_location_, cuda_flags) \
+    struct starpu_perfmodel cl_##cl_name##_fake = {                     \
+        .type   = STARPU_HISTORY_BASED,                                 \
+        .symbol = "fake_"#cl_name                                       \
+    };                                                                  \
+    /* History-based model attached to the codelet by default */        \
+    struct starpu_perfmodel cl_##cl_name##_model = {                    \
+        .type   = STARPU_HISTORY_BASED,                                 \
+        .symbol = ""#cl_name                                            \
+    };                                                                  \
+    /* The codelet itself; .where starts as _original_location_ */      \
+    struct starpu_codelet cl_##cl_name = {                              \
+        .where     = (_original_location_),                             \
+        .cpu_func  = ((cpu_func_name)),                                 \
+        CODELET_CUDA_FLAGS(cuda_flags)                                  \
+        .cuda_func = ((cuda_func_name)),                                \
+        .nbuffers  = ((_nbuffers)),                                     \
+        .model     = &cl_##cl_name##_model,                             \
+        .name      = #cl_name                                           \
+    };                                                                  \
+    /* Intersect .where with the mask, unless that would empty it */    \
+    void cl_##cl_name##_restrict_where(uint32_t where)                  \
+    {                                                                   \
+        if ( cl_##cl_name.where & where )                               \
+            cl_##cl_name.where = (cl_##cl_name.where & where);          \
+    }                                                                   \
+    /* Reset .where to the location given at definition */              \
+    void cl_##cl_name##_restore_where(void)                             \
+    {                                                                   \
+        cl_##cl_name.where = (_original_location_);                     \
+    }                                                                   \
+    /* Point .model back to the default (non-fake) model */             \
+    void cl_##cl_name##_restore_model(void)                             \
+    {                                                                   \
+        cl_##cl_name.model = &cl_##cl_name##_model;                     \
     }
 
 #if defined(CHAMELEON_SIMULATION)
-#define CODELETS_CPU(name, _nbuffers, cpu_func_name)                          \
-  CODELETS_ALL( name, _nbuffers, (starpu_cpu_func_t) 1, NULL, STARPU_CPU, 0 )
+#define CODELETS_CPU(name, _nbuffers, cpu_func_name)                    \
+    CODELETS_ALL( name, _nbuffers, (starpu_cpu_func_t) 1, NULL, STARPU_CPU, 0 )
 #else
-#define CODELETS_CPU(name, _nbuffers, cpu_func_name)                          \
-  CODELETS_ALL( name, _nbuffers, cpu_func_name, NULL, STARPU_CPU, 0 )
+#define CODELETS_CPU(name, _nbuffers, cpu_func_name)                    \
+    CODELETS_ALL( name, _nbuffers, cpu_func_name, NULL, STARPU_CPU, 0 )
 #endif
 
 #define CODELETS_GPU(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags) \
-  CODELETS_ALL( name, _nbuffers, cpu_func_name, cuda_func_name, STARPU_CPU  | STARPU_CUDA, cuda_flags )
+    CODELETS_ALL( name, _nbuffers, cpu_func_name, cuda_func_name, STARPU_CPU  | STARPU_CUDA, cuda_flags )
 
-
-#define CODELETS_ALL_HEADER(name)                                             \
-     CHAMELEON_CL_CB_HEADER(name);                                            \
-     void cl_##name##_load_fake_model(void);                                  \
-     void cl_##name##_restore_model(void);                                    \
-     extern struct starpu_codelet cl_##name;                                  \
-     void cl_##name##_restrict_where(uint32_t where);                         \
+#define CODELETS_ALL_HEADER(name)                            \
+     CHAMELEON_CL_CB_HEADER(name);                           \
+     void cl_##name##_load_fake_model(void);                 \
+     void cl_##name##_restore_model(void);                   \
+     extern struct starpu_codelet cl_##name;                 \
+     void cl_##name##_restrict_where(uint32_t where);        \
      void cl_##name##_restore_where(void)
 
 #if defined(CHAMELEON_SIMULATION)
diff --git a/runtime/starpu/interface/cham_tile_interface.c b/runtime/starpu/interface/cham_tile_interface.c
new file mode 100644
index 0000000000000000000000000000000000000000..ca78c6b5a8449551314494f66334a06d9869e56d
--- /dev/null
+++ b/runtime/starpu/interface/cham_tile_interface.c
@@ -0,0 +1,431 @@
+/**
+ *
+ * @file starpu/cham_tile_interface.c
+ *
+ * @copyright 2012-2019 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
+ *                      Univ. Bordeaux. All rights reserved.
+ *
+ ***
+ *
+ * @brief Chameleon tile interface for StarPU
+ *
+ * @version 0.9.2
+ * @author Mathieu Faverge
+ * @author Gwenole Lucas
+ * @date 2019-07-23
+ *
+ */
+#include "chameleon_starpu.h"
+
+static inline CHAM_tile_t *
+cti_handle_get( starpu_data_handle_t handle )
+{
+    /* Tile descriptor of the main-memory replicate of a StarPU handle */
+    starpu_cham_tile_interface_t *cti =
+        starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
+#if defined(STARPU_DEBUG)
+    STARPU_ASSERT_MSG( cti->id == STARPU_CHAM_TILE_INTERFACE_ID,
+                       "Error. The given data is not a cham_tile." );
+#endif
+
+    return &cti->tile;
+}
+
+int
+cti_handle_get_m( starpu_data_handle_t handle )
+{
+    /* m dimension of the tile registered behind handle (main-memory replicate) */
+    return cti_handle_get( handle )->m;
+}
+
+int
+cti_handle_get_n( starpu_data_handle_t handle )
+{
+    /* n dimension of the tile registered behind handle (main-memory replicate) */
+    return cti_handle_get( handle )->n;
+}
+
+static void
+cti_init( void *data_interface )
+{
+    starpu_cham_tile_interface_t *cti = data_interface;
+    cti->allocsize = -1; /* "size not defined" marker checked by cti_get_alloc_size() */
+    cti->id        = STARPU_CHAM_TILE_INTERFACE_ID;
+}
+
+static void
+cti_register_data_handle( starpu_data_handle_t  handle,
+                          unsigned              home_node,
+                          void                 *data_interface )
+{
+    const starpu_cham_tile_interface_t *model = data_interface;
+    unsigned                            id;
+
+    /* Every memory node gets its own replicate of the registered description */
+    for ( id = 0; id < STARPU_MAXNODES; id++ ) {
+        starpu_cham_tile_interface_t *replicate =
+            starpu_data_get_interface_on_node( handle, id );
+
+        memcpy( replicate, model, sizeof( starpu_cham_tile_interface_t ) );
+        if ( id == home_node ) {
+            continue;
+        }
+
+        /* Outside the home node, a replicate starts without any buffer */
+        replicate->dev_handle = 0;
+        replicate->tile.mat   = NULL;
+        replicate->tile.ld    = -1;
+    }
+}
+
+static starpu_ssize_t
+cti_allocate_data_on_node( void *data_interface, unsigned node )
+{
+    starpu_cham_tile_interface_t *cti    = data_interface;
+    starpu_ssize_t                nbytes = cti->allocsize;
+    uint32_t                      ld     = cti->tile.m;
+    uintptr_t                     buffer;
+    uintptr_t                     addr;
+
+    /* Nothing is reserved for an empty or still undefined allocation size */
+    if ( nbytes <= 0 ) {
+        return 0;
+    }
+
+    buffer = starpu_malloc_on_node( node, nbytes );
+    if ( buffer == 0 ) {
+        return -ENOMEM;
+    }
+
+    /*
+     * On OpenCL nodes the returned handle is not stored as an address:
+     * mat is left at 0 there and only dev_handle refers to the buffer.
+     */
+    addr = ( starpu_node_get_kind( node ) == STARPU_OPENCL_RAM ) ? 0 : buffer;
+
+    /* Record the new buffer; its leading dimension is set to m */
+    cti->tile.mat   = (void *)addr;
+    cti->tile.ld    = ld;
+    cti->dev_handle = buffer;
+
+    return nbytes;
+}
+
+static void
+cti_free_data_on_node( void *data_interface, unsigned node )
+{
+    starpu_cham_tile_interface_t *cti = data_interface;
+
+    /* Release the node buffer, then drop both references to it */
+    starpu_free_on_node( node, cti->dev_handle, cti->allocsize );
+    cti->dev_handle = 0;
+    cti->tile.mat   = NULL;
+}
+
+static void *
+cti_to_pointer( void *data_interface, unsigned node )
+{
+    starpu_cham_tile_interface_t *cti = data_interface;
+    (void)node; /* unused */
+    /* Address stored in the tile descriptor of this replicate */
+    return (void *)cti->tile.mat;
+}
+
+static int
+cti_pointer_is_inside( void *data_interface, unsigned node, void *ptr )
+{
+    starpu_cham_tile_interface_t *cti   = data_interface;
+    char                         *first = cti->tile.mat;
+    char                         *query = ptr;
+    (void)node;
+
+    STARPU_ASSERT_MSG( cti->tile.format & CHAMELEON_TILE_FULLRANK,
+                       "Only full-rank matrices are supported." );
+
+    /* Inside means first <= ptr < first + allocsize (half-open byte range) */
+    return ( query >= first ) && ( query < first + cti->allocsize );
+}
+
+static size_t
+cti_get_size(starpu_data_handle_t handle)
+{
+    /* The reported size is the allocation size of the main-memory replicate */
+    starpu_cham_tile_interface_t *cti;
+
+    cti = starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
+#if defined(STARPU_DEBUG)
+    STARPU_ASSERT_MSG( cti->id == STARPU_CHAM_TILE_INTERFACE_ID,
+                       "Error. The given data is not a cham_tile." );
+#endif
+    return cti->allocsize;
+}
+
+static size_t
+cti_get_alloc_size(starpu_data_handle_t handle)
+{
+    starpu_cham_tile_interface_t *cti;
+
+    cti = starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
+#if defined(STARPU_DEBUG)
+    STARPU_ASSERT_MSG( cti->id == STARPU_CHAM_TILE_INTERFACE_ID,
+                       "Error. The given data is not a cham_tile." );
+#endif
+
+    /* (size_t)-1 is what cti_init() leaves behind until a real size is set */
+    STARPU_ASSERT_MSG( cti->allocsize != (size_t)-1,
+                       "The cham_tile allocation size needs to be defined" );
+    return cti->allocsize;
+}
+
+static uint32_t
+cti_footprint( starpu_data_handle_t handle )
+{
+    const CHAM_tile_t *dims = cti_handle_get( handle );
+    return starpu_hash_crc32c_be( dims->m, dims->n ); /* footprint depends on m and n only */
+}
+
+static uint32_t
+cti_alloc_footprint( starpu_data_handle_t handle )
+{
+    return starpu_hash_crc32c_be( cti_handle_get_allocsize( handle ), 0 ); /* keyed on the allocation size only */
+}
+
+static int
+cti_compare( void *data_interface_a, void *data_interface_b )
+{
+    const starpu_cham_tile_interface_t *lhs = data_interface_a;
+    const starpu_cham_tile_interface_t *rhs = data_interface_b;
+    int same_dims = ( lhs->tile.m == rhs->tile.m ) && ( lhs->tile.n == rhs->tile.n );
+
+    /* Two tiles are compatible when they share both dimensions
+     * and the same floating point type */
+    return same_dims && ( lhs->flttype == rhs->flttype );
+}
+
+static int
+cti_alloc_compare(void *data_interface_a, void *data_interface_b)
+{
+    const starpu_cham_tile_interface_t *lhs = data_interface_a;
+    const starpu_cham_tile_interface_t *rhs = data_interface_b;
+
+    /* Buffers are interchangeable as soon as their allocation sizes match */
+    return lhs->allocsize == rhs->allocsize;
+}
+
+static void
+cti_display( starpu_data_handle_t handle, FILE *f )
+{
+    starpu_cham_tile_interface_t *cham_tile_interface = (starpu_cham_tile_interface_t *)
+        starpu_data_get_interface_on_node(handle, STARPU_MAIN_RAM);
+    /* Writes "m<TAB>n<TAB>" to f; casts as in cti_describe() so the arguments match %u */
+    fprintf( f, "%u\t%u\t",
+             (unsigned) cham_tile_interface->tile.m,
+             (unsigned) cham_tile_interface->tile.n );
+}
+
+static int
+cti_pack_data_fullrank( starpu_cham_tile_interface_t *cham_tile_interface,
+                        void *ptr )
+{
+    const CHAM_tile_t *tile = &(cham_tile_interface->tile);
+    char              *src  = (void *)tile->mat;
+    char              *dst  = ptr;
+    int                col;
+
+    /* ld == m: the columns are already contiguous, one copy is enough */
+    if ( tile->m == tile->ld ) {
+        memcpy( dst, src, cham_tile_interface->allocsize );
+        return 0;
+    }
+    /* Otherwise copy column by column, skipping the ld - m extra rows */
+    for ( col = 0; col < tile->n; col++ ) {
+        size_t elemsize = CHAMELEON_Element_Size( cham_tile_interface->flttype );
+        size_t colbytes = tile->m * elemsize;
+        memcpy( dst, src, colbytes );
+        dst += colbytes;
+        src += tile->ld * elemsize;
+    }
+    return 0;
+}
+
+static int
+cti_pack_data( starpu_data_handle_t handle, unsigned node, void **ptr, starpu_ssize_t *count )
+{
+    STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+
+    starpu_cham_tile_interface_t *cham_tile_interface = (starpu_cham_tile_interface_t *)
+        starpu_data_get_interface_on_node(handle, node);
+    /* Message layout: allocsize (size_t), CHAM_tile_t metadata, packed tile data */
+    *count = (starpu_ssize_t)(cham_tile_interface->allocsize);
+    *count += sizeof(size_t) + sizeof(CHAM_tile_t);
+    /* With ptr == NULL only the message size is reported through count */
+    if ( ptr != NULL )
+    {
+        char *tmp;
+        *ptr = (void *)starpu_malloc_on_node_flags( node, *count, 0 );
+        tmp = (char*)(*ptr);
+
+        /* Start by the size to allocate on reception */
+        memcpy( tmp, &(cham_tile_interface->allocsize), sizeof(size_t) );
+        tmp += sizeof(size_t);
+
+        /* Copy the tile metadata */
+        memcpy( tmp, &(cham_tile_interface->tile), sizeof(CHAM_tile_t) );
+        tmp += sizeof(CHAM_tile_t);
+
+        /* Pack the real data */
+        if ( cham_tile_interface->tile.format & CHAMELEON_TILE_FULLRANK ) {
+            cti_pack_data_fullrank( cham_tile_interface, tmp );
+        }
+        else { /* no packing routine for other formats: the condition must be false to fire */
+            STARPU_ASSERT_MSG( 0, "Unsupported format for pack." );
+        }
+    }
+
+    return 0;
+}
+
+static int
+cti_unpack_data_fullrank( starpu_cham_tile_interface_t *cham_tile_interface,
+                          void *ptr )
+{
+    const CHAM_tile_t *tile = &(cham_tile_interface->tile);
+    char              *dst  = (void *)tile->mat;
+    char              *src  = ptr;
+    int                col;
+
+    /* ld == m: the destination is contiguous too, a single copy restores it */
+    if ( tile->m == tile->ld ) {
+        memcpy( dst, src, cham_tile_interface->allocsize );
+        return 0;
+    }
+    /* Otherwise scatter the packed columns, ld elements apart */
+    for ( col = 0; col < tile->n; col++ ) {
+        size_t elemsize = CHAMELEON_Element_Size( cham_tile_interface->flttype );
+        size_t colbytes = tile->m * elemsize;
+        memcpy( dst, src, colbytes );
+        src += colbytes;
+        dst += tile->ld * elemsize;
+    }
+    return 0;
+}
+
+static int
+cti_unpack_data( starpu_data_handle_t handle, unsigned node, void *ptr, size_t count )
+{
+    STARPU_ASSERT(starpu_data_test_if_allocated_on_node(handle, node));
+    /* Rebuild the replicate held on node from a message laid out by cti_pack_data() */
+    starpu_cham_tile_interface_t *cham_tile_interface = (starpu_cham_tile_interface_t *)
+        starpu_data_get_interface_on_node(handle, node);
+
+    CHAM_tile_t dsttile;
+    char *tmp = ptr;
+
+    /* Extract the size of the information to unpack */
+    memcpy( &(cham_tile_interface->allocsize), tmp, sizeof(size_t) );
+    tmp += sizeof(size_t);
+
+    /* Extract the tile metadata of the remote tile */
+    memcpy( &dsttile, tmp, sizeof(CHAM_tile_t) );
+    tmp += sizeof(CHAM_tile_t);
+
+    /* Take over the sender's format; ld is forced to m (NOTE(review): confirm for buffers with ld > m) */
+    cham_tile_interface->tile.format = dsttile.format;
+    cham_tile_interface->tile.ld = cham_tile_interface->tile.m;
+    STARPU_ASSERT( cham_tile_interface->tile.m == dsttile.m );
+    STARPU_ASSERT( cham_tile_interface->tile.n == dsttile.n );
+    STARPU_ASSERT( count == cham_tile_interface->allocsize + sizeof(size_t) + sizeof(CHAM_tile_t) );
+
+    /* Unpack the real data */
+    if ( cham_tile_interface->tile.format & CHAMELEON_TILE_FULLRANK ) {
+        cti_unpack_data_fullrank( cham_tile_interface, tmp );
+    }
+    else { /* no unpacking routine for other formats: the condition must be false to fire */
+        STARPU_ASSERT_MSG( 0, "Unsupported format for unpack." );
+    }
+
+    /* Free the received information (ptr must not be used by the caller afterwards) */
+    starpu_free_on_node_flags( node, (uintptr_t)ptr, count, 0 );
+
+    return 0;
+}
+
+static starpu_ssize_t
+cti_describe( void *data_interface, char *buf, size_t size )
+{
+    const starpu_cham_tile_interface_t *cti = data_interface;
+    unsigned m = (unsigned)cti->tile.m, n = (unsigned)cti->tile.n;
+    unsigned flttype = (unsigned)cti->flttype;
+    /* Writes "M<m>x<n>x<flttype>"; the snprintf result is returned unchanged */
+    return snprintf( buf, size, "M%ux%ux%u", m, n, flttype );
+}
+
+struct starpu_data_interface_ops starpu_interface_cham_tile_ops =
+{   /* Method table passed to starpu_data_register() by starpu_cham_tile_register() */
+    .init                  = cti_init,
+    .register_data_handle  = cti_register_data_handle,
+    .allocate_data_on_node = cti_allocate_data_on_node,
+    .free_data_on_node     = cti_free_data_on_node,
+    .to_pointer            = cti_to_pointer,
+    .pointer_is_inside     = cti_pointer_is_inside,
+    .get_size              = cti_get_size,
+    .get_alloc_size        = cti_get_alloc_size,
+    .footprint             = cti_footprint,
+    .alloc_footprint       = cti_alloc_footprint,
+    .compare               = cti_compare,
+    .alloc_compare         = cti_alloc_compare,
+    .display               = cti_display,
+    .pack_data             = cti_pack_data,
+    .unpack_data           = cti_unpack_data,
+    .describe              = cti_describe,
+    /* .copy_methods is not set: no cti_copy_methods is defined in this file */
+    .interfaceid           = STARPU_UNKNOWN_INTERFACE_ID, /* replaced by starpu_cham_tile_interface_init() */
+    .interface_size        = sizeof(starpu_cham_tile_interface_t),
+    .name                  = "STARPU_CHAM_TILE_INTERFACE"
+};
+
+void
+starpu_cham_tile_register( starpu_data_handle_t *handleptr,
+                           int                   home_node,
+                           CHAM_tile_t          *tile,
+                           cham_flttype_t        flttype )
+{
+    /* Register tile (elements of type flttype) as a StarPU handle whose home is home_node. */
+    /* The size uses m even if the buffer is allocated with ld; widened first so m * n cannot overflow */
+    size_t nbytes = (size_t)tile->m * (size_t)tile->n * CHAMELEON_Element_Size( flttype );
+    starpu_cham_tile_interface_t cham_tile_interface = {
+            .id         = STARPU_CHAM_TILE_INTERFACE_ID,
+            .flttype    = flttype,
+            .dev_handle = (uintptr_t)(tile->mat),
+            .allocsize  = nbytes,
+            .tilesize   = nbytes,
+        };
+    memcpy( &(cham_tile_interface.tile), tile, sizeof( CHAM_tile_t ) );
+    starpu_data_register( handleptr, home_node, &cham_tile_interface, &starpu_interface_cham_tile_ops );
+}
+
+size_t
+cti_handle_get_allocsize( starpu_data_handle_t handle )
+{
+    /* Allocation size recorded in the main-memory replicate of handle */
+    starpu_cham_tile_interface_t *cti;
+
+    cti = starpu_data_get_interface_on_node( handle, STARPU_MAIN_RAM );
+#if defined(STARPU_DEBUG)
+    STARPU_ASSERT_MSG( cti->id == STARPU_CHAM_TILE_INTERFACE_ID,
+                       "Error. The given data is not a cham_tile." );
+#endif
+    return cti->allocsize;
+}
+
+/* Declared as a constructor: obtains a StarPU interface id for cham_tile before main() or at library load */
+void starpu_cham_tile_interface_init( void ) __attribute__((constructor));
+
+void
+starpu_cham_tile_interface_init( void )
+{
+    /* An id obtained by an earlier call is kept */
+    if ( starpu_interface_cham_tile_ops.interfaceid == STARPU_UNKNOWN_INTERFACE_ID ) {
+        starpu_interface_cham_tile_ops.interfaceid = starpu_data_interface_get_next_id();
+    }
+}