diff --git a/.gitlab-ci-env.sh b/.gitlab-ci-env.sh
index 687df90dc59f1c561b2d45e826a197d3b83cfeab..ba9cd4dcfa9f48b59dde7a0b91cb2c2b18bdb957 100755
--- a/.gitlab-ci-env.sh
+++ b/.gitlab-ci-env.sh
@@ -8,6 +8,10 @@
 # too noisy
 export STARPU_SILENT=1
 
+# Make sure threads are not bound
+export STARPU_MPI_NOBIND=1
+export STARPU_WORKERS_NOBIND=1
+
 # initialize empty to get just what we need
 export PKG_CONFIG_PATH=""
 
diff --git a/.gitlab/test_starpu.yml b/.gitlab/test_starpu.yml
index d22d47349efe90d27955338c9a2ff3b97f17529d..1aa626ccd4c48d431aaec02e20f42043555945e8 100644
--- a/.gitlab/test_starpu.yml
+++ b/.gitlab/test_starpu.yml
@@ -28,8 +28,6 @@
 test_starpu_shm_s:
   <<: *test_starpu_master
   variables:
-    STARPU_WORKERS_NOBIND: 1
-    STARPU_SILENT: 1
     VERSION: starpu
     CATEGORY: shm
     PRECISION: s
@@ -38,8 +36,6 @@ test_starpu_shm_s:
 test_starpu_shm_d:
   <<: *test_starpu_branches
   variables:
-    STARPU_WORKERS_NOBIND: 1
-    STARPU_SILENT: 1
     VERSION: starpu
     CATEGORY: shm
     PRECISION: d
@@ -48,8 +44,6 @@ test_starpu_shm_d:
 test_starpu_shm_c:
   <<: *test_starpu_master
   variables:
-    STARPU_WORKERS_NOBIND: 1
-    STARPU_SILENT: 1
     VERSION: starpu
     CATEGORY: shm
     PRECISION: c
@@ -58,8 +52,6 @@ test_starpu_shm_c:
 test_starpu_shm_z:
   <<: *test_starpu_master
   variables:
-    STARPU_WORKERS_NOBIND: 1
-    STARPU_SILENT: 1
     VERSION: starpu
     CATEGORY: shm
     PRECISION: z
@@ -68,8 +60,6 @@ test_starpu_shm_z:
 test_starpu_mpi_s:
   <<: *test_starpu_branches
   variables:
-    STARPU_WORKERS_NOBIND: 1
-    STARPU_SILENT: 1
     VERSION: starpu
     CATEGORY: mpi
     PRECISION: s
@@ -78,8 +68,6 @@ test_starpu_mpi_s:
 test_starpu_mpi_d:
   <<: *test_starpu_master
   variables:
-    STARPU_WORKERS_NOBIND: 1
-    STARPU_SILENT: 1
     VERSION: starpu
     CATEGORY: mpi
     PRECISION: d
@@ -88,8 +76,6 @@ test_starpu_mpi_d:
 test_starpu_mpi_c:
   <<: *test_starpu_master
   variables:
-    STARPU_WORKERS_NOBIND: 1
-    STARPU_SILENT: 1
     VERSION: starpu
     CATEGORY: mpi
     PRECISION: c
@@ -98,8 +84,6 @@ test_starpu_mpi_c:
 test_starpu_mpi_z:
   <<: *test_starpu_master
   variables:
-    STARPU_WORKERS_NOBIND: 1
-    STARPU_SILENT: 1
     VERSION: starpu
     CATEGORY: mpi
     PRECISION: z
diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake
index eeb878b31a2c149e9ede3f8bea03db94db020ae7..169c202ba9cfb54f35afd90021a3226478cf3304 160000
--- a/cmake_modules/morse_cmake
+++ b/cmake_modules/morse_cmake
@@ -1 +1 @@
-Subproject commit eeb878b31a2c149e9ede3f8bea03db94db020ae7
+Subproject commit 169c202ba9cfb54f35afd90021a3226478cf3304
diff --git a/control/descriptor.c b/control/descriptor.c
index dd1be56c45f11f4968b4a447ea0fe9c938e7c343..58c01f7356bb17b85f8d36dc27e277baf29c54ea 100644
--- a/control/descriptor.c
+++ b/control/descriptor.c
@@ -860,7 +860,7 @@ int CHAMELEON_Desc_Destroy(CHAM_desc_t **desc)
  * @retval CHAMELEON_SUCCESS successful exit
  *
  */
-int CHAMELEON_Desc_Acquire (CHAM_desc_t  *desc) {
+int CHAMELEON_Desc_Acquire( const CHAM_desc_t *desc ) {
     return RUNTIME_desc_acquire( desc );
 }
 
@@ -883,7 +883,7 @@ int CHAMELEON_Desc_Acquire (CHAM_desc_t  *desc) {
  * @retval CHAMELEON_SUCCESS successful exit
  *
  */
-int CHAMELEON_Desc_Release (CHAM_desc_t  *desc) {
+int CHAMELEON_Desc_Release( const CHAM_desc_t *desc ) {
     return RUNTIME_desc_release( desc );
 }
 
diff --git a/coreblas/compute/core_ztile.c b/coreblas/compute/core_ztile.c
index f27cebfd2eaa0221e41afeb05ea7c25f6f035a89..a544dfa7840865ec4abe08f14f7588bd6220be3a 100644
--- a/coreblas/compute/core_ztile.c
+++ b/coreblas/compute/core_ztile.c
@@ -39,7 +39,7 @@ TCORE_dzasum( cham_store_t       storev,
               double *           work )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    CORE_dzasum( storev, uplo, M, N, A->mat, A->ld, work );
+    CORE_dzasum( storev, uplo, M, N, CHAM_tile_get_ptr( A ), A->ld, work );
 }
 
 int
@@ -52,7 +52,7 @@ TCORE_zaxpy( int                   M,
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( B->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zaxpy( M, alpha, A->mat, incA, B->mat, incB );
+    return CORE_zaxpy( M, alpha, CHAM_tile_get_ptr( A ), incA, CHAM_tile_get_ptr( B ), incB );
 }
 
 int
@@ -64,9 +64,15 @@ TCORE_zgeadd( cham_trans_t          trans,
               CHAMELEON_Complex64_t beta,
               CHAM_tile_t *         B )
 {
-    assert( A->format & CHAMELEON_TILE_FULLRANK );
-    assert( B->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zgeadd( trans, M, N, alpha, A->mat, A->ld, beta, B->mat, B->ld );
+    if ( (A->format & CHAMELEON_TILE_DESC) &&
+         (B->format & CHAMELEON_TILE_DESC) )
+    {
+        assert(0);
+    }
+
+    return CORE_zgeadd( trans, M, N,
+                        alpha, CHAM_tile_get_ptr( A ), A->ld,
+                        beta,  CHAM_tile_get_ptr( B ), B->ld );
 }
 
 int
@@ -80,7 +86,7 @@ TCORE_zgelqt( int                    M,
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( T->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zgelqt( M, N, IB, A->mat, A->ld, T->mat, T->ld, TAU, WORK );
+    return CORE_zgelqt( M, N, IB, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( T ), T->ld, TAU, WORK );
 }
 
 void
@@ -112,7 +118,11 @@ TCORE_zgemm( cham_trans_t          transA,
     assert( B->format & CHAMELEON_TILE_FULLRANK );
     assert( C->format & CHAMELEON_TILE_FULLRANK );
     CORE_zgemm(
-        transA, transB, M, N, K, alpha, A->mat, A->ld, B->mat, B->ld, beta, C->mat, C->ld );
+        transA, transB, M, N, K, alpha,
+        CHAM_tile_get_ptr( A ), A->ld,
+        CHAM_tile_get_ptr( B ), B->ld,
+        beta,
+        CHAM_tile_get_ptr( C ), C->ld );
 }
 
 int
@@ -126,7 +136,7 @@ TCORE_zgeqrt( int                    M,
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( T->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zgeqrt( M, N, IB, A->mat, A->ld, T->mat, T->ld, TAU, WORK );
+    return CORE_zgeqrt( M, N, IB, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( T ), T->ld, TAU, WORK );
 }
 
 int
@@ -134,7 +144,7 @@ TCORE_zgessm( int M, int N, int K, int IB, const int *IPIV, const CHAM_tile_t *L
 {
     assert( L->format & CHAMELEON_TILE_FULLRANK );
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zgessm( M, N, K, IB, IPIV, L->mat, L->ld, A->mat, A->ld );
+    return CORE_zgessm( M, N, K, IB, IPIV, CHAM_tile_get_ptr( L ), L->ld, CHAM_tile_get_ptr( A ), A->ld );
 }
 
 int
@@ -142,28 +152,28 @@ TCORE_zgessq( cham_store_t storev, int M, int N, const CHAM_tile_t *A, CHAM_tile
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( sclssq->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zgessq( storev, M, N, A->mat, A->ld, sclssq->mat );
+    return CORE_zgessq( storev, M, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( sclssq ) );
 }
 
 int
 TCORE_zgetrf( int M, int N, CHAM_tile_t *A, int *IPIV, int *INFO )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zgetrf( M, N, A->mat, A->ld, IPIV, INFO );
+    return CORE_zgetrf( M, N, CHAM_tile_get_ptr( A ), A->ld, IPIV, INFO );
 }
 
 int
 TCORE_zgetrf_incpiv( int M, int N, int IB, CHAM_tile_t *A, int *IPIV, int *INFO )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zgetrf_incpiv( M, N, IB, A->mat, A->ld, IPIV, INFO );
+    return CORE_zgetrf_incpiv( M, N, IB, CHAM_tile_get_ptr( A ), A->ld, IPIV, INFO );
 }
 
 int
 TCORE_zgetrf_nopiv( int M, int N, int IB, CHAM_tile_t *A, int *INFO )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zgetrf_nopiv( M, N, IB, A->mat, A->ld, INFO );
+    return CORE_zgetrf_nopiv( M, N, IB, CHAM_tile_get_ptr( A ), A->ld, INFO );
 }
 
 void
@@ -171,7 +181,7 @@ TCORE_zhe2ge( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( B->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zhe2ge( uplo, M, N, A->mat, A->ld, B->mat, B->ld );
+    CORE_zhe2ge( uplo, M, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld );
 }
 
 #if defined( PRECISION_z ) || defined( PRECISION_c )
@@ -189,7 +199,7 @@ TCORE_zhemm( cham_side_t           side,
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( B->format & CHAMELEON_TILE_FULLRANK );
     assert( C->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zhemm( side, uplo, M, N, alpha, A->mat, A->ld, B->mat, B->ld, beta, C->mat, C->ld );
+    CORE_zhemm( side, uplo, M, N, alpha, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, beta, CHAM_tile_get_ptr( C ), C->ld );
 }
 
 void
@@ -204,7 +214,7 @@ TCORE_zherk( cham_uplo_t        uplo,
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( C->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zherk( uplo, trans, N, K, alpha, A->mat, A->ld, beta, C->mat, C->ld );
+    CORE_zherk( uplo, trans, N, K, alpha, CHAM_tile_get_ptr( A ), A->ld, beta, CHAM_tile_get_ptr( C ), C->ld );
 }
 
 void
@@ -221,7 +231,7 @@ TCORE_zher2k( cham_uplo_t           uplo,
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( B->format & CHAMELEON_TILE_FULLRANK );
     assert( C->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zher2k( uplo, trans, N, K, alpha, A->mat, A->ld, B->mat, B->ld, beta, C->mat, C->ld );
+    CORE_zher2k( uplo, trans, N, K, alpha, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, beta, CHAM_tile_get_ptr( C ), C->ld );
 }
 #endif
 
@@ -241,7 +251,7 @@ TCORE_zherfb( cham_uplo_t            uplo,
     assert( T->format & CHAMELEON_TILE_FULLRANK );
     assert( C->format & CHAMELEON_TILE_FULLRANK );
     return CORE_zherfb(
-        uplo, N, K, IB, NB, A->mat, A->ld, T->mat, T->ld, C->mat, C->ld, WORK, ldwork );
+        uplo, N, K, IB, NB, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( T ), T->ld, CHAM_tile_get_ptr( C ), C->ld, WORK, ldwork );
 }
 
 #if defined( PRECISION_z ) || defined( PRECISION_c )
@@ -254,16 +264,19 @@ TCORE_zhessq( cham_store_t       storev,
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( sclssq->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zhessq( storev, uplo, N, A->mat, A->ld, sclssq->mat );
+    return CORE_zhessq( storev, uplo, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( sclssq ) );
 }
 #endif
 
 void
 TCORE_zlacpy( cham_uplo_t uplo, int M, int N, const CHAM_tile_t *A, CHAM_tile_t *B )
 {
-    assert( A->format & CHAMELEON_TILE_FULLRANK );
-    assert( B->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zlacpy( uplo, M, N, A->mat, A->ld, B->mat, B->ld );
+    if (( A->format & CHAMELEON_TILE_DESC ) &&
+        ( B->format & CHAMELEON_TILE_DESC ) )
+    {
+        assert(0); /* This should have been handled at the codelet level */
+    }
+    CORE_zlacpy( uplo, M, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld );
 }
 
 void
@@ -275,7 +288,7 @@ TCORE_zlange( cham_normtype_t    norm,
               double *           normA )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zlange( norm, M, N, A->mat, A->ld, work, normA );
+    CORE_zlange( norm, M, N, CHAM_tile_get_ptr( A ), A->ld, work, normA );
 }
 
 #if defined( PRECISION_z ) || defined( PRECISION_c )
@@ -288,7 +301,7 @@ TCORE_zlanhe( cham_normtype_t    norm,
               double *           normA )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zlanhe( norm, uplo, N, A->mat, A->ld, work, normA );
+    CORE_zlanhe( norm, uplo, N, CHAM_tile_get_ptr( A ), A->ld, work, normA );
 }
 #endif
 
@@ -301,7 +314,7 @@ TCORE_zlansy( cham_normtype_t    norm,
               double *           normA )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zlansy( norm, uplo, N, A->mat, A->ld, work, normA );
+    CORE_zlansy( norm, uplo, N, CHAM_tile_get_ptr( A ), A->ld, work, normA );
 }
 
 void
@@ -315,14 +328,14 @@ TCORE_zlantr( cham_normtype_t    norm,
               double *           normA )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zlantr( norm, uplo, diag, M, N, A->mat, A->ld, work, normA );
+    CORE_zlantr( norm, uplo, diag, M, N, CHAM_tile_get_ptr( A ), A->ld, work, normA );
 }
 
 int
 TCORE_zlascal( cham_uplo_t uplo, int m, int n, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zlascal( uplo, m, n, alpha, A->mat, A->ld );
+    return CORE_zlascal( uplo, m, n, alpha, CHAM_tile_get_ptr( A ), A->ld );
 }
 
 void
@@ -334,14 +347,14 @@ TCORE_zlaset( cham_uplo_t           uplo,
               CHAM_tile_t *         A )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zlaset( uplo, n1, n2, alpha, beta, A->mat, A->ld );
+    CORE_zlaset( uplo, n1, n2, alpha, beta, CHAM_tile_get_ptr( A ), A->ld );
 }
 
 void
 TCORE_zlaset2( cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha, CHAM_tile_t *A )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zlaset2( uplo, n1, n2, alpha, A->mat, A->ld );
+    CORE_zlaset2( uplo, n1, n2, alpha, CHAM_tile_get_ptr( A ), A->ld );
 }
 
 int
@@ -354,14 +367,14 @@ TCORE_zlatro( cham_uplo_t        uplo,
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( B->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zlatro( uplo, trans, M, N, A->mat, A->ld, B->mat, B->ld );
+    return CORE_zlatro( uplo, trans, M, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld );
 }
 
 void
 TCORE_zlauum( cham_uplo_t uplo, int N, CHAM_tile_t *A )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zlauum( uplo, N, A->mat, A->ld );
+    CORE_zlauum( uplo, N, CHAM_tile_get_ptr( A ), A->ld );
 }
 
 #if defined( PRECISION_z ) || defined( PRECISION_c )
@@ -376,7 +389,7 @@ TCORE_zplghe( double                 bump,
               unsigned long long int seed )
 {
     assert( tileA->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zplghe( bump, m, n, tileA->mat, tileA->ld, bigM, m0, n0, seed );
+    CORE_zplghe( bump, m, n, CHAM_tile_get_ptr( tileA ), tileA->ld, bigM, m0, n0, seed );
 }
 #endif
 
@@ -391,7 +404,7 @@ TCORE_zplgsy( CHAMELEON_Complex64_t  bump,
               unsigned long long int seed )
 {
     assert( tileA->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zplgsy( bump, m, n, tileA->mat, tileA->ld, bigM, m0, n0, seed );
+    CORE_zplgsy( bump, m, n, CHAM_tile_get_ptr( tileA ), tileA->ld, bigM, m0, n0, seed );
 }
 
 void
@@ -404,14 +417,14 @@ TCORE_zplrnt( int                    m,
               unsigned long long int seed )
 {
     assert( tileA->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zplrnt( m, n, tileA->mat, tileA->ld, bigM, m0, n0, seed );
+    CORE_zplrnt( m, n, CHAM_tile_get_ptr( tileA ), tileA->ld, bigM, m0, n0, seed );
 }
 
 void
 TCORE_zpotrf( cham_uplo_t uplo, int n, CHAM_tile_t *A, int *INFO )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zpotrf( uplo, n, A->mat, A->ld, INFO );
+    CORE_zpotrf( uplo, n, CHAM_tile_get_ptr( A ), A->ld, INFO );
 }
 
 int
@@ -437,13 +450,13 @@ TCORE_zssssm( int                M1,
                         N2,
                         K,
                         IB,
-                        A1->mat,
+                        CHAM_tile_get_ptr( A1 ),
                         A1->ld,
-                        A2->mat,
+                        CHAM_tile_get_ptr( A2 ),
                         A2->ld,
-                        L1->mat,
+                        CHAM_tile_get_ptr( L1 ),
                         L1->ld,
-                        L2->mat,
+                        CHAM_tile_get_ptr( L2 ),
                         L2->ld,
                         IPIV );
 }
@@ -462,7 +475,7 @@ TCORE_zsymm( cham_side_t           side,
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( B->format & CHAMELEON_TILE_FULLRANK );
     assert( C->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zsymm( side, uplo, M, N, alpha, A->mat, A->ld, B->mat, B->ld, beta, C->mat, C->ld );
+    CORE_zsymm( side, uplo, M, N, alpha, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, beta, CHAM_tile_get_ptr( C ), C->ld );
 }
 
 void
@@ -477,7 +490,7 @@ TCORE_zsyrk( cham_uplo_t           uplo,
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( C->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zsyrk( uplo, trans, N, K, alpha, A->mat, A->ld, beta, C->mat, C->ld );
+    CORE_zsyrk( uplo, trans, N, K, alpha, CHAM_tile_get_ptr( A ), A->ld, beta, CHAM_tile_get_ptr( C ), C->ld );
 }
 
 void
@@ -494,7 +507,7 @@ TCORE_zsyr2k( cham_uplo_t           uplo,
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( B->format & CHAMELEON_TILE_FULLRANK );
     assert( C->format & CHAMELEON_TILE_FULLRANK );
-    CORE_zsyr2k( uplo, trans, N, K, alpha, A->mat, A->ld, B->mat, B->ld, beta, C->mat, C->ld );
+    CORE_zsyr2k( uplo, trans, N, K, alpha, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, beta, CHAM_tile_get_ptr( C ), C->ld );
 }
 
 int
@@ -506,7 +519,7 @@ TCORE_zsyssq( cham_store_t       storev,
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( sclssq->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zsyssq( storev, uplo, N, A->mat, A->ld, sclssq->mat );
+    return CORE_zsyssq( storev, uplo, N, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( sclssq ) );
 }
 
 #if defined( PRECISION_z ) || defined( PRECISION_c )
@@ -514,7 +527,7 @@ int
 TCORE_zsytf2_nopiv( cham_uplo_t uplo, int n, CHAM_tile_t *A )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_zsytf2_nopiv( uplo, n, A->mat, A->ld );
+    return CORE_zsytf2_nopiv( uplo, n, CHAM_tile_get_ptr( A ), A->ld );
 }
 #endif
 
@@ -531,7 +544,7 @@ TCORE_ztplqt( int                    M,
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( B->format & CHAMELEON_TILE_FULLRANK );
     assert( T->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_ztplqt( M, N, L, IB, A->mat, A->ld, B->mat, B->ld, T->mat, T->ld, WORK );
+    return CORE_ztplqt( M, N, L, IB, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, CHAM_tile_get_ptr( T ), T->ld, WORK );
 }
 
 int
@@ -559,13 +572,13 @@ TCORE_ztpmlqt( cham_side_t            side,
                          K,
                          L,
                          IB,
-                         V->mat,
+                         CHAM_tile_get_ptr( V ),
                          V->ld,
-                         T->mat,
+                         CHAM_tile_get_ptr( T ),
                          T->ld,
-                         A->mat,
+                         CHAM_tile_get_ptr( A ),
                          A->ld,
-                         B->mat,
+                         CHAM_tile_get_ptr( B ),
                          B->ld,
                          WORK );
 }
@@ -595,13 +608,13 @@ TCORE_ztpmqrt( cham_side_t            side,
                          K,
                          L,
                          IB,
-                         V->mat,
+                         CHAM_tile_get_ptr( V ),
                          V->ld,
-                         T->mat,
+                         CHAM_tile_get_ptr( T ),
                          T->ld,
-                         A->mat,
+                         CHAM_tile_get_ptr( A ),
                          A->ld,
-                         B->mat,
+                         CHAM_tile_get_ptr( B ),
                          B->ld,
                          WORK );
 }
@@ -619,7 +632,7 @@ TCORE_ztpqrt( int                    M,
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( B->format & CHAMELEON_TILE_FULLRANK );
     assert( T->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_ztpqrt( M, N, L, IB, A->mat, A->ld, B->mat, B->ld, T->mat, T->ld, WORK );
+    return CORE_ztpqrt( M, N, L, IB, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld, CHAM_tile_get_ptr( T ), T->ld, WORK );
 }
 
 int
@@ -632,9 +645,12 @@ TCORE_ztradd( cham_uplo_t           uplo,
               CHAMELEON_Complex64_t beta,
               CHAM_tile_t *         B )
 {
-    assert( A->format & CHAMELEON_TILE_FULLRANK );
-    assert( B->format & CHAMELEON_TILE_FULLRANK );
-    return CORE_ztradd( uplo, trans, M, N, alpha, A->mat, A->ld, beta, B->mat, B->ld );
+    if (( A->format & CHAMELEON_TILE_DESC ) &&
+        ( B->format & CHAMELEON_TILE_DESC ) )
+    {
+        assert(0); /* This should have been handled at the codelet level */
+    }
+    return CORE_ztradd( uplo, trans, M, N, alpha, CHAM_tile_get_ptr( A ), A->ld, beta, CHAM_tile_get_ptr( B ), B->ld );
 }
 
 void
@@ -647,7 +663,7 @@ TCORE_ztrasm( cham_store_t       storev,
               double *           work )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    CORE_ztrasm( storev, uplo, diag, M, N, A->mat, A->ld, work );
+    CORE_ztrasm( storev, uplo, diag, M, N, CHAM_tile_get_ptr( A ), A->ld, work );
 }
 
 void
@@ -663,7 +679,7 @@ TCORE_ztrmm( cham_side_t           side,
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( B->format & CHAMELEON_TILE_FULLRANK );
-    CORE_ztrmm( side, uplo, transA, diag, M, N, alpha, A->mat, A->ld, B->mat, B->ld );
+    CORE_ztrmm( side, uplo, transA, diag, M, N, alpha, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld );
 }
 
 void
@@ -679,7 +695,7 @@ TCORE_ztrsm( cham_side_t           side,
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( B->format & CHAMELEON_TILE_FULLRANK );
-    CORE_ztrsm( side, uplo, transA, diag, M, N, alpha, A->mat, A->ld, B->mat, B->ld );
+    CORE_ztrsm( side, uplo, transA, diag, M, N, alpha, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( B ), B->ld );
 }
 
 int
@@ -692,15 +708,15 @@ TCORE_ztrssq( cham_uplo_t        uplo,
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( sclssq->format & CHAMELEON_TILE_FULLRANK );
-    double *W = sclssq->mat;
-    return CORE_ztrssq( uplo, diag, M, N, A->mat, A->ld, W, W + 1 );
+    double *W = CHAM_tile_get_ptr( sclssq );
+    return CORE_ztrssq( uplo, diag, M, N, CHAM_tile_get_ptr( A ), A->ld, W, W + 1 );
 }
 
 void
 TCORE_ztrtri( cham_uplo_t uplo, cham_diag_t diag, int N, CHAM_tile_t *A, int *info )
 {
     assert( A->format & CHAMELEON_TILE_FULLRANK );
-    CORE_ztrtri( uplo, diag, N, A->mat, A->ld, info );
+    CORE_ztrtri( uplo, diag, N, CHAM_tile_get_ptr( A ), A->ld, info );
 }
 
 int
@@ -731,13 +747,13 @@ TCORE_ztsmlq_hetra1( cham_side_t            side,
                                n2,
                                k,
                                ib,
-                               A1->mat,
+                               CHAM_tile_get_ptr( A1 ),
                                A1->ld,
-                               A2->mat,
+                               CHAM_tile_get_ptr( A2 ),
                                A2->ld,
-                               V->mat,
+                               CHAM_tile_get_ptr( V ),
                                V->ld,
-                               T->mat,
+                               CHAM_tile_get_ptr( T ),
                                T->ld,
                                WORK,
                                ldwork );
@@ -771,13 +787,13 @@ TCORE_ztsmqr_hetra1( cham_side_t            side,
                                n2,
                                k,
                                ib,
-                               A1->mat,
+                               CHAM_tile_get_ptr( A1 ),
                                A1->ld,
-                               A2->mat,
+                               CHAM_tile_get_ptr( A2 ),
                                A2->ld,
-                               V->mat,
+                               CHAM_tile_get_ptr( V ),
                                V->ld,
-                               T->mat,
+                               CHAM_tile_get_ptr( T ),
                                T->ld,
                                WORK,
                                ldwork );
@@ -800,7 +816,7 @@ TCORE_ztstrf( int                    M,
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     assert( L->format & CHAMELEON_TILE_FULLRANK );
     return CORE_ztstrf(
-        M, N, IB, NB, U->mat, U->ld, A->mat, A->ld, L->mat, L->ld, IPIV, WORK, LDWORK, INFO );
+        M, N, IB, NB, CHAM_tile_get_ptr( U ), U->ld, CHAM_tile_get_ptr( A ), A->ld, CHAM_tile_get_ptr( L ), L->ld, IPIV, WORK, LDWORK, INFO );
 }
 
 int
@@ -820,7 +836,7 @@ TCORE_zunmlq( cham_side_t            side,
     assert( T->format & CHAMELEON_TILE_FULLRANK );
     assert( C->format & CHAMELEON_TILE_FULLRANK );
     return CORE_zunmlq(
-        side, trans, M, N, K, IB, V->mat, V->ld, T->mat, T->ld, C->mat, C->ld, WORK, LDWORK );
+        side, trans, M, N, K, IB, CHAM_tile_get_ptr( V ), V->ld, CHAM_tile_get_ptr( T ), T->ld, CHAM_tile_get_ptr( C ), C->ld, WORK, LDWORK );
 }
 
 int
@@ -840,7 +856,7 @@ TCORE_zunmqr( cham_side_t            side,
     assert( T->format & CHAMELEON_TILE_FULLRANK );
     assert( C->format & CHAMELEON_TILE_FULLRANK );
     return CORE_zunmqr(
-        side, trans, M, N, K, IB, V->mat, V->ld, T->mat, T->ld, C->mat, C->ld, WORK, LDWORK );
+        side, trans, M, N, K, IB, CHAM_tile_get_ptr( V ), V->ld, CHAM_tile_get_ptr( T ), T->ld, CHAM_tile_get_ptr( C ), C->ld, WORK, LDWORK );
 }
 
 int
@@ -859,5 +875,5 @@ TCORE_zgram( cham_uplo_t        uplo,
     assert( D->format & CHAMELEON_TILE_FULLRANK );
     assert( A->format & CHAMELEON_TILE_FULLRANK );
     return CORE_zgram(
-        uplo, M, N, Mt, Nt, Di->mat, Di->ld, Dj->mat, Dj->ld, D->mat, A->mat, A->ld );
+        uplo, M, N, Mt, Nt, CHAM_tile_get_ptr( Di ), Di->ld, CHAM_tile_get_ptr( Dj ), Dj->ld, CHAM_tile_get_ptr( D ), CHAM_tile_get_ptr( A ), A->ld );
 }
diff --git a/include/chameleon.h b/include/chameleon.h
index 4ebc7ee846d4b2c382564e755255229a1ae14c7a..713f9493edebd70253dbce1e14096c2734e863fc 100644
--- a/include/chameleon.h
+++ b/include/chameleon.h
@@ -135,8 +135,8 @@ CHAM_desc_t *CHAMELEON_Desc_CopyOnZero( const CHAM_desc_t *descin, void *mat );
 CHAM_desc_t *CHAMELEON_Desc_SubMatrix( CHAM_desc_t *descA, int i, int j, int m, int n );
 
 int CHAMELEON_Desc_Destroy( CHAM_desc_t **desc );
-int CHAMELEON_Desc_Acquire( CHAM_desc_t  *desc );
-int CHAMELEON_Desc_Release( CHAM_desc_t  *desc );
+int CHAMELEON_Desc_Acquire( const CHAM_desc_t *desc );
+int CHAMELEON_Desc_Release( const CHAM_desc_t *desc );
 int CHAMELEON_Desc_Flush  ( const CHAM_desc_t        *desc,
                             const RUNTIME_sequence_t *sequence );
 
diff --git a/include/chameleon/struct.h b/include/chameleon/struct.h
index 1ac2edb55d5bd51d2fa969451d14eb0796e8536c..748daaa738b1fff51914ccb59856415797ab25e8 100644
--- a/include/chameleon/struct.h
+++ b/include/chameleon/struct.h
@@ -143,6 +143,15 @@ typedef struct chameleon_context_s {
     int                mpi_outer_init;     // MPI has been initialized outside our functions
 } CHAM_context_t;
 
+static inline void *
+CHAM_tile_get_ptr( const CHAM_tile_t *tile )
+{
+    if ( tile->format & CHAMELEON_TILE_DESC ) {
+        return ((CHAM_desc_t*)(tile->mat))->mat;
+    }
+    return tile->mat;
+}
+
 END_C_DECLS
 
 #endif /* _chameleon_struct_h_ */
diff --git a/runtime/starpu/codelets/codelet_zcallback.c b/runtime/starpu/codelets/codelet_zcallback.c
index 40c4c24ab4cbd1352d516a6ebae7a8447b3e5f67..d94d536a0c8343b0624c6534a3197b5d88ee7175 100644
--- a/runtime/starpu/codelets/codelet_zcallback.c
+++ b/runtime/starpu/codelets/codelet_zcallback.c
@@ -28,7 +28,7 @@ CHAMELEON_CL_CB(dlag2z,        cti_handle_get_m(task->handles[1]), cti_handle_ge
 CHAMELEON_CL_CB(dzasum,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
 CHAMELEON_CL_CB(zaxpy,         cti_handle_get_m(task->handles[0]), cti_handle_get_m(task->handles[1]), 0,                                      M)
 CHAMELEON_CL_CB(zgeadd,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
-CHAMELEON_CL_CB(ztradd,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                  0.5*M*N)
+CHAMELEON_CL_CB(ztradd,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N/2.)
 CHAMELEON_CL_CB(zlascal,       cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      M*N)
 CHAMELEON_CL_CB(zgelqt,        cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      (4./3.)*M*N*K)
 CHAMELEON_CL_CB(zgemv,         cti_handle_get_m(task->handles[0]), cti_handle_get_n(task->handles[0]), 0,                                      2. *M*N  )
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 9a3b9ea3f127a9f7656afa8e27e08ecc9c976aab..0567065f3ceb12f9abf475429111f2df0cbae312 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -25,9 +25,7 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_zgemm_args_s {
     cham_trans_t transA;
     cham_trans_t transB;
     int m;
@@ -38,47 +36,54 @@ static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
     CHAM_tile_t *tileB;
     CHAMELEON_Complex64_t beta;
     CHAM_tile_t *tileC;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void
+cl_zgemm_cpu_func( void *descr[], void *cl_arg )
+{
+    struct cl_zgemm_args_s clargs;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
+    CHAM_tile_t *tileC;
 
     tileA = cti_interface_get(descr[0]);
     tileB = cti_interface_get(descr[1]);
     tileC = cti_interface_get(descr[2]);
 
-    starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta);
-    TCORE_zgemm( transA, transB,
-                 m, n, k,
-                 alpha, tileA, tileB,
-                 beta,  tileC );
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_zgemm( clargs.transA, clargs.transB,
+                 clargs.m, clargs.n, clargs.k,
+                 clargs.alpha, tileA, tileB,
+                 clargs.beta,  tileC );
 }
 
 #ifdef CHAMELEON_USE_CUDA
-static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
+static void
+cl_zgemm_cuda_func( void *descr[], void *_cl_arg )
 {
-    cham_trans_t transA;
-    cham_trans_t transB;
-    int m;
-    int n;
-    int k;
-    cuDoubleComplex alpha;
+    struct cl_zgemm_args_s clargs;
     CHAM_tile_t *tileA;
     CHAM_tile_t *tileB;
-    cuDoubleComplex beta;
     CHAM_tile_t *tileC;
 
     tileA = cti_interface_get(descr[0]);
     tileB = cti_interface_get(descr[1]);
     tileC = cti_interface_get(descr[2]);
 
-    starpu_codelet_unpack_args(cl_arg, &transA, &transB, &m, &n, &k, &alpha, &beta);
+    starpu_codelet_unpack_args( _cl_arg, &clargs );
 
     RUNTIME_getStream( stream );
 
     CUDA_zgemm(
-        transA, transB,
-        m, n, k,
-        &alpha, tileA->mat, tileA->ld,
-                tileB->mat, tileB->ld,
-        &beta,  tileC->mat, tileC->ld,
-        stream);
+        clargs.transA, clargs.transB,
+        clargs.m, clargs.n, clargs.k,
+        (cuDoubleComplex*)&(clargs.alpha),
+        tileA->mat, tileA->ld,
+        tileB->mat, tileB->ld,
+        (cuDoubleComplex*)&(clargs.beta),
+        tileC->mat, tileC->ld,
+        stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
@@ -92,56 +97,72 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(zgemm, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
-
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
-                      cham_trans_t transA, cham_trans_t transB,
-                      int m, int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
-                                                   const CHAM_desc_t *B, int Bm, int Bn,
-                      CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn)
+CODELETS( zgemm, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC )
+
+void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
+                        cham_trans_t transA, cham_trans_t transB,
+                        int m, int n, int k, int nb,
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                     const CHAM_desc_t *B, int Bm, int Bn,
+                        CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn )
 {
     if ( alpha == 0. ) {
         return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb,
                                     beta, C, Cm, Cn );
     }
 
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zgemm;
-    void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
-    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
-
+    struct cl_zgemm_args_s clargs = {
+        .transA = transA,
+        .transB = transB,
+        .m      = m,
+        .n      = n,
+        .k      = k,
+        .alpha  = alpha,
+        .tileA  = A->get_blktile( A, Am, An ),
+        .tileB  = B->get_blktile( B, Bm, Bn ),
+        .beta   = beta,
+        .tileC  = C->get_blktile( C, Cm, Cn )
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid, accessC;
+    char                    *cl_name = "zgemm";
+
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
     CHAMELEON_ACCESS_R(B, Bm, Bn);
     CHAMELEON_ACCESS_RW(C, Cm, Cn);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_zgemm_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Reduce the C access if needed */
+    accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,    &transA,            sizeof(int),
-        STARPU_VALUE,    &transB,            sizeof(int),
-        STARPU_VALUE,    &m,                 sizeof(int),
-        STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_VALUE,    &k,                 sizeof(int),
-        STARPU_VALUE,    &alpha,             sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_R,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,    &beta,              sizeof(CHAMELEON_Complex64_t),
-        accessC,          RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
+        &cl_zgemm,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_zgemm_args_s),
+        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_R,      RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        accessC,       RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zgemm",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
 
+        0 );
+
+    (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index 1b9d39818e5c3e45e0ae02df5dace718f160a0e7..b224473cbfa7de6d234012c033178c827bf1eeff 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -25,9 +25,7 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zherk_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_zherk_args_s {
     cham_uplo_t uplo;
     cham_trans_t trans;
     int n;
@@ -36,41 +34,46 @@ static void cl_zherk_cpu_func(void *descr[], void *cl_arg)
     CHAM_tile_t *tileA;
     double beta;
     CHAM_tile_t *tileC;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void
+cl_zherk_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_zherk_args_s clargs;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileC;
 
     tileA = cti_interface_get(descr[0]);
     tileC = cti_interface_get(descr[1]);
 
-    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
-    TCORE_zherk(uplo, trans,
-        n, k,
-        alpha, tileA,
-        beta, tileC);
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_zherk( clargs.uplo, clargs.trans, clargs.n, clargs.k,
+                 clargs.alpha, tileA, clargs.beta, tileC );
 }
 
-#ifdef CHAMELEON_USE_CUDA
-static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
+#if defined(CHAMELEON_USE_CUDA)
+static void
+cl_zherk_cuda_func(void *descr[], void *cl_arg)
 {
-    cham_uplo_t uplo;
-    cham_trans_t trans;
-    int n;
-    int k;
-    double alpha;
+    struct cl_zherk_args_s clargs;
     CHAM_tile_t *tileA;
-    double beta;
     CHAM_tile_t *tileC;
 
     tileA = cti_interface_get(descr[0]);
     tileC = cti_interface_get(descr[1]);
 
-    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
+    starpu_codelet_unpack_args( cl_arg, &clargs );
 
     RUNTIME_getStream(stream);
 
     CUDA_zherk(
-        uplo, trans, n, k,
-        &alpha, tileA->mat, tileA->ld,
-        &beta,  tileC->mat, tileC->ld,
-        stream);
+        clargs.uplo, clargs.trans, clargs.n, clargs.k,
+        (cuDoubleComplex*)&(clargs.alpha),
+        tileA->mat, tileA->ld,
+        (cuDoubleComplex*)&(clargs.beta),
+        tileC->mat, tileC->ld,
+        stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
@@ -78,52 +81,73 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUDA */
+#endif /* defined(CHAMELEON_USE_CUDA) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS(zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS( zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC )
 
-void INSERT_TASK_zherk(const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, cham_trans_t trans,
-                      int n, int k, int nb,
-                      double alpha, const CHAM_desc_t *A, int Am, int An,
-                      double beta, const CHAM_desc_t *C, int Cm, int Cn)
+void INSERT_TASK_zherk( const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, cham_trans_t trans,
+                        int n, int k, int nb,
+                        double alpha, const CHAM_desc_t *A, int Am, int An,
+                        double beta,  const CHAM_desc_t *C, int Cm, int Cn )
 {
     if ( alpha == 0. ) {
         return INSERT_TASK_zlascal( options, uplo, n, n, nb,
                                     beta, C, Cm, Cn );
     }
 
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zherk;
-    void (*callback)(void*) = options->profiling ? cl_zherk_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
-    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
+    struct cl_zherk_args_s clargs = {
+        .uplo  = uplo,
+        .trans = trans,
+        .n     = n,
+        .k     = k,
+        .alpha = alpha,
+        .tileA = A->get_blktile( A, Am, An ),
+        .beta  = beta,
+        .tileC = C->get_blktile( C, Cm, Cn ),
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid, accessC;
+    char                    *cl_name = "zherk";
 
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
     CHAMELEON_ACCESS_RW(C, Cm, Cn);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_zherk_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Reduce the C access if needed */
+    accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,    &uplo,              sizeof(int),
-        STARPU_VALUE,    &trans,             sizeof(int),
-        STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_VALUE,    &k,                 sizeof(int),
-        STARPU_VALUE,    &alpha,             sizeof(double),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,    &beta,              sizeof(double),
-        accessC,          RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
+        &cl_zherk,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_zherk_args_s),
+        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        accessC,       RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zherk",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
+
+    (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c
index bec2d92288ad496a8e34d746ea7513f63c6c4a0c..3fd038bded4bd94643ae36e354a40453d4511747 100644
--- a/runtime/starpu/codelets/codelet_zlacpy.c
+++ b/runtime/starpu/codelets/codelet_zlacpy.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlacpy StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge
@@ -27,70 +25,93 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_zlacpy_args_s {
     cham_uplo_t uplo;
-    int M;
-    int N;
+    int m;
+    int n;
     int displA;
     int displB;
     CHAM_tile_t *tileA;
     CHAM_tile_t *tileB;
-    CHAMELEON_Complex64_t *A;
-    CHAMELEON_Complex64_t *B;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void
+cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_zlacpy_args_s clargs;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
     tileA = cti_interface_get(descr[0]);
     tileB = cti_interface_get(descr[1]);
 
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &displB);
-
-    assert( tileA->format & CHAMELEON_TILE_FULLRANK );
-    assert( tileB->format & CHAMELEON_TILE_FULLRANK );
-
-    A = tileA->mat;
-    B = tileB->mat;
-    CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld );
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    assert( clargs.displA == 0 );
+    assert( clargs.displB == 0 );
+    /* A = tileA->mat; */
+    /* B = tileB->mat; */
+    /* CORE_zlacpy( uplo, M, N, A + displA, tileA->ld, B + displB, tileB->ld ); */
+    TCORE_zlacpy( clargs.uplo, clargs.m, clargs.n, tileA, tileB );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlacpy, cl_zlacpy_cpu_func)
+CODELETS_CPU( zlacpy, cl_zlacpy_cpu_func )
 
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int m, int n, int nb,
                           int displA, const CHAM_desc_t *A, int Am, int An,
                           int displB, const CHAM_desc_t *B, int Bm, int Bn )
 {
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zlacpy;
-    void (*callback)(void*) = options->profiling ? cl_zlacpy_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    struct cl_zlacpy_args_s clargs = {
+        .uplo   = uplo,
+        .m      = m,
+        .n      = n,
+        .displA = displA,
+        .displB = displB,
+        .tileA  = A->get_blktile( A, Am, An ),
+        .tileB  = B->get_blktile( B, Bm, Bn ),
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid;
+    char                    *cl_name = "zlacpy";
 
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R( A, Am, An );
-    CHAMELEON_ACCESS_W( B, Bm, Bn );
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_W(B, Bm, Bn);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_zlacpy_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,   &uplo,                sizeof(cham_uplo_t),
-        STARPU_VALUE,   &m,                   sizeof(int),
-        STARPU_VALUE,   &n,                   sizeof(int),
-        STARPU_VALUE,   &displA,              sizeof(int),
-        STARPU_R,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,   &displB,              sizeof(int),
-        STARPU_W,        RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_PRIORITY, options->priority,
-        STARPU_CALLBACK, callback,
+        &cl_zlacpy,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_zlacpy_args_s),
+        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_W,      RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zlacpy",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
+
+    (void)nb;
 }
 
 void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c
index bd6823a08d8c3608141aeb9d145ffc671b6afad2..eb03a8685b19e0df448ccac2b2461de6e3250a54 100644
--- a/runtime/starpu/codelets/codelet_zlascal.c
+++ b/runtime/starpu/codelets/codelet_zlascal.c
@@ -21,33 +21,38 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlascal_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_zlascal_args_s {
     cham_uplo_t uplo;
-    int M;
-    int N;
+    int m;
+    int n;
     CHAMELEON_Complex64_t alpha;
     CHAM_tile_t *tileA;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void
+cl_zlascal_cpu_func( void *descr[], void *cl_arg )
+{
+    struct cl_zlascal_args_s clargs;
+    CHAM_tile_t *tileA;
 
     tileA = cti_interface_get(descr[0]);
 
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha);
-    TCORE_zlascal(uplo, M, N, alpha, tileA);
-    return;
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_zlascal( clargs.uplo, clargs.m, clargs.n, clargs.alpha, tileA );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlascal, cl_zlascal_cpu_func)
+CODELETS_CPU( zlascal, cl_zlascal_cpu_func )
 
 void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
                           cham_uplo_t uplo,
                           int m, int n, int nb,
                           CHAMELEON_Complex64_t alpha,
-                          const CHAM_desc_t *A, int Am, int An)
+                          const CHAM_desc_t *A, int Am, int An )
 {
     if ( alpha == 0. ) {
         return INSERT_TASK_zlaset( options, uplo, m, n,
@@ -57,28 +62,46 @@ void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
         return;
     }
 
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zlascal;
-    void (*callback)(void*) = options->profiling ? cl_zlascal_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    struct cl_zlascal_args_s clargs = {
+        .uplo  = uplo,
+        .m     = m,
+        .n     = n,
+        .alpha = alpha,
+        .tileA = A->get_blktile( A, Am, An ),
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid;
+    char                    *cl_name = "zlascal";
 
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_RW(A, Am, An);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_zlascal_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,    &uplo,              sizeof(int),
-        STARPU_VALUE,    &m,                  sizeof(int),
-        STARPU_VALUE,    &n,                  sizeof(int),
-        STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
+        &cl_zlascal,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_zlascal_args_s),
+        STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zlascal",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
+
+    (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c
index c7661bdb8278687f6d4cea4496eedd504d0df897..0636686e3be44a17947b741109eb1d9450fdf841 100644
--- a/runtime/starpu/codelets/codelet_zlaset.c
+++ b/runtime/starpu/codelets/codelet_zlaset.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlaset StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
@@ -26,56 +24,78 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlaset_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_zlaset_args_s {
     cham_uplo_t uplo;
-    int M;
-    int N;
+    int m;
+    int n;
     CHAMELEON_Complex64_t alpha;
     CHAMELEON_Complex64_t beta;
     CHAM_tile_t *tileA;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void
+cl_zlaset_cpu_func( void *descr[], void *cl_arg )
+{
+    struct cl_zlaset_args_s clargs;
+    CHAM_tile_t *tileA;
 
     tileA = cti_interface_get(descr[0]);
 
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta);
-    TCORE_zlaset(uplo, M, N, alpha, beta, tileA);
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_zlaset( clargs.uplo, clargs.m, clargs.n, clargs.alpha, clargs.beta, tileA );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlaset, cl_zlaset_cpu_func)
+CODELETS_CPU( zlaset, cl_zlaset_cpu_func )
 
-void INSERT_TASK_zlaset(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, int M, int N,
-                       CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
-                       const CHAM_desc_t *A, int Am, int An)
+void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n,
+                         CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
+                         const CHAM_desc_t *A, int Am, int An )
 {
+    struct cl_zlaset_args_s clargs = {
+        .uplo  = uplo,
+        .m     = m,
+        .n     = n,
+        .alpha = alpha,
+        .beta  = beta,
+        .tileA = A->get_blktile( A, Am, An ),
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid;
+    char                    *cl_name = "zlaset";
 
-    struct starpu_codelet *codelet = &cl_zlaset;
-    void (*callback)(void*) = options->profiling ? cl_zlaset_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
-
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_W(A, Am, An);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_zlaset_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,  &uplo,                sizeof(int),
-        STARPU_VALUE,     &M,                        sizeof(int),
-        STARPU_VALUE,     &N,                        sizeof(int),
-        STARPU_VALUE, &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_VALUE,  &beta,         sizeof(CHAMELEON_Complex64_t),
+        &cl_zlaset,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_zlaset_args_s),
         STARPU_W,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zlaset",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
 }
diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c
index 0f3f1911bec1e1f83b684c398b82d67b0ff3fbcb..9ff3fc6e92bfa7fb42cb52ce9f83fa2b8b262d6e 100644
--- a/runtime/starpu/codelets/codelet_zlauum.c
+++ b/runtime/starpu/codelets/codelet_zlauum.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlauum StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge
@@ -27,55 +25,74 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+struct cl_zlauum_args_s {
+    cham_uplo_t uplo;
+    int n;
+    CHAM_tile_t *tileA;
+};
+
 #if !defined(CHAMELEON_SIMULATION)
-static void cl_zlauum_cpu_func(void *descr[], void *cl_arg)
+static void
+cl_zlauum_cpu_func(void *descr[], void *cl_arg)
 {
-    cham_uplo_t uplo;
-    int N;
+    struct cl_zlauum_args_s clargs;
     CHAM_tile_t *tileA;
+    int info = 0;
 
     tileA = cti_interface_get(descr[0]);
 
-    starpu_codelet_unpack_args(cl_arg, &uplo, &N);
-    TCORE_zlauum(uplo, N, tileA);
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_zlauum( clargs.uplo, clargs.n, tileA );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlauum, cl_zlauum_cpu_func)
+CODELETS_CPU( zlauum, cl_zlauum_cpu_func )
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int n, int nb,
                          const CHAM_desc_t *A, int Am, int An )
 {
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zlauum;
-    void (*callback)(void*) = options->profiling ? cl_zlauum_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    struct cl_zlauum_args_s clargs = {
+        .uplo  = uplo,
+        .n     = n,
+        .tileA = A->get_blktile( A, Am, An ),
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid;
+    char                    *cl_name = "zlauum";
 
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_RW(A, Am, An);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_zlauum_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,    &uplo,              sizeof(int),
-        STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_RW,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
+        &cl_zlauum,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_zlauum_args_s),
+        STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zlauum",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
 
+        0 );
+
+    (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c
index 87c071e44a71d4f48e3226b14e7e8c81d0fc13be..4c04611ccbbf97a8bffe43516bcf7979189551d2 100644
--- a/runtime/starpu/codelets/codelet_zplghe.c
+++ b/runtime/starpu/codelets/codelet_zplghe.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zplghe StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Piotr Luszczek
  * @author Pierre Lemarinier
  * @author Mathieu Faverge
@@ -27,11 +25,7 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/*   cl_zplghe_cpu_func - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zplghe_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_zplghe_args_s {
     double bump;
     int m;
     int n;
@@ -40,47 +34,73 @@ static void cl_zplghe_cpu_func(void *descr[], void *cl_arg)
     int m0;
     int n0;
     unsigned long long int seed;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zplghe_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_zplghe_args_s clargs;
+    CHAM_tile_t *tileA;
 
     tileA = cti_interface_get(descr[0]);
 
-    starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &bigM, &m0, &n0, &seed );
-    TCORE_zplghe( bump, m, n, tileA, bigM, m0, n0, seed );
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_zplghe( clargs.bump, clargs.m, clargs.n, tileA,
+                  clargs.bigM, clargs.m0, clargs.n0, clargs.seed );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU(zplghe, cl_zplghe_cpu_func)
+CODELETS_CPU( zplghe, cl_zplghe_cpu_func )
 
 void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
                          double bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
                          int bigM, int m0, int n0, unsigned long long int seed )
 {
-    struct starpu_codelet *codelet = &cl_zplghe;
-    void (*callback)(void*) = options->profiling ? cl_zplghe_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    struct cl_zplghe_args_s clargs = {
+        .bump  = bump,
+        .m     = m,
+        .n     = n,
+        .tileA = A->get_blktile( A, Am, An ),
+        .bigM  = bigM,
+        .m0    = m0,
+        .n0    = n0,
+        .seed  = seed,
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid;
+    char                    *cl_name = "zplghe";
 
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_W(A, Am, An);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_zplghe_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE, &bump,                   sizeof(double),
-        STARPU_VALUE,    &m,                      sizeof(int),
-        STARPU_VALUE,    &n,                      sizeof(int),
-        STARPU_W,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE, &bigM,                      sizeof(int),
-        STARPU_VALUE,   &m0,                      sizeof(int),
-        STARPU_VALUE,   &n0,                      sizeof(int),
-        STARPU_VALUE, &seed,   sizeof(unsigned long long int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
+        &cl_zplghe,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_zplghe_args_s),
+        STARPU_W,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zplghe",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
+
 }
diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c
index 748e78d3310277cbcd0da4d30c9d563afb1a66d1..57aec1bf28967835817f9ddbef05800b57b93aa8 100644
--- a/runtime/starpu/codelets/codelet_zplgsy.c
+++ b/runtime/starpu/codelets/codelet_zplgsy.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zplgsy StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Piotr Luszczek
  * @author Pierre Lemarinier
  * @author Mathieu Faverge
@@ -27,11 +25,7 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/*   cl_zplgsy_cpu_func - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_zplgsy_args_s {
     CHAMELEON_Complex64_t bump;
     int m;
     int n;
@@ -40,48 +34,73 @@ static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg)
     int m0;
     int n0;
     unsigned long long int seed;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_zplgsy_args_s clargs;
+    CHAM_tile_t *tileA;
 
     tileA = cti_interface_get(descr[0]);
 
-    starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &bigM, &m0, &n0, &seed );
-    TCORE_zplgsy( bump, m, n, tileA, bigM, m0, n0, seed );
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_zplgsy( clargs.bump, clargs.m, clargs.n, tileA,
+                  clargs.bigM, clargs.m0, clargs.n0, clargs.seed );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU(zplgsy, cl_zplgsy_cpu_func)
+CODELETS_CPU( zplgsy, cl_zplgsy_cpu_func )
 
 void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
-                        CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
-                        int bigM, int m0, int n0, unsigned long long int seed )
+                         CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
+                         int bigM, int m0, int n0, unsigned long long int seed )
 {
+    struct cl_zplgsy_args_s clargs = {
+        .bump  = bump,
+        .m     = m,
+        .n     = n,
+        .tileA = A->get_blktile( A, Am, An ),
+        .bigM  = bigM,
+        .m0    = m0,
+        .n0    = n0,
+        .seed  = seed,
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid;
+    char                    *cl_name = "zplgsy";
 
-    struct starpu_codelet *codelet = &cl_zplgsy;
-    void (*callback)(void*) = options->profiling ? cl_zplgsy_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
-
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_W(A, Am, An);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_zplgsy_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE, &bump,       sizeof(CHAMELEON_Complex64_t),
-        STARPU_VALUE,    &m,                      sizeof(int),
-        STARPU_VALUE,    &n,                      sizeof(int),
-        STARPU_W,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE, &bigM,                      sizeof(int),
-        STARPU_VALUE,   &m0,                      sizeof(int),
-        STARPU_VALUE,   &n0,                      sizeof(int),
-        STARPU_VALUE, &seed,   sizeof(unsigned long long int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
+        &cl_zplgsy,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_zplgsy_args_s),
+        STARPU_W,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zplgsy",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
+
 }
diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c
index 1376e950c222a2c130b9ac9745c059d631face37..a4eef0c8a1df265dc9afd8d9c896811cf4634e52 100644
--- a/runtime/starpu/codelets/codelet_zplrnt.c
+++ b/runtime/starpu/codelets/codelet_zplrnt.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zplrnt StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Piotr Luszczek
  * @author Pierre Lemarinier
  * @author Mathieu Faverge
@@ -27,9 +25,7 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_zplrnt_args_s {
     int m;
     int n;
     CHAM_tile_t *tileA;
@@ -37,47 +33,72 @@ static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg)
     int m0;
     int n0;
     unsigned long long int seed;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void
+cl_zplrnt_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_zplrnt_args_s clargs;
+    CHAM_tile_t *tileA;
 
     tileA = cti_interface_get(descr[0]);
 
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &bigM, &m0, &n0, &seed );
-    TCORE_zplrnt( m, n, tileA, bigM, m0, n0, seed );
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_zplrnt( clargs.m, clargs.n, tileA,
+                  clargs.bigM, clargs.m0, clargs.n0, clargs.seed );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU(zplrnt, cl_zplrnt_cpu_func)
+CODELETS_CPU( zplrnt, cl_zplrnt_cpu_func )
 
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
                          int m, int n, const CHAM_desc_t *A, int Am, int An,
                          int bigM, int m0, int n0, unsigned long long int seed )
 {
+    struct cl_zplrnt_args_s clargs = {
+        .m     = m,
+        .n     = n,
+        .tileA = A->get_blktile( A, Am, An ),
+        .bigM  = bigM,
+        .m0    = m0,
+        .n0    = n0,
+        .seed  = seed,
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid;
+    char                    *cl_name = "zplrnt";
 
-    struct starpu_codelet *codelet = &cl_zplrnt;
-    void (*callback)(void*) = options->profiling ? cl_zplrnt_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
-
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_W(A, Am, An);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_zplrnt_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,    &m,                      sizeof(int),
-        STARPU_VALUE,    &n,                      sizeof(int),
-        STARPU_W,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE, &bigM,                      sizeof(int),
-        STARPU_VALUE,   &m0,                      sizeof(int),
-        STARPU_VALUE,   &n0,                      sizeof(int),
-        STARPU_VALUE, &seed,   sizeof(unsigned long long int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
+        &cl_zplrnt,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_zplrnt_args_s),
+        STARPU_W,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zplrnt",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
 }
diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c
index 2337b00fde7207c893e935e250deb2d1cafc0cb6..93f4f640d72b22660bd18dcf7067ddff008a7dec 100644
--- a/runtime/starpu/codelets/codelet_zpotrf.c
+++ b/runtime/starpu/codelets/codelet_zpotrf.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zpotrf StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -27,24 +25,30 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_zpotrf_args_s {
     cham_uplo_t uplo;
     int n;
     CHAM_tile_t *tileA;
     int iinfo;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void
+cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_zpotrf_args_s clargs;
+    CHAM_tile_t *tileA;
     int info = 0;
 
     tileA = cti_interface_get(descr[0]);
 
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &iinfo, &sequence, &request);
-    TCORE_zpotrf(uplo, n, tileA, &info);
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_zpotrf( clargs.uplo, clargs.n, tileA, &info );
 
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
+    if ( (clargs.sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
+        RUNTIME_sequence_flush( NULL, clargs.sequence, clargs.request, clargs.iinfo+info );
     }
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
@@ -52,46 +56,54 @@ static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-#if defined(CHAMELEON_SIMULATION) && defined(CHAMELEON_SIMULATION_EXTENDED)
-CODELETS( zpotrf, cl_zpotrf_cpu_func, cl_zpotrf_cuda_func, STARPU_CUDA_ASYNC )
-#else
 CODELETS_CPU( zpotrf, cl_zpotrf_cpu_func )
-#endif
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zpotrf(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An,
-                       int iinfo)
+void INSERT_TASK_zpotrf( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An,
+                         int iinfo )
 {
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zpotrf;
-    void (*callback)(void*) = options->profiling ? cl_zpotrf_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    struct cl_zpotrf_args_s clargs = {
+        .uplo     = uplo,
+        .n        = n,
+        .tileA    = A->get_blktile( A, Am, An ),
+        .iinfo    = iinfo,
+        .sequence = options->sequence,
+        .request  = options->request,
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid;
+    char                    *cl_name = "zpotrf";
 
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_RW(A, Am, An);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_zpotrf_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,    &uplo,                      sizeof(int),
-        STARPU_VALUE,    &n,                         sizeof(int),
-        STARPU_RW,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,    &iinfo,                     sizeof(int),
-        STARPU_VALUE,    &(options->sequence),       sizeof(RUNTIME_sequence_t*),
-        STARPU_VALUE,    &(options->request),        sizeof(RUNTIME_request_t*),
-        /* STARPU_SCRATCH,   options->ws_worker, */
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
+        &cl_zpotrf,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_zpotrf_args_s),
+        STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zpotrf",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
+
+    (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index 9552eee9ca40f3982c39b2140e9d5a9a2fe1b7df..0cba41cab3ab660e5dada013b4614cb179059b34 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -25,9 +25,7 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_zsyrk_args_s {
     cham_uplo_t uplo;
     cham_trans_t trans;
     int n;
@@ -36,41 +34,46 @@ static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg)
     CHAM_tile_t *tileA;
     CHAMELEON_Complex64_t beta;
     CHAM_tile_t *tileC;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void
+cl_zsyrk_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_zsyrk_args_s clargs;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileC;
 
     tileA = cti_interface_get(descr[0]);
     tileC = cti_interface_get(descr[1]);
 
-    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
-    TCORE_zsyrk(uplo, trans,
-        n, k,
-        alpha, tileA,
-        beta, tileC);
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_zsyrk( clargs.uplo, clargs.trans, clargs.n, clargs.k,
+                 clargs.alpha, tileA, clargs.beta, tileC );
 }
 
-#ifdef CHAMELEON_USE_CUDA
-static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
+#if defined(CHAMELEON_USE_CUDA)
+static void
+cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
 {
-    cham_uplo_t uplo;
-    cham_trans_t trans;
-    int n;
-    int k;
-    cuDoubleComplex alpha;
+    struct cl_zsyrk_args_s clargs;
     CHAM_tile_t *tileA;
-    cuDoubleComplex beta;
     CHAM_tile_t *tileC;
 
     tileA = cti_interface_get(descr[0]);
     tileC = cti_interface_get(descr[1]);
 
-    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &n, &k, &alpha, &beta);
+    starpu_codelet_unpack_args( cl_arg, &clargs );
 
     RUNTIME_getStream(stream);
 
     CUDA_zsyrk(
-        uplo, trans, n, k,
-        &alpha, tileA->mat, tileA->ld,
-        &beta,  tileC->mat, tileC->ld,
-        stream);
+        clargs.uplo, clargs.trans, clargs.n, clargs.k,
+        (cuDoubleComplex*)&(clargs.alpha),
+        tileA->mat, tileA->ld,
+        (cuDoubleComplex*)&(clargs.beta),
+        tileC->mat, tileC->ld,
+        stream );
 
 #ifndef STARPU_CUDA_ASYNC
     cudaStreamSynchronize( stream );
@@ -78,57 +81,74 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUDA */
+#endif /* defined(CHAMELEON_USE_CUDA) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS(zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS( zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC )
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, cham_trans_t trans,
-                      int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
+void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, cham_trans_t trans,
+                        int n, int k, int nb,
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                        CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn )
 {
     if ( alpha == 0. ) {
         return INSERT_TASK_zlascal( options, uplo, n, n, nb,
                                     beta, C, Cm, Cn );
     }
 
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zsyrk;
-    void (*callback)(void*) = options->profiling ? cl_zsyrk_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
-    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
+    struct cl_zsyrk_args_s clargs = {
+        .uplo  = uplo,
+        .trans = trans,
+        .n     = n,
+        .k     = k,
+        .alpha = alpha,
+        .tileA = A->get_blktile( A, Am, An ),
+        .beta  = beta,
+        .tileC = C->get_blktile( C, Cm, Cn ),
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid, accessC;
+    char                    *cl_name = "zsyrk";
 
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
     CHAMELEON_ACCESS_RW(C, Cm, Cn);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_zsyrk_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Reduce the C access if needed */
+    accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,      &uplo,                sizeof(int),
-        STARPU_VALUE,     &trans,                sizeof(int),
-        STARPU_VALUE,         &n,                        sizeof(int),
-        STARPU_VALUE,         &k,                        sizeof(int),
-        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
-        accessC,                  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
+        &cl_zsyrk,
+
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_zsyrk_args_s),
+        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        accessC,       RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zsyrk",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
+
+    (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c
index 689cb6254ed2b8956505108665977176761c8278..2cbaa3aeff6fdc406006f8bfbaeccce107267dd8 100644
--- a/runtime/starpu/codelets/codelet_ztradd.c
+++ b/runtime/starpu/codelets/codelet_ztradd.c
@@ -21,30 +21,38 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztradd_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_ztradd_args_s {
     cham_uplo_t uplo;
     cham_trans_t trans;
-    int M;
-    int N;
+    int m;
+    int n;
     CHAMELEON_Complex64_t alpha;
     CHAM_tile_t *tileA;
     CHAMELEON_Complex64_t beta;
     CHAM_tile_t *tileB;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void
+cl_ztradd_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_ztradd_args_s clargs;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
     tileA = cti_interface_get(descr[0]);
     tileB = cti_interface_get(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &beta);
-    TCORE_ztradd(uplo, trans, M, N, alpha, tileA, beta, tileB);
-    return;
+
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_ztradd( clargs.uplo, clargs.trans, clargs.m, clargs.n,
+                  clargs.alpha, tileA, clargs.beta, tileB );
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU(ztradd, cl_ztradd_cpu_func)
+CODELETS_CPU( ztradd, cl_ztradd_cpu_func )
 
 void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
@@ -56,34 +64,54 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                                     beta, B, Bm, Bn );
     }
 
-    struct starpu_codelet *codelet = &cl_ztradd;
-    void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
-    int accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW;
+    struct cl_ztradd_args_s clargs = {
+        .uplo  = uplo,
+        .trans = trans,
+        .m     = m,
+        .n     = n,
+        .alpha = alpha,
+        .tileA = A->get_blktile( A, Am, An ),
+        .beta  = beta,
+        .tileB = B->get_blktile( B, Bm, Bn ),
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid, accessB;
+    char                    *cl_name = "ztradd";
 
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
     CHAMELEON_ACCESS_RW(B, Bm, Bn);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_ztradd_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Reduce the B access if needed */
+    accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,    &uplo,               sizeof(int),
-        STARPU_VALUE,    &trans,              sizeof(int),
-        STARPU_VALUE,    &m,                  sizeof(int),
-        STARPU_VALUE,    &n,                  sizeof(int),
-        STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,    &beta,               sizeof(CHAMELEON_Complex64_t),
-        accessB,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
+        &cl_ztradd,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_ztradd_args_s),
+        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        accessB,       RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "ztradd",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
 
     (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index 354e5f454d84d9268ae4df9eed5791a231d0c942..099e5ca8a987135094c2c675d3756f7328c94df1 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -25,51 +25,53 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_ztrmm_args_s {
     cham_side_t side;
     cham_uplo_t uplo;
     cham_trans_t transA;
     cham_diag_t diag;
-    int M;
-    int N;
+    int m;
+    int n;
     CHAMELEON_Complex64_t alpha;
     CHAM_tile_t *tileA;
     CHAM_tile_t *tileB;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void
+cl_ztrmm_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_ztrmm_args_s clargs;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
     tileA = cti_interface_get(descr[0]);
     tileB = cti_interface_get(descr[1]);
 
-    starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha);
-    TCORE_ztrmm(side, uplo,
-        transA, diag,
-        M, N,
-        alpha, tileA,
-        tileB);
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_ztrmm( clargs.side, clargs.uplo, clargs.transA, clargs.diag,
+                 clargs.m, clargs.n, clargs.alpha, tileA, tileB );
 }
 
 #ifdef CHAMELEON_USE_CUDA
-static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
+static void
+cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
 {
-    cham_side_t side;
-    cham_uplo_t uplo;
-    cham_trans_t transA;
-    cham_diag_t diag;
-    int M;
-    int N;
-    cuDoubleComplex alpha;
+    struct cl_ztrmm_args_s clargs;
     CHAM_tile_t *tileA;
     CHAM_tile_t *tileB;
 
     tileA = cti_interface_get(descr[0]);
     tileB = cti_interface_get(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &M, &N, &alpha);
+
+    starpu_codelet_unpack_args( cl_arg, &clargs );
 
     RUNTIME_getStream(stream);
 
     CUDA_ztrmm(
-        side, uplo, transA, diag, M, N, &alpha,
+        clargs.side, clargs.uplo, clargs.transA, clargs.diag,
+        clargs.m, clargs.n,
+        (cuDoubleComplex*)&(clargs.alpha),
         tileA->mat, tileA->ld,
         tileB->mat, tileB->ld,
         stream );
@@ -80,58 +82,66 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUDA */
+#endif /* defined(CHAMELEON_USE_CUDA) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
-
 /*
  * Codelet definition
  */
-CODELETS(ztrmm, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS( ztrmm, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC )
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
-                      cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
-                      int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
-                      const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
+                        cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
+                        int m, int n, int nb,
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                        const CHAM_desc_t *B, int Bm, int Bn )
 {
-    if ( alpha == 0. ) {
-        return INSERT_TASK_zlaset( options, ChamUpperLower, m, n,
-                                   alpha, alpha, B, Bm, Bn );
-    }
-
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_ztrmm;
-    void (*callback)(void*) = options->profiling ? cl_ztrmm_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    struct cl_ztrmm_args_s clargs = {
+        .side   = side,
+        .uplo   = uplo,
+        .transA = transA,
+        .diag   = diag,
+        .m      = m,
+        .n      = n,
+        .alpha  = alpha,
+        .tileA  = A->get_blktile( A, Am, An ),
+        .tileB  = B->get_blktile( B, Bm, Bn ),
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid;
+    char                    *cl_name = "ztrmm";
 
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
     CHAMELEON_ACCESS_RW(B, Bm, Bn);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_ztrmm_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,      &side,                sizeof(int),
-        STARPU_VALUE,      &uplo,                sizeof(int),
-        STARPU_VALUE,    &transA,                sizeof(int),
-        STARPU_VALUE,      &diag,                sizeof(int),
-        STARPU_VALUE,         &m,                        sizeof(int),
-        STARPU_VALUE,         &n,                        sizeof(int),
-        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                  RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
+        &cl_ztrmm,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_ztrmm_args_s),
+        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_RW,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "ztrmm",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
+
+    (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index 694f0a99bbd5b7638a9c81829e740d402d92375a..e7ad01a43e6745bc49ecaee676d74cb3f6180f49 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon ztrsm StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -27,9 +25,7 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_ztrsm_args_s {
     cham_side_t side;
     cham_uplo_t uplo;
     cham_trans_t transA;
@@ -39,38 +35,43 @@ static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg)
     CHAMELEON_Complex64_t alpha;
     CHAM_tile_t *tileA;
     CHAM_tile_t *tileB;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void
+cl_ztrsm_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_ztrsm_args_s clargs;
+    CHAM_tile_t *tileA;
+    CHAM_tile_t *tileB;
 
     tileA = cti_interface_get(descr[0]);
     tileB = cti_interface_get(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha);
-    TCORE_ztrsm(side, uplo,
-        transA, diag,
-        m, n,
-        alpha, tileA,
-        tileB);
+
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_ztrsm( clargs.side, clargs.uplo, clargs.transA, clargs.diag,
+                 clargs.m, clargs.n, clargs.alpha, tileA, tileB );
 }
 
 #ifdef CHAMELEON_USE_CUDA
-static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
+static void
+cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
 {
-    cham_side_t side;
-    cham_uplo_t uplo;
-    cham_trans_t transA;
-    cham_diag_t diag;
-    int m;
-    int n;
-    cuDoubleComplex alpha;
+    struct cl_ztrsm_args_s clargs;
     CHAM_tile_t *tileA;
     CHAM_tile_t *tileB;
 
     tileA = cti_interface_get(descr[0]);
     tileB = cti_interface_get(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &side, &uplo, &transA, &diag, &m, &n, &alpha);
+
+    starpu_codelet_unpack_args( cl_arg, &clargs );
 
     RUNTIME_getStream(stream);
 
     CUDA_ztrsm(
-        side, uplo, transA, diag, m, n, &alpha,
+        clargs.side, clargs.uplo, clargs.transA, clargs.diag,
+        clargs.m, clargs.n,
+        (cuDoubleComplex*)&(clargs.alpha),
         tileA->mat, tileA->ld,
         tileB->mat, tileB->ld,
         stream );
@@ -81,52 +82,66 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
 
     return;
 }
-#endif /* CHAMELEON_USE_CUDA */
+#endif /* defined(CHAMELEON_USE_CUDA) */
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS(ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS( ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC )
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_ztrsm(const RUNTIME_option_t *options,
-                      cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
-                      int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
-                      const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
+                        cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
+                        int m, int n, int nb,
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                        const CHAM_desc_t *B, int Bm, int Bn )
 {
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_ztrsm;
-    void (*callback)(void*) = options->profiling ? cl_ztrsm_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    struct cl_ztrsm_args_s clargs = {
+        .side   = side,
+        .uplo   = uplo,
+        .transA = transA,
+        .diag   = diag,
+        .m      = m,
+        .n      = n,
+        .alpha  = alpha,
+        .tileA  = A->get_blktile( A, Am, An ),
+        .tileB  = B->get_blktile( B, Bm, Bn ),
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid;
+    char                    *cl_name = "ztrsm";
 
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
     CHAMELEON_ACCESS_RW(B, Bm, Bn);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_ztrsm_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,    &side,               sizeof(int),
-        STARPU_VALUE,    &uplo,               sizeof(int),
-        STARPU_VALUE,    &transA,             sizeof(int),
-        STARPU_VALUE,    &diag,               sizeof(int),
-        STARPU_VALUE,    &m,                  sizeof(int),
-        STARPU_VALUE,    &n,                  sizeof(int),
-        STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_RW,        RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
+        &cl_ztrsm,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_ztrsm_args_s),
+        STARPU_R,      RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_RW,     RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "ztrsm",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
+
+    (void)nb;
 }
diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c
index 01b14eef48612375cd0042ddda0f3da191163e91..9b2b0e5e3ded41d29fbfb272d95eada319dade35 100644
--- a/runtime/starpu/codelets/codelet_ztrtri.c
+++ b/runtime/starpu/codelets/codelet_ztrtri.c
@@ -12,8 +12,6 @@
  * @brief Chameleon ztrtri StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge
@@ -27,24 +25,31 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg)
-{
+struct cl_ztrtri_args_s {
     cham_uplo_t uplo;
     cham_diag_t diag;
-    int N;
+    int n;
     CHAM_tile_t *tileA;
     int iinfo;
     RUNTIME_sequence_t *sequence;
     RUNTIME_request_t *request;
+};
+
+#if !defined(CHAMELEON_SIMULATION)
+static void
+cl_ztrtri_cpu_func(void *descr[], void *cl_arg)
+{
+    struct cl_ztrtri_args_s clargs;
+    CHAM_tile_t *tileA;
     int info = 0;
 
     tileA = cti_interface_get(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &iinfo, &sequence, &request);
-    TCORE_ztrtri(uplo, diag, N, tileA, &info);
 
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
+    starpu_codelet_unpack_args( cl_arg, &clargs );
+    TCORE_ztrtri( clargs.uplo, clargs.diag, clargs.n, tileA, &info );
+
+    if ( (clargs.sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
+        RUNTIME_sequence_flush( NULL, clargs.sequence, clargs.request, clargs.iinfo+info );
     }
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
@@ -52,43 +57,55 @@ static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(ztrtri, cl_ztrtri_cpu_func)
+CODELETS_CPU( ztrtri, cl_ztrtri_cpu_func )
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, cham_diag_t diag,
-                         int n, int nb,
+                         cham_uplo_t uplo, cham_diag_t diag, int n, int nb,
                          const CHAM_desc_t *A, int Am, int An,
                          int iinfo )
 {
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_ztrtri;
-    void (*callback)(void*) = options->profiling ? cl_ztrtri_callback : NULL;
-    starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
-    int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    struct cl_ztrtri_args_s clargs = {
+        .uplo     = uplo,
+        .diag     = diag,
+        .n        = n,
+        .tileA    = A->get_blktile( A, Am, An ),
+        .iinfo    = iinfo,
+        .sequence = options->sequence,
+        .request  = options->request,
+    };
+    void (*callback)(void*);
+    RUNTIME_request_t       *request  = options->request;
+    starpu_option_request_t *schedopt = (starpu_option_request_t *)(request->schedopt);
+    int                      workerid;
+    char                    *cl_name = "ztrtri";
 
+    /* Handle cache */
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_RW(A, Am, An);
     CHAMELEON_END_ACCESS_DECLARATION;
 
+    /* Callback fro profiling information */
+    callback = options->profiling ? cl_ztrtri_callback : NULL;
+
+    /* Fix the worker id */
+    workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+
+    /* Insert the task */
     rt_starpu_insert_task(
-        codelet,
-        STARPU_VALUE,    &uplo,              sizeof(int),
-        STARPU_VALUE,    &diag,              sizeof(int),
-        STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_RW,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,    &iinfo,             sizeof(int),
-        STARPU_VALUE,    &(options->sequence),       sizeof(RUNTIME_sequence_t*),
-        STARPU_VALUE,    &(options->request),        sizeof(RUNTIME_request_t*),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
+        &cl_ztrtri,
+        /* Task codelet arguments */
+        STARPU_VALUE, &clargs, sizeof(struct cl_ztrtri_args_s),
+        STARPU_RW,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+
+        /* Common task arguments */
+        STARPU_PRIORITY,          options->priority,
+        STARPU_CALLBACK,          callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "ztrtri",
+        STARPU_NAME,              cl_name,
 #endif
-        0);
+
+        0 );
+
+    (void)nb;
 }
diff --git a/tools/bench/plafrim/run.sh b/tools/bench/plafrim/run.sh
index b3ea6b313a3a9eb73eb5fbfdd670026672fade3b..3234c2bb8a8ecf0943f2e8c7d11fcb5bdb8cc2e3 100755
--- a/tools/bench/plafrim/run.sh
+++ b/tools/bench/plafrim/run.sh
@@ -4,6 +4,10 @@ echo "######################### Chameleon benchmarks #########################"
 
 set -x
 
+# Unset the binding environment of the CI for this specific case
+unset STARPU_MPI_NOBIND
+unset STARPU_WORKERS_NOBIND
+
 # to avoid a lock during fetching chameleon branch in parallel
 export XDG_CACHE_HOME=/tmp/guix-$$