diff --git a/cmake_modules/morse_cmake b/cmake_modules/morse_cmake
index 657741dbab25d4008c4dfc2ebdf34a3f43bf00e6..be970c0169f847f9a61993d2e6a7cd49a409399e 160000
--- a/cmake_modules/morse_cmake
+++ b/cmake_modules/morse_cmake
@@ -1 +1 @@
-Subproject commit 657741dbab25d4008c4dfc2ebdf34a3f43bf00e6
+Subproject commit be970c0169f847f9a61993d2e6a7cd49a409399e
diff --git a/runtime/starpu/codelets/codelet_dzasum.c b/runtime/starpu/codelets/codelet_dzasum.c
index b88605ba14695571cee582d43bf67688bf8bc281..7ef0d27e3e2331832d9a1d430add8249f1c76b5b 100644
--- a/runtime/starpu/codelets/codelet_dzasum.c
+++ b/runtime/starpu/codelets/codelet_dzasum.c
@@ -44,7 +44,7 @@ static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(dzasum, 2, cl_dzasum_cpu_func)
+CODELETS_CPU(dzasum, cl_dzasum_cpu_func)
 
 void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int M, int N,
diff --git a/runtime/starpu/codelets/codelet_map.c b/runtime/starpu/codelets/codelet_map.c
index 65dd72e33fe2328e32e252d88d2d3a0468a6b735..d56451136a102f292863887dd601969f0344d143 100644
--- a/runtime/starpu/codelets/codelet_map.c
+++ b/runtime/starpu/codelets/codelet_map.c
@@ -39,7 +39,7 @@ static void cl_map_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(map, 1, cl_map_cpu_func)
+CODELETS_CPU(map, cl_map_cpu_func)
 
 void INSERT_TASK_map( const RUNTIME_option_t *options,
                       cham_uplo_t uplo, const CHAM_desc_t *A, int Am, int An,
diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c
index 88cf08e022e8781a1dcaeb534d6347a55b973541..a4c1df2ebdfe0bc7c66ea6b16159b35d98753731 100644
--- a/runtime/starpu/codelets/codelet_zaxpy.c
+++ b/runtime/starpu/codelets/codelet_zaxpy.c
@@ -40,7 +40,7 @@ static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zaxpy, 2, cl_zaxpy_cpu_func)
+CODELETS_CPU(zaxpy, cl_zaxpy_cpu_func)
 
 void INSERT_TASK_zaxpy( const RUNTIME_option_t *options,
                         int M, CHAMELEON_Complex64_t alpha,
@@ -52,6 +52,10 @@ void INSERT_TASK_zaxpy( const RUNTIME_option_t *options,
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
 
+    if ( alpha == 0. ) {
+        return;
+    }
+
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
     CHAMELEON_ACCESS_RW(B, Bm, Bn);
diff --git a/runtime/starpu/codelets/codelet_zbuild.c b/runtime/starpu/codelets/codelet_zbuild.c
index ad52e6452e2064d80fc9858aa6c7db600af65b4d..b8b14ae4a3a2531149d5524a8964949dcd53b752 100644
--- a/runtime/starpu/codelets/codelet_zbuild.c
+++ b/runtime/starpu/codelets/codelet_zbuild.c
@@ -52,7 +52,7 @@ static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func)
+CODELETS_CPU(zbuild, cl_zbuild_cpu_func)
 
     void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
                              const CHAM_desc_t *A, int Am, int An,
diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c
index 27ce15a01640fd9c19121788b35781a45bf25125..bd027eff028f460333ab83651600783c3d085389 100644
--- a/runtime/starpu/codelets/codelet_zgeadd.c
+++ b/runtime/starpu/codelets/codelet_zgeadd.c
@@ -81,9 +81,9 @@ static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 #if defined(CHAMELEON_USE_CUBLAS_V2)
-CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(zgeadd, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
 #else
-CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
+CODELETS_CPU(zgeadd, cl_zgeadd_cpu_func)
 #endif
 
 /**
@@ -144,10 +144,16 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                          CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
+    if ( alpha == 0. ) { /* alpha is zero: only submit the scaling of B by beta */
+        INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, beta, B, Bm, Bn );
+        return; /* no expression: this function returns void */
+    }
+
     struct starpu_codelet *codelet = &cl_zgeadd;
     void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -162,7 +168,7 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
         STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
         STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_VALUE,    &beta,               sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,        RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        accessB,          RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c
index 63d7b9ca3cecc93332114b9fac3754fc1da599a3..9a26068cae6b8a15b87190f515328a3a47867891 100644
--- a/runtime/starpu/codelets/codelet_zgelqt.c
+++ b/runtime/starpu/codelets/codelet_zgelqt.c
@@ -56,7 +56,7 @@ static void cl_zgelqt_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
+CODELETS_CPU(zgelqt, cl_zgelqt_cpu_func)
 
 void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 034136bed27c4c6ffb264463a44800d3641af842..54e6256b57d2b0447d579bfbaf59d075f4c33bc4 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -94,7 +94,7 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(zgemm, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
 
 /**
  *
@@ -108,11 +108,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
                                                    const CHAM_desc_t *B, int Bm, int Bn,
                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) { /* alpha is zero: only submit the scaling of C by beta */
+        INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, beta, C, Cm, Cn );
+        return; /* no expression: this function returns void */
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zgemm;
     void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -131,7 +137,7 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
         STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_R,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_VALUE,    &beta,              sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,          RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c
index 9f1f407d519485f26bb05fbd87b0a4c22f7fb6fb..ae8ad0d53459a717f5ac3b9153b4b1113bb8be93 100644
--- a/runtime/starpu/codelets/codelet_zgeqrt.c
+++ b/runtime/starpu/codelets/codelet_zgeqrt.c
@@ -57,7 +57,7 @@ static void cl_zgeqrt_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
+CODELETS_CPU(zgeqrt, cl_zgeqrt_cpu_func)
 
 void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
                        int m, int n, int ib, int nb,
diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c
index 310e7e2b9f01c0c09147212457c338a0be0035fd..9e97aa44540d981c2ad8b16d5fb295d04ac8634e 100644
--- a/runtime/starpu/codelets/codelet_zgessm.c
+++ b/runtime/starpu/codelets/codelet_zgessm.c
@@ -50,7 +50,7 @@ static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
+CODELETS_CPU(zgessm, cl_zgessm_cpu_func)
 
 void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
                          int m, int n, int k, int ib, int nb,
diff --git a/runtime/starpu/codelets/codelet_zgessq.c b/runtime/starpu/codelets/codelet_zgessq.c
index f22e28a2416461272b6dc71ed88be065683efed1..7bcaabb15982ffabdd85a7377202927b1a4af4f7 100644
--- a/runtime/starpu/codelets/codelet_zgessq.c
+++ b/runtime/starpu/codelets/codelet_zgessq.c
@@ -43,7 +43,7 @@ static void cl_zgessq_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func)
+CODELETS_CPU(zgessq, cl_zgessq_cpu_func)
 
 void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
                          cham_store_t storev, int m, int n,
diff --git a/runtime/starpu/codelets/codelet_zgetrf.c b/runtime/starpu/codelets/codelet_zgetrf.c
index 23e40e8738d6bdb02742e16fa37474948f1843b4..947fb8d2b0dd2a243eb42fe6c768d233f3ffafb6 100644
--- a/runtime/starpu/codelets/codelet_zgetrf.c
+++ b/runtime/starpu/codelets/codelet_zgetrf.c
@@ -52,7 +52,7 @@ static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func)
+CODELETS_CPU(zgetrf, cl_zgetrf_cpu_func)
 
 void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
                          int m, int n, int nb,
diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
index 4b2b788ba8ce1edb2aab076ea180bc38de3a1730..460a8e1a5b34b0eea3cccd55aae924ffcee3e033 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
@@ -56,7 +56,7 @@ static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func)
+CODELETS_CPU(zgetrf_incpiv, cl_zgetrf_incpiv_cpu_func)
 
 void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
                               int m, int n, int ib, int nb,
diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
index 776415a755c28c0bf360439df68be75885edbea8..c84418518e9e4469360c9db00466f11450d2f337 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
@@ -53,7 +53,7 @@ static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func)
+CODELETS_CPU(zgetrf_nopiv, cl_zgetrf_nopiv_cpu_func)
 
 void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
                               int m, int n, int ib, int nb,
diff --git a/runtime/starpu/codelets/codelet_zgram.c b/runtime/starpu/codelets/codelet_zgram.c
index 83643fc4f9839c8dc091e4ad6fd4c53426f3e1b0..1a5e8e4e9825272ec9d158c21b2cb44dd86bc247 100644
--- a/runtime/starpu/codelets/codelet_zgram.c
+++ b/runtime/starpu/codelets/codelet_zgram.c
@@ -43,7 +43,7 @@ static void cl_zgram_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zgram, 4, cl_zgram_cpu_func)
+CODELETS_CPU(zgram, cl_zgram_cpu_func)
 
 void INSERT_TASK_zgram( const RUNTIME_option_t *options,
                         cham_uplo_t uplo,
@@ -51,21 +51,21 @@ void INSERT_TASK_zgram( const RUNTIME_option_t *options,
                         const CHAM_desc_t *Di, int Dim, int Din,
                         const CHAM_desc_t *Dj, int Djm, int Djn,
                         const CHAM_desc_t *D, int Dm, int Dn,
-                        CHAM_desc_t *A, int Am, int An)
+                        CHAM_desc_t *A, int Am, int An )
 {
-  struct starpu_codelet *codelet = &cl_zgram;
-  void (*callback)(void*) = options->profiling ? cl_zgram_callback : NULL;
+    struct starpu_codelet *codelet = &cl_zgram;
+    void (*callback)(void*) = options->profiling ? cl_zgram_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
 
-  CHAMELEON_BEGIN_ACCESS_DECLARATION;
-  CHAMELEON_ACCESS_R(Di, Dim, Din);
-  CHAMELEON_ACCESS_R(Dj, Djm, Djn);
-  CHAMELEON_ACCESS_R(D, Dm, Dn);
-  CHAMELEON_ACCESS_RW(A, Am, An);
-  CHAMELEON_END_ACCESS_DECLARATION;
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(Di, Dim, Din);
+    CHAMELEON_ACCESS_R(Dj, Djm, Djn);
+    CHAMELEON_ACCESS_R(D, Dm, Dn);
+    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_END_ACCESS_DECLARATION;
 
-  starpu_insert_task(
+    starpu_insert_task(
         starpu_mpi_codelet(codelet),
         STARPU_VALUE,    &uplo,                      sizeof(int),
         STARPU_VALUE,    &m,                         sizeof(int),
diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c
index c7a24022b444d7f50fb8b91b74001ee6440899f0..fe1f9eb291209851ecfb84e1dd7039cfdd560d1a 100644
--- a/runtime/starpu/codelets/codelet_zhe2ge.c
+++ b/runtime/starpu/codelets/codelet_zhe2ge.c
@@ -42,18 +42,18 @@ static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func)
+CODELETS_CPU(zhe2ge, cl_zhe2ge_cpu_func)
 
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo,
-                       int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo,
+                         int m, int n, int mb,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     (void)mb;
     struct starpu_codelet *codelet = &cl_zhe2ge;
diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c
index b6a827896636970bfe338cbeacaae776b3dbc401..5c90271ece923555a05f3c3d9d2374b9ba41b000 100644
--- a/runtime/starpu/codelets/codelet_zhemm.c
+++ b/runtime/starpu/codelets/codelet_zhemm.c
@@ -93,7 +93,7 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(zhemm, 3, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(zhemm, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC)
 
 /**
  *
@@ -107,11 +107,17 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
                       const CHAM_desc_t *B, int Bm, int Bn,
                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) { /* alpha is zero: only submit the scaling of C by beta */
+        INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, beta, C, Cm, Cn );
+        return; /* no expression: this function returns void */
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zhemm;
     void (*callback)(void*) = options->profiling ? cl_zhemm_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -129,7 +135,7 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
         STARPU_R,               RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_R,               RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_VALUE,    &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,               RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,                RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c
index 291fef2794b146994577fc2ec83ff2704397a826..0e93a35c99f0b9469839b5c5e8e17d8428598bcd 100644
--- a/runtime/starpu/codelets/codelet_zher2k.c
+++ b/runtime/starpu/codelets/codelet_zher2k.c
@@ -89,7 +89,7 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(zher2k, 3, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(zher2k, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC)
 
 /**
  *
@@ -104,11 +104,17 @@ INSERT_TASK_zher2k( const RUNTIME_option_t *options,
                                                  const CHAM_desc_t *B, int Bm, int Bn,
                     double beta,                 const CHAM_desc_t *C, int Cm, int Cn )
 {
+    if ( alpha == 0. ) { /* alpha is zero: only submit the scaling of C (uplo part) by beta */
+        INSERT_TASK_zlascal( options, uplo, n, n, nb, beta, C, Cm, Cn );
+        return; /* no expression: the callee is void, so this function is presumably void too */
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zher2k;
     void (*callback)(void*) = options->profiling ? cl_zher2k_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -126,7 +132,7 @@ INSERT_TASK_zher2k( const RUNTIME_option_t *options,
         STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_VALUE,      &beta,                     sizeof(double),
-        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,                  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c
index e967fdfd1051a69c6796c03515c3c6f1a4102c39..d92f922eb9abf2b21435254bd5a076896f91d02a 100644
--- a/runtime/starpu/codelets/codelet_zherfb.c
+++ b/runtime/starpu/codelets/codelet_zherfb.c
@@ -78,7 +78,7 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(zherfb, 4, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(zherfb, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC)
 
 /**
  *
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index 6a8b17cd68d47fad48596896f05c00daf37bd049..915cc9b77d4a13cdc43b4ba14c67c4871a49dd37 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -86,7 +86,7 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(zherk, 2, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC)
 
 /**
  *
@@ -99,11 +99,17 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options,
                       double alpha, const CHAM_desc_t *A, int Am, int An,
                       double beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) { /* alpha is zero: only submit the scaling of C (uplo part) by beta */
+        INSERT_TASK_zlascal( options, uplo, n, n, nb, beta, C, Cm, Cn );
+        return; /* no expression: this function returns void */
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zherk;
     void (*callback)(void*) = options->profiling ? cl_zherk_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -119,7 +125,7 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options,
         STARPU_VALUE,    &alpha,             sizeof(double),
         STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_VALUE,    &beta,              sizeof(double),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,          RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c
index 5703507ed9baba655125f1168df80485cbd7202d..312555129509bfdcaaa79337609b2d053471f310 100644
--- a/runtime/starpu/codelets/codelet_zlacpy.c
+++ b/runtime/starpu/codelets/codelet_zlacpy.c
@@ -57,7 +57,7 @@ static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlacpy, 2, cl_zlacpy_cpu_func)
+CODELETS_CPU(zlacpy, cl_zlacpy_cpu_func)
 
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int m, int n, int nb,
diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c
index c3f44bc2a78d4b3916ece9f7f2e99a54f69ef4bb..b9eb9188913ca7e875116898fd8355fecfe4b9ea 100644
--- a/runtime/starpu/codelets/codelet_zlag2c.c
+++ b/runtime/starpu/codelets/codelet_zlag2c.c
@@ -45,7 +45,7 @@ static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlag2c, 1, cl_zlag2c_cpu_func)
+CODELETS_CPU(zlag2c, cl_zlag2c_cpu_func)
 
 /**
  *
@@ -103,7 +103,7 @@ static void cl_clag2z_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(clag2z, 2, cl_clag2z_cpu_func)
+CODELETS_CPU(clag2z, cl_clag2z_cpu_func)
 
 void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
                        int m, int n, int nb,
diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c
index 23ca2f7cc02283e521edf3c6995b9e5ee00cab55..35a7251a8f05216cda7598bcb6af9d3810644592 100644
--- a/runtime/starpu/codelets/codelet_zlange.c
+++ b/runtime/starpu/codelets/codelet_zlange.c
@@ -47,7 +47,7 @@ static void cl_zlange_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlange, 3, cl_zlange_cpu_func)
+CODELETS_CPU(zlange, cl_zlange_cpu_func)
 
 void INSERT_TASK_zlange( const RUNTIME_option_t *options,
                          cham_normtype_t norm, int M, int N, int NB,
@@ -105,7 +105,7 @@ static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlange_max, 2, cl_zlange_max_cpu_func)
+CODELETS_CPU(zlange_max, cl_zlange_max_cpu_func)
 
 void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
                            const CHAM_desc_t *A, int Am, int An,
diff --git a/runtime/starpu/codelets/codelet_zlanhe.c b/runtime/starpu/codelets/codelet_zlanhe.c
index f31fea4c4c82b3dff4cfc5c47f8d6713f2bd2da4..45123eeb74fd3a94b27719a7783c40cf5d325474 100644
--- a/runtime/starpu/codelets/codelet_zlanhe.c
+++ b/runtime/starpu/codelets/codelet_zlanhe.c
@@ -47,7 +47,7 @@ static void cl_zlanhe_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlanhe, 3, cl_zlanhe_cpu_func)
+CODELETS_CPU(zlanhe, cl_zlanhe_cpu_func)
 
 void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
                        cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
diff --git a/runtime/starpu/codelets/codelet_zlansy.c b/runtime/starpu/codelets/codelet_zlansy.c
index 0fb7e31ef66ea97877ad65d6d5ffa9fc93deb421..ef7eacbd92cdb511aecb209ed009a76371a507da 100644
--- a/runtime/starpu/codelets/codelet_zlansy.c
+++ b/runtime/starpu/codelets/codelet_zlansy.c
@@ -47,7 +47,7 @@ static void cl_zlansy_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlansy, 3, cl_zlansy_cpu_func)
+CODELETS_CPU(zlansy, cl_zlansy_cpu_func)
 
 void INSERT_TASK_zlansy( const RUNTIME_option_t *options,
                          cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
diff --git a/runtime/starpu/codelets/codelet_zlantr.c b/runtime/starpu/codelets/codelet_zlantr.c
index f13dd13bbe951e4474e41287fadc642af4b889c3..b763625dd770bfc0e29986975e72f8a3179699e9 100644
--- a/runtime/starpu/codelets/codelet_zlantr.c
+++ b/runtime/starpu/codelets/codelet_zlantr.c
@@ -43,7 +43,7 @@ static void cl_zlantr_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlantr, 3, cl_zlantr_cpu_func)
+CODELETS_CPU(zlantr, cl_zlantr_cpu_func)
 
 void INSERT_TASK_zlantr( const RUNTIME_option_t *options,
                          cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c
index 0916e8aa5e73887ad67cda9db946c1a81ecbbc24..d1bfc3fd35a70e56451b16e70e714433a49dbd1d 100644
--- a/runtime/starpu/codelets/codelet_zlascal.c
+++ b/runtime/starpu/codelets/codelet_zlascal.c
@@ -43,14 +43,22 @@ static void cl_zlascal_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func)
+CODELETS_CPU(zlascal, cl_zlascal_cpu_func)
 
-void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
-                        cham_uplo_t uplo,
-                        int m, int n, int nb,
-                        CHAMELEON_Complex64_t alpha,
-                        const CHAM_desc_t *A, int Am, int An)
+void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo,
+                          int m, int n, int nb,
+                          CHAMELEON_Complex64_t alpha,
+                          const CHAM_desc_t *A, int Am, int An)
 {
+    if ( alpha == 0. ) { /* scaling by zero: submit a zlaset task with alpha (== 0) for both values */
+        INSERT_TASK_zlaset( options, uplo, m, n, alpha, alpha, A, Am, An );
+        return; /* no expression: this function returns void */
+    }
+    else if ( alpha == 1. ) { /* scaling by one: no task is submitted */
+        return;
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zlascal;
     void (*callback)(void*) = options->profiling ? cl_zlascal_callback : NULL;
@@ -67,7 +75,7 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
         STARPU_VALUE,    &m,                  sizeof(int),
         STARPU_VALUE,    &n,                  sizeof(int),
         STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_RW,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c
index df278acd98ab3af5b8d13c9bd16c8573c0cdad68..90d3ad925cafc15c1144eeb79106ec5a88783999 100644
--- a/runtime/starpu/codelets/codelet_zlaset.c
+++ b/runtime/starpu/codelets/codelet_zlaset.c
@@ -46,7 +46,7 @@ static void cl_zlaset_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlaset, 1, cl_zlaset_cpu_func)
+CODELETS_CPU(zlaset, cl_zlaset_cpu_func)
 
 void INSERT_TASK_zlaset(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int M, int N,
diff --git a/runtime/starpu/codelets/codelet_zlaset2.c b/runtime/starpu/codelets/codelet_zlaset2.c
index 61256da734846416a495753fc65fbffc92fab8d3..0f19d0e946115d6f5903d1b59dbe5b535aca7f37 100644
--- a/runtime/starpu/codelets/codelet_zlaset2.c
+++ b/runtime/starpu/codelets/codelet_zlaset2.c
@@ -44,7 +44,7 @@ static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlaset2, 1, cl_zlaset2_cpu_func)
+CODELETS_CPU(zlaset2, cl_zlaset2_cpu_func)
 
 void INSERT_TASK_zlaset2(const RUNTIME_option_t *options,
                        cham_uplo_t uplo, int M, int N,
diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c
index 20ed9394cc2b0beca557a9535e359ff2836f6437..718fd3ce56bfa53c395962a009d9cf06a451ca0f 100644
--- a/runtime/starpu/codelets/codelet_zlatro.c
+++ b/runtime/starpu/codelets/codelet_zlatro.c
@@ -48,7 +48,7 @@ static void cl_zlatro_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func)
+CODELETS_CPU(zlatro, cl_zlatro_cpu_func)
 
 /**
  *
diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c
index 59d67ebf5d984c0baf12f4a1b977441ff6858cd8..00dcda4754e77a37a3e3649dde9ca382dd733ba0 100644
--- a/runtime/starpu/codelets/codelet_zlauum.c
+++ b/runtime/starpu/codelets/codelet_zlauum.c
@@ -44,7 +44,7 @@ static void cl_zlauum_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func)
+CODELETS_CPU(zlauum, cl_zlauum_cpu_func)
 
 /**
  *
diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c
index 361ebc17e91e164a7e6666522451883da86dafc2..23ccbc3dab28f52c8fe0b711f8f9c29892dc7533 100644
--- a/runtime/starpu/codelets/codelet_zplghe.c
+++ b/runtime/starpu/codelets/codelet_zplghe.c
@@ -51,7 +51,7 @@ static void cl_zplghe_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func)
+CODELETS_CPU(zplghe, cl_zplghe_cpu_func)
 
 void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
                          double bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c
index efc64b037946ed5bd5848342c8e373cd4aa66b5c..665f31002735de92d2855a9352eec5015fb90467 100644
--- a/runtime/starpu/codelets/codelet_zplgsy.c
+++ b/runtime/starpu/codelets/codelet_zplgsy.c
@@ -51,7 +51,7 @@ static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zplgsy, 1, cl_zplgsy_cpu_func)
+CODELETS_CPU(zplgsy, cl_zplgsy_cpu_func)
 
 void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
                         CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An,
diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c
index 0d86aeff00b89bbdda7a5d137eeff1b65b7b3926..4f2910f7d80c417b3c5310a53d641e626e9d0d8f 100644
--- a/runtime/starpu/codelets/codelet_zplrnt.c
+++ b/runtime/starpu/codelets/codelet_zplrnt.c
@@ -48,7 +48,7 @@ static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zplrnt, 1, cl_zplrnt_cpu_func)
+CODELETS_CPU(zplrnt, cl_zplrnt_cpu_func)
 
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
                          int m, int n, const CHAM_desc_t *A, int Am, int An,
diff --git a/runtime/starpu/codelets/codelet_zplssq.c b/runtime/starpu/codelets/codelet_zplssq.c
index b29f1d74e8230e45876d2b960508a955a1ff59c7..6201abff7748ecfd5dd404bb2851365cda88f8fa 100644
--- a/runtime/starpu/codelets/codelet_zplssq.c
+++ b/runtime/starpu/codelets/codelet_zplssq.c
@@ -48,7 +48,7 @@ static void cl_zplssq_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zplssq, 2, cl_zplssq_cpu_func)
+CODELETS_CPU(zplssq, cl_zplssq_cpu_func)
 
 void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
                          cham_store_t storev, int M, int N,
@@ -101,7 +101,7 @@ static void cl_zplssq2_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zplssq2, 1, cl_zplssq2_cpu_func)
+CODELETS_CPU(zplssq2, cl_zplssq2_cpu_func)
 
 void INSERT_TASK_zplssq2( const RUNTIME_option_t *options, int N,
                           const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c
index de6c1886b7e349411d89a34be3089b9e1264a902..35edf5234888074001c5ef18933b8259ae931219 100644
--- a/runtime/starpu/codelets/codelet_zpotrf.c
+++ b/runtime/starpu/codelets/codelet_zpotrf.c
@@ -52,7 +52,7 @@ static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func)
+CODELETS_CPU(zpotrf, cl_zpotrf_cpu_func)
 
 /**
  *
diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c
index 29ef312bd6207d7d3c2476e5a764bcacf6a3f975..5c7125646a54e11ff6bde3f61f88ab940fbea7cf 100644
--- a/runtime/starpu/codelets/codelet_zssssm.c
+++ b/runtime/starpu/codelets/codelet_zssssm.c
@@ -54,7 +54,7 @@ static void cl_zssssm_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func)
+CODELETS_CPU(zssssm, cl_zssssm_cpu_func)
 
 void INSERT_TASK_zssssm( const RUNTIME_option_t *options,
                          int m1, int n1, int m2, int n2, int k, int ib, int nb,
diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c
index 689be9b62368127011176dca3a6507a429f37913..40ed44bcbb3e44904160bc28faa1d9d58dfccf32 100644
--- a/runtime/starpu/codelets/codelet_zsymm.c
+++ b/runtime/starpu/codelets/codelet_zsymm.c
@@ -93,7 +93,7 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(zsymm, 3, cl_zsymm_cpu_func, cl_zsymm_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(zsymm, cl_zsymm_cpu_func, cl_zsymm_cuda_func, STARPU_CUDA_ASYNC)
 
 /**
  *
@@ -107,11 +107,17 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
                       const CHAM_desc_t *B, int Bm, int Bn,
                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) { /* A and B are not accessed: only beta is forwarded to zlascal on C */
+        INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb, beta, C, Cm, Cn );
+        return;
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zsymm;
     void (*callback)(void*) = options->profiling ? cl_zsymm_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -129,7 +135,7 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
         STARPU_R,               RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_R,               RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_VALUE,    &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,               RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,                RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c
index 86037a3778b6511a54d7b2e2f023856cd9ee5f7a..51f013036ddde9370e06e62de325c32c259cb01a 100644
--- a/runtime/starpu/codelets/codelet_zsyr2k.c
+++ b/runtime/starpu/codelets/codelet_zsyr2k.c
@@ -89,7 +89,7 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(zsyr2k, 3, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(zsyr2k, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC)
 
 /**
  *
@@ -103,11 +103,17 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
                        const CHAM_desc_t *B, int Bm, int Bn,
                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) { /* A and B are not accessed: only beta is forwarded to zlascal on C */
+        INSERT_TASK_zlascal( options, uplo, n, n, nb, beta, C, Cm, Cn );
+        return;
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zsyr2k;
     void (*callback)(void*) = options->profiling ? cl_zsyr2k_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -125,7 +131,7 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
         STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,                  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index 66782975f6bdf5a409f431117e2096028a3cf870..83c51f5997d6195cc5542ab63386e235b7241f21 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -86,7 +86,7 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(zsyrk, 2, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(zsyrk, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC)
 
 /**
  *
@@ -99,11 +99,17 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) { /* A is not accessed: only beta is forwarded to zlascal on C */
+        INSERT_TASK_zlascal( options, uplo, n, n, nb, beta, C, Cm, Cn );
+        return;
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zsyrk;
     void (*callback)(void*) = options->profiling ? cl_zsyrk_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -119,7 +125,7 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
         STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
         STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,                  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zsyssq.c b/runtime/starpu/codelets/codelet_zsyssq.c
index 951e60d1b6b10779ff3737f8915831232ee7f280..dd6300a18a18e3d89eb5b1540a9a1e4e8926c638 100644
--- a/runtime/starpu/codelets/codelet_zsyssq.c
+++ b/runtime/starpu/codelets/codelet_zsyssq.c
@@ -41,7 +41,7 @@ static void cl_zsyssq_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zsyssq, 2, cl_zsyssq_cpu_func)
+CODELETS_CPU(zsyssq, cl_zsyssq_cpu_func)
 
 void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, int n,
diff --git a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
index cbac3596ee2cab76abc4ed7c49b1bbff6f6f1527..2e92e4cfd195d95307cc753fc4c0e9f9bdc17e3a 100644
--- a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
@@ -44,7 +44,7 @@ static void cl_zsytrf_nopiv_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zsytrf_nopiv, 1, cl_zsytrf_nopiv_cpu_func)
+CODELETS_CPU(zsytrf_nopiv, cl_zsytrf_nopiv_cpu_func)
 
 void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options,
                               cham_uplo_t uplo, int n, int nb,
diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c
index 0754c207f01e2fa3463e309991f451f3b3be3510..708098635f6a8130390a2a902afe8a6a4a5df435 100644
--- a/runtime/starpu/codelets/codelet_ztplqt.c
+++ b/runtime/starpu/codelets/codelet_ztplqt.c
@@ -48,7 +48,7 @@ static void cl_ztplqt_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(ztplqt, 4, cl_ztplqt_cpu_func)
+CODELETS_CPU(ztplqt, cl_ztplqt_cpu_func)
 
 void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
                          int M, int N, int L, int ib, int nb,
diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c
index 92d9e3bac6644997c5fddf9f6e281cf8a5e721b1..1a250a195a1bcf1f1021012391cce8af499d679d 100644
--- a/runtime/starpu/codelets/codelet_ztpmlqt.c
+++ b/runtime/starpu/codelets/codelet_ztpmlqt.c
@@ -92,7 +92,7 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(ztpmlqt, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC)
 
 void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
                           cham_side_t side, cham_trans_t trans,
diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c
index c0da2c794afedca487488388d42bd47204b22f3a..e36abb2cd24b0de669e2c9a3f13d773baf7618c2 100644
--- a/runtime/starpu/codelets/codelet_ztpmqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpmqrt.c
@@ -92,7 +92,7 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(ztpmqrt, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC)
 
 void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
                           cham_side_t side, cham_trans_t trans,
diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c
index 596bb44a28ee49045c69a512c0dff598cc0a7f30..9e1de5db0547c4db4026c7e4f5a985ac4e544e9b 100644
--- a/runtime/starpu/codelets/codelet_ztpqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpqrt.c
@@ -47,7 +47,7 @@ static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func)
+CODELETS_CPU(ztpqrt, cl_ztpqrt_cpu_func)
 
 void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
                          int M, int N, int L, int ib, int nb,
diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c
index e19dd2636fce5ea0d9ffb13345cc37953eadd9a5..ac3dc8bfaecc14657dc48399b2c10a010babe83f 100644
--- a/runtime/starpu/codelets/codelet_ztradd.c
+++ b/runtime/starpu/codelets/codelet_ztradd.c
@@ -46,7 +46,7 @@ static void cl_ztradd_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func)
+CODELETS_CPU(ztradd, cl_ztradd_cpu_func)
 
 /**
  ******************************************************************************
@@ -112,10 +112,16 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                          CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
+    if ( alpha == 0. ) { /* A is not accessed: only beta is forwarded to zlascal on B */
+        INSERT_TASK_zlascal( options, uplo, m, n, nb, beta, B, Bm, Bn );
+        return;
+    }
+
     struct starpu_codelet *codelet = &cl_ztradd;
     void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -131,7 +137,7 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
         STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
         STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_VALUE,    &beta,               sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,        RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        accessB,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_ztrasm.c b/runtime/starpu/codelets/codelet_ztrasm.c
index 527c7e44b2511285229ea05e9b90039d6bd23655..f870eb563ebd1e9ec2f970c79018007f2d2f7f40 100644
--- a/runtime/starpu/codelets/codelet_ztrasm.c
+++ b/runtime/starpu/codelets/codelet_ztrasm.c
@@ -44,7 +44,7 @@ static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(ztrasm, 2, cl_ztrasm_cpu_func)
+CODELETS_CPU(ztrasm, cl_ztrasm_cpu_func)
 
 void INSERT_TASK_ztrasm( const RUNTIME_option_t *options,
                          cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N,
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index a1d24cf9542f5ac1e310511aa440f293fb35ab56..d1404ba960ac1991c06b84dca012c2e2b930e248 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -89,7 +89,7 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(ztrmm, 2, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(ztrmm, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC)
 
 /**
  *
@@ -102,6 +102,11 @@ void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                       const CHAM_desc_t *B, int Bm, int Bn)
 {
+    if ( alpha == 0. ) { /* A is not accessed: B is handed to zlaset with alpha (== 0) for both values */
+        INSERT_TASK_zlaset( options, ChamUpperLower, m, n, alpha, alpha, B, Bm, Bn );
+        return;
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_ztrmm;
     void (*callback)(void*) = options->profiling ? cl_ztrmm_callback : NULL;
@@ -122,7 +127,7 @@ void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
         STARPU_VALUE,         &m,                        sizeof(int),
         STARPU_VALUE,         &n,                        sizeof(int),
         STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_R,                  RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_RW,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index f4ab409209f9d82b333322ac88bf23ed2dfc060e..13fb16fad1e5aab314ab1b2906572702200b0d8e 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -87,7 +87,7 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(ztrsm, 2, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(ztrsm, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC)
 
 /**
  *
diff --git a/runtime/starpu/codelets/codelet_ztrssq.c b/runtime/starpu/codelets/codelet_ztrssq.c
index d7ed201afc60538aebadeaa78294c38fc7f1c5c4..a453d7acafb964c49f641554099b2d0ddb521665 100644
--- a/runtime/starpu/codelets/codelet_ztrssq.c
+++ b/runtime/starpu/codelets/codelet_ztrssq.c
@@ -42,7 +42,7 @@ static void cl_ztrssq_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(ztrssq, 2, cl_ztrssq_cpu_func)
+CODELETS_CPU(ztrssq, cl_ztrssq_cpu_func)
 
 void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_diag_t diag,
diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c
index 758542d8895ab175550bdf3bb266d6ac171b8661..97d8c56dddb7fd85eeaa4f9ac269c824e4804d03 100644
--- a/runtime/starpu/codelets/codelet_ztrtri.c
+++ b/runtime/starpu/codelets/codelet_ztrtri.c
@@ -52,7 +52,7 @@ static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func)
+CODELETS_CPU(ztrtri, cl_ztrtri_cpu_func)
 
 /**
  *
diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
index 01704f33ac1fa6d97849d97aa73a6416e2dc208c..be215519fef4b2f0b1432af2f8fd2e46ede4b2e1 100644
--- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
@@ -56,7 +56,7 @@ static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func)
+CODELETS_CPU(ztsmlq_hetra1, cl_ztsmlq_hetra1_cpu_func)
 
 /**
  *
diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
index 4c6b5229e72eb9083db9ac2df481b4cb64e68c9a..7f8c0488de3ba7c8ba695c49da4a8976667f5668 100644
--- a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
@@ -56,7 +56,7 @@ static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func)
+CODELETS_CPU(ztsmqr_hetra1, cl_ztsmqr_hetra1_cpu_func)
 
 /**
  *
diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c
index 32b3fb5ad829a995f4db5a52c7c1fc7a8ceac4af..0e3f717f4a57c06e0b3e6345dfd87a2bb1b15744 100644
--- a/runtime/starpu/codelets/codelet_ztstrf.c
+++ b/runtime/starpu/codelets/codelet_ztstrf.c
@@ -67,7 +67,7 @@ static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func)
+CODELETS_CPU(ztstrf, cl_ztstrf_cpu_func)
 
 void INSERT_TASK_ztstrf( const RUNTIME_option_t *options,
                          int m, int n, int ib, int nb,
diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c
index 7f2b6b24f37bc06680bf8ae50fe66c40720a06ba..9f42aa67f7750644b864103fa31220b9e58e981b 100644
--- a/runtime/starpu/codelets/codelet_zunmlq.c
+++ b/runtime/starpu/codelets/codelet_zunmlq.c
@@ -95,7 +95,7 @@ static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(zunmlq, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC)
 
 void INSERT_TASK_zunmlq( const RUNTIME_option_t *options,
                          cham_side_t side, cham_trans_t trans,
diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c
index de61f52c9e5bdf04bba2c231a3cc407b0330b06a..e44279dd25e8df1e2fad2e8b58fcbd20d420b590 100644
--- a/runtime/starpu/codelets/codelet_zunmqr.c
+++ b/runtime/starpu/codelets/codelet_zunmqr.c
@@ -94,7 +94,7 @@ static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC)
+CODELETS(zunmqr, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC)
 
 void INSERT_TASK_zunmqr( const RUNTIME_option_t *options,
                          cham_side_t side, cham_trans_t trans,
diff --git a/runtime/starpu/control/runtime_descriptor.c b/runtime/starpu/control/runtime_descriptor.c
index 8402ec55b4d1fe7610092916f550e97aa99cc631..a6d9f84c7f9923bb379707516c32f5d3ccf0056b 100644
--- a/runtime/starpu/control/runtime_descriptor.c
+++ b/runtime/starpu/control/runtime_descriptor.c
@@ -31,7 +31,7 @@
  These values can be changed through the call CHAMELEON_user_tag_size(int tag_width, int tag_sep) */
 #define TAG_WIDTH_MIN 20
 static int tag_width = 64;
-static int tag_sep   = 50;
+static int tag_sep   = 40;
 static int _tag_mpi_initialized_ = 0;
 
 static inline int
diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h
index 9b9fdc5b1569f5611c07727c421a5815c225443d..98b0c7760885aa15633f71e484de2889a04147c2 100644
--- a/runtime/starpu/include/runtime_codelets.h
+++ b/runtime/starpu/include/runtime_codelets.h
@@ -31,7 +31,7 @@
 #define CODELET_CUDA_FLAGS(flags)
 #endif
 
-#define CODELETS_ALL(cl_name, _nbuffers, cpu_func_name, cuda_func_name, _original_location_, cuda_flags) \
+#define CODELETS_ALL(cl_name, cpu_func_name, cuda_func_name, _original_location_, cuda_flags) \
     struct starpu_perfmodel cl_##cl_name##_fake = {                     \
         .type   = STARPU_HISTORY_BASED,                                 \
         .symbol = "fake_"#cl_name                                       \
@@ -47,7 +47,7 @@
         .cpu_func  = ((cpu_func_name)),                                 \
         CODELET_CUDA_FLAGS(cuda_flags)                                  \
         .cuda_func = ((cuda_func_name)),                                \
-        .nbuffers  = ((_nbuffers)),                                     \
+        .nbuffers  = STARPU_VARIABLE_NBUFFERS,                          \
         .model     = &cl_##cl_name##_model,                             \
         .name      = #cl_name                                           \
     };                                                                  \
@@ -69,15 +69,15 @@
     }
 
 #if defined(CHAMELEON_SIMULATION)
-#define CODELETS_CPU(name, _nbuffers, cpu_func_name)                    \
-    CODELETS_ALL( name, _nbuffers, (starpu_cpu_func_t) 1, NULL, STARPU_CPU, 0 )
+#define CODELETS_CPU(name, cpu_func_name)                    \
+    CODELETS_ALL( name, (starpu_cpu_func_t) 1, NULL, STARPU_CPU, 0 )
 #else
-#define CODELETS_CPU(name, _nbuffers, cpu_func_name)                    \
-    CODELETS_ALL( name, _nbuffers, cpu_func_name, NULL, STARPU_CPU, 0 )
+#define CODELETS_CPU(name, cpu_func_name)                    \
+    CODELETS_ALL( name, cpu_func_name, NULL, STARPU_CPU, 0 )
 #endif
 
-#define CODELETS_GPU(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags) \
-    CODELETS_ALL( name, _nbuffers, cpu_func_name, cuda_func_name, STARPU_CPU  | STARPU_CUDA, cuda_flags )
+#define CODELETS_GPU(name, cpu_func_name, cuda_func_name, cuda_flags) \
+    CODELETS_ALL( name, cpu_func_name, cuda_func_name, STARPU_CPU  | STARPU_CUDA, cuda_flags )
 
 #define CODELETS_ALL_HEADER(name)                            \
      CHAMELEON_CL_CB_HEADER(name);                           \
@@ -89,24 +89,24 @@
 
 #if defined(CHAMELEON_SIMULATION)
 #if defined(CHAMELEON_USE_CUDA)
-#define CODELETS(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags) \
-    CODELETS_GPU(name, _nbuffers, (starpu_cpu_func_t) 1, (starpu_cuda_func_t) 1, cuda_flags)
+#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \
+    CODELETS_GPU(name, (starpu_cpu_func_t) 1, (starpu_cuda_func_t) 1, cuda_flags)
 
 #define CODELETS_HEADER(name)  CODELETS_ALL_HEADER(name)
 #else
-#define CODELETS(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags) \
-    CODELETS_CPU(name, _nbuffers, (starpu_cpu_func_t) 1)
+#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \
+    CODELETS_CPU(name, (starpu_cpu_func_t) 1)
 
 #define CODELETS_HEADER(name)  CODELETS_ALL_HEADER(name)
 #endif
 #elif defined(CHAMELEON_USE_CUDA)
-#define CODELETS(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags) \
-    CODELETS_GPU(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags)
+#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \
+    CODELETS_GPU(name, cpu_func_name, cuda_func_name, cuda_flags)
 
 #define CODELETS_HEADER(name)  CODELETS_ALL_HEADER(name)
 #else
-#define CODELETS(name, _nbuffers, cpu_func_name, cuda_func_name, cuda_flags) \
-    CODELETS_CPU(name, _nbuffers, cpu_func_name)
+#define CODELETS(name, cpu_func_name, cuda_func_name, cuda_flags) \
+    CODELETS_CPU(name, cpu_func_name)
 
 #define CODELETS_HEADER(name)  CODELETS_ALL_HEADER(name)
 #endif
diff --git a/testing/chameleon_ztesting.c b/testing/chameleon_ztesting.c
index e41178a64e4afad629fa148408fba776a5470def..0d6d6e5d93d4ff1c1251f1994ad07ffcb04e28d3 100644
--- a/testing/chameleon_ztesting.c
+++ b/testing/chameleon_ztesting.c
@@ -98,10 +98,11 @@ static parameter_t parameters[] = {
     { "||A||",         "Norm of the matrix A",                  1005, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double },
     { "||B||",         "Norm of the matrix B",                  1006, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double },
     { "||C||",         "Norm of the matrix C",                  1007, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double },
-    { "||b||",         "Norm of the vector b",                  1008, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double },
-    { "||x||",         "Norm of the vector x",                  1009, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double },
-    { "||Ax-b||/N/eps/(||A||||x||+||b||", "",                   1010, PARAM_OUTPUT, 2, 22, TestValDouble, {0}, NULL, pread_double, sprint_double },
-    { "||I-QQ'||",     "Orthonormality of Q",                   1011, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double },
+    { "||R||",         "Residual norm",                         1008, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double },
+    { "||b||",         "Norm of the vector b",                  1009, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double },
+    { "||x||",         "Norm of the vector x",                  1010, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double },
+    { "||Ax-b||/N/eps/(||A||||x||+||b||", "",                   1011, PARAM_OUTPUT, 2, 22, TestValDouble, {0}, NULL, pread_double, sprint_double },
+    { "||I-QQ'||",     "Orthonormality of Q",                   1012, PARAM_OUTPUT, 2, 13, TestValDouble, {0}, NULL, pread_double, sprint_double },
 };
 
 #define STR_MAX_LENGTH 256
diff --git a/testing/input/geadd.in b/testing/input/geadd.in
index 180fa9a87214d6b5bf1715c8ef341f39f76aa4d9..8e6fe98f11e7ec97f464df6a4cbb76f114f22daa 100644
--- a/testing/input/geadd.in
+++ b/testing/input/geadd.in
@@ -12,6 +12,8 @@
 # alpha: Scalar alpha
 # beta: Scalar beta
 
+alpha = 0., 3.45
+beta = 0., -4.86
 op = geadd
 nb = 16, 17
 ib = 8
diff --git a/testing/input/gemm.in b/testing/input/gemm.in
index f4df5ce2ce5384429096b78aa37d291962c09c0b..ae01aee4dc82bcba9d6ebc60584d296dadb4a9e2 100644
--- a/testing/input/gemm.in
+++ b/testing/input/gemm.in
@@ -15,6 +15,8 @@
 # alpha: Scalar alpha
 # beta: Scalar beta
 
+alpha = 0., 3.45
+beta = 0., -4.86
 op = gemm
 nb = 16, 17
 ib = 8
diff --git a/testing/input/hemm.in b/testing/input/hemm.in
index a571443045e1661b43a50adcd3bb9f96d5038362..047e5e9a6192508b071b714e77b6b6f80c79b508 100644
--- a/testing/input/hemm.in
+++ b/testing/input/hemm.in
@@ -15,6 +15,8 @@
 # beta: Scalar beta
 # bump: bump value for Hermitian matrices
 
+alpha = 0., 3.45
+beta = 0., -4.86
 op = hemm
 nb = 16, 17
 ib = 8
diff --git a/testing/input/her2k.in b/testing/input/her2k.in
index 8b420b62aa61c76e1937b7adbef00a6453d04c7c..261c63cee3a189eb96d5616ff494db43dc3c67ae 100644
--- a/testing/input/her2k.in
+++ b/testing/input/her2k.in
@@ -15,6 +15,8 @@
 # beta: Scalar beta
 # bump: Bump value for symmetric matrices
 
+alpha = 0., 3.45
+beta = 0., -4.86
 op = her2k
 nb = 16, 17
 ib = 8
diff --git a/testing/input/herk.in b/testing/input/herk.in
index c00df4c16f8fb4fa1f1c97e975a117d3de1b1974..eeec8f5201125c10128242fab32e7cced7250c83 100644
--- a/testing/input/herk.in
+++ b/testing/input/herk.in
@@ -14,6 +14,8 @@
 # beta: Scalar beta
 # bump: Bump value for symmetric matrices
 
+alpha = 0., 3.45
+beta = 0., -4.86
 op = herk
 nb = 16, 17
 ib = 8
diff --git a/testing/input/lascal.in b/testing/input/lascal.in
index ba7171b1b6abf98c90a8ab333a937456f566b4a6..14bccea0cb61f9ee3c6d09557e42c7b3f4340351 100644
--- a/testing/input/lascal.in
+++ b/testing/input/lascal.in
@@ -10,6 +10,7 @@
 # uplo: Part of the matrix to be copied (0 for Upper, 1 for Lower and 2 for UpperLower)
 # alpha: Scale to apply
 
+alpha = 0., 3.45
 op = lascal
 nb = 16, 17
 ib = 8
diff --git a/testing/input/symm.in b/testing/input/symm.in
index 17c7c3dacaa616116c1aa8fba89a6b5797034724..6981e403d4a56a29242983db24e61e02d3a7f799 100644
--- a/testing/input/symm.in
+++ b/testing/input/symm.in
@@ -15,6 +15,8 @@
 # beta: Scalar beta
 # bump: bump value for Hermitian matrices
 
+alpha = 0., 3.45
+beta = 0., -4.86
 op = symm
 nb = 16, 17
 ib = 8
diff --git a/testing/input/syr2k.in b/testing/input/syr2k.in
index fe434797ba148ca5847228e1c08ea845dfb42af8..24ced3073bec2c06dee53f4ea44803791a5c779d 100644
--- a/testing/input/syr2k.in
+++ b/testing/input/syr2k.in
@@ -15,6 +15,8 @@
 # beta: Scalar beta
 # bump: Bump value for symmetric matrices
 
+alpha = 0., 3.45
+beta = 0., -4.86
 op = syr2k
 nb = 16, 17
 ib = 8
diff --git a/testing/input/syrk.in b/testing/input/syrk.in
index 82daa929d76c69cfd164dceea7af1632d9c89fd4..b37b6a823d5db1bf788dd046190c39852219d6ae 100644
--- a/testing/input/syrk.in
+++ b/testing/input/syrk.in
@@ -14,6 +14,8 @@
 # beta: Scalar beta
 # bump: Bump value for symmetric matrices
 
+alpha = 0., 3.45
+beta = 0., -4.86
 op = syrk
 nb = 16, 17
 ib = 8
diff --git a/testing/input/tradd.in b/testing/input/tradd.in
index b930006627c68b1e14a3ebe245fb418e53150bdb..a574f03519847cf4183b360ba06e743d2c243492 100644
--- a/testing/input/tradd.in
+++ b/testing/input/tradd.in
@@ -13,6 +13,8 @@
 # alpha: Scalar alpha
 # beta: Scalar beta
 
+alpha = 0., 3.45
+beta = 0., -4.86
 op = tradd
 nb = 16, 17
 ib = 8
diff --git a/testing/input/trmm.in b/testing/input/trmm.in
index d570337401e27e1c6991f68b42cb09d5a363e0df..6c6228f1430fae996d28e3f4588d2cff4764f665 100644
--- a/testing/input/trmm.in
+++ b/testing/input/trmm.in
@@ -14,6 +14,7 @@
 # diag: Whether or not A is unit triangular
 # alpha: Scalar alpha
 
+alpha = 0., 3.45
 op = trmm
 nb = 16, 17
 ib = 8
diff --git a/testing/input/trsm.in b/testing/input/trsm.in
index 2882e91427de91845e5a63c5a5188d393e1e505e..93098cc254da1411d468f01b564e5bb50dcb3ca9 100644
--- a/testing/input/trsm.in
+++ b/testing/input/trsm.in
@@ -15,6 +15,7 @@
 # diag: Whether or not A is unit triangular
 # alpha: Scalar alpha
 
+alpha = 0., 3.45
 op = trsm
 nb = 16, 17
 ib = 8
diff --git a/testing/testing_zcheck.c b/testing/testing_zcheck.c
index 5a5f9f302e24a3c962aac79989f521f683e5feae..e0d4c1964c4ef857b78d3d79f1f9829a84ae039f 100644
--- a/testing/testing_zcheck.c
+++ b/testing/testing_zcheck.c
@@ -105,7 +105,12 @@ int check_zmatrices( run_arg_list_t *args, cham_uplo_t uplo, CHAM_desc_t *descA,
             Rnorm = LAPACKE_zlantr_work( LAPACK_COL_MAJOR, 'M', chameleon_lapack_const(uplo), 'N',
                                          M, N, B, LDA, work );
         }
-        result = Rnorm / (Anorm * eps);
+        if ( Anorm != 0. ) {
+            result = Rnorm / (Anorm * eps);
+        }
+        else {
+            result = Rnorm;
+        }
 
         /* Verifies if the result is inside a threshold */
         if (  isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
@@ -537,7 +542,16 @@ int check_zgemm( run_arg_list_t *args, cham_trans_t transA, cham_trans_t transB,
         /* Calculates the norm with the core function's result */
         Rnorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Cref, LDC, NULL );
 
-        result = Rnorm / ((cabs(alpha) * max(Anorm, Bnorm) + cabs(beta) * Crefnorm) * K * eps);
+        if ( ( alpha != 0. ) || (beta != 0. ) ) {
+            result = Rnorm / ((cabs(alpha) * max(Anorm, Bnorm) + cabs(beta) * Crefnorm) * K * eps);
+        }
+        else {
+            result = Rnorm;
+        }
+        run_arg_add_double( args, "||A||", Anorm );
+        run_arg_add_double( args, "||B||", Bnorm );
+        run_arg_add_double( args, "||C||", Crefnorm );
+        run_arg_add_double( args, "||R||", Rnorm );
 
         /* Verifies if the result is inside a threshold */
         if (  isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
@@ -685,7 +699,12 @@ int check_zsymm( run_arg_list_t *args, cham_mtxtype_t matrix_type, cham_side_t s
         Clapacknorm = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Cref, LDC, NULL );
         Rnorm       = LAPACKE_zlange_work( LAPACK_COL_MAJOR, 'M', M, N, Cref, LDC, NULL );
 
-        result = Rnorm / ((cabs(alpha) * max(Anorm, Bnorm) + cabs(beta) * Crefnorm) * An * eps);
+        if ( ( alpha != 0. ) || (beta != 0. ) ) {
+            result = Rnorm / ((cabs(alpha) * max(Anorm, Bnorm) + cabs(beta) * Crefnorm) * An * eps);
+        }
+        else {
+            result = Rnorm;
+        }
 
         /* Verifies if the result is inside a threshold */
         if (  isnan(Rnorm) || isinf(Rnorm) || isnan(result) || isinf(result) || (result > 10.0) ) {
diff --git a/testing/testing_zgemm.c b/testing/testing_zgemm.c
index 17d39874d0d3799dbdb482a789f58e25393fd041..b3f2bea7cc7190abfa2487585a1efe09f912bede 100644
--- a/testing/testing_zgemm.c
+++ b/testing/testing_zgemm.c
@@ -114,7 +114,7 @@ testing_t   test_zgemm;
 const char *zgemm_params[] = { "mtxfmt", "nb", "transA", "transB", "m",     "n",     "k",     "lda", "ldb",
                                "ldc", "alpha",  "beta",   "seedA", "seedB", "seedC", NULL };
 const char *zgemm_output[] = { NULL };
-const char *zgemm_outchk[] = { "RETURN", NULL };
+const char *zgemm_outchk[] = { "||A||", "||B||", "||C||", "||R||", "RETURN", NULL };
 
 /**
  * @brief Testing registration function