From 829a343c2cf9c1d6a98382ee76208b492c6a9063 Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Fri, 26 Jun 2020 18:03:53 +0200
Subject: [PATCH] Reduce the data access types whenever it's possible to get
 more flexibility with the allocation on the fly

---
 runtime/starpu/codelets/codelet_zaxpy.c   |  4 ++++
 runtime/starpu/codelets/codelet_zgeadd.c  |  8 +++++++-
 runtime/starpu/codelets/codelet_zgemm.c   |  8 +++++++-
 runtime/starpu/codelets/codelet_zgram.c   | 20 ++++++++++----------
 runtime/starpu/codelets/codelet_zhe2ge.c  | 10 +++++-----
 runtime/starpu/codelets/codelet_zhemm.c   |  8 +++++++-
 runtime/starpu/codelets/codelet_zher2k.c  |  8 +++++++-
 runtime/starpu/codelets/codelet_zherk.c   |  8 +++++++-
 runtime/starpu/codelets/codelet_zlascal.c | 20 ++++++++++++++------
 runtime/starpu/codelets/codelet_zsymm.c   |  8 +++++++-
 runtime/starpu/codelets/codelet_zsyr2k.c  |  8 +++++++-
 runtime/starpu/codelets/codelet_zsyrk.c   |  8 +++++++-
 runtime/starpu/codelets/codelet_ztradd.c  |  8 +++++++-
 runtime/starpu/codelets/codelet_ztrmm.c   |  7 ++++++-
 runtime/starpu/include/runtime_codelets.h |  2 +-
 15 files changed, 103 insertions(+), 32 deletions(-)

diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c
index 88cf08e02..7857b25e8 100644
--- a/runtime/starpu/codelets/codelet_zaxpy.c
+++ b/runtime/starpu/codelets/codelet_zaxpy.c
@@ -52,6 +52,10 @@ void INSERT_TASK_zaxpy( const RUNTIME_option_t *options,
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
 
+    if ( alpha == 0. ) {
+        return;
+    }
+
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
     CHAMELEON_ACCESS_RW(B, Bm, Bn);
diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c
index 27ce15a01..20d1b23bd 100644
--- a/runtime/starpu/codelets/codelet_zgeadd.c
+++ b/runtime/starpu/codelets/codelet_zgeadd.c
@@ -144,10 +144,16 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                          CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb,
+                                    beta, B, Bm, Bn );
+    }
+
     struct starpu_codelet *codelet = &cl_zgeadd;
     void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -162,7 +168,7 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
         STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
         STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_VALUE,    &beta,               sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,        RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        accessB,          RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 034136bed..1eed0875f 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -108,11 +108,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
                                                    const CHAM_desc_t *B, int Bm, int Bn,
                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zgemm;
     void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -131,7 +137,7 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
         STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_R,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_VALUE,    &beta,              sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,          RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zgram.c b/runtime/starpu/codelets/codelet_zgram.c
index 83643fc4f..ff38f7308 100644
--- a/runtime/starpu/codelets/codelet_zgram.c
+++ b/runtime/starpu/codelets/codelet_zgram.c
@@ -51,21 +51,21 @@ void INSERT_TASK_zgram( const RUNTIME_option_t *options,
                         const CHAM_desc_t *Di, int Dim, int Din,
                         const CHAM_desc_t *Dj, int Djm, int Djn,
                         const CHAM_desc_t *D, int Dm, int Dn,
-                        CHAM_desc_t *A, int Am, int An)
+                        CHAM_desc_t *A, int Am, int An )
 {
-  struct starpu_codelet *codelet = &cl_zgram;
-  void (*callback)(void*) = options->profiling ? cl_zgram_callback : NULL;
+    struct starpu_codelet *codelet = &cl_zgram;
+    void (*callback)(void*) = options->profiling ? cl_zgram_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
 
-  CHAMELEON_BEGIN_ACCESS_DECLARATION;
-  CHAMELEON_ACCESS_R(Di, Dim, Din);
-  CHAMELEON_ACCESS_R(Dj, Djm, Djn);
-  CHAMELEON_ACCESS_R(D, Dm, Dn);
-  CHAMELEON_ACCESS_RW(A, Am, An);
-  CHAMELEON_END_ACCESS_DECLARATION;
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(Di, Dim, Din);
+    CHAMELEON_ACCESS_R(Dj, Djm, Djn);
+    CHAMELEON_ACCESS_R(D, Dm, Dn);
+    CHAMELEON_ACCESS_RW(A, Am, An);
+    CHAMELEON_END_ACCESS_DECLARATION;
 
-  starpu_insert_task(
+    starpu_insert_task(
         starpu_mpi_codelet(codelet),
         STARPU_VALUE,    &uplo,                      sizeof(int),
         STARPU_VALUE,    &m,                         sizeof(int),
diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c
index c7a24022b..2f5fc6a58 100644
--- a/runtime/starpu/codelets/codelet_zhe2ge.c
+++ b/runtime/starpu/codelets/codelet_zhe2ge.c
@@ -49,11 +49,11 @@ CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func)
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo,
-                       int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo,
+                         int m, int n, int mb,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     (void)mb;
     struct starpu_codelet *codelet = &cl_zhe2ge;
diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c
index b6a827896..1d19bc6ae 100644
--- a/runtime/starpu/codelets/codelet_zhemm.c
+++ b/runtime/starpu/codelets/codelet_zhemm.c
@@ -107,11 +107,17 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
                       const CHAM_desc_t *B, int Bm, int Bn,
                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zhemm;
     void (*callback)(void*) = options->profiling ? cl_zhemm_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -129,7 +135,7 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
         STARPU_R,               RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_R,               RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_VALUE,    &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,               RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,                RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c
index 291fef279..d8b8b11ba 100644
--- a/runtime/starpu/codelets/codelet_zher2k.c
+++ b/runtime/starpu/codelets/codelet_zher2k.c
@@ -104,11 +104,17 @@ INSERT_TASK_zher2k( const RUNTIME_option_t *options,
                                                  const CHAM_desc_t *B, int Bm, int Bn,
                     double beta,                 const CHAM_desc_t *C, int Cm, int Cn )
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, uplo, n, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zher2k;
     void (*callback)(void*) = options->profiling ? cl_zher2k_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -126,7 +132,7 @@ INSERT_TASK_zher2k( const RUNTIME_option_t *options,
         STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_VALUE,      &beta,                     sizeof(double),
-        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,                  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index 6a8b17cd6..a1e2adf8e 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -99,11 +99,17 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options,
                       double alpha, const CHAM_desc_t *A, int Am, int An,
                       double beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, uplo, n, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zherk;
     void (*callback)(void*) = options->profiling ? cl_zherk_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -119,7 +125,7 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options,
         STARPU_VALUE,    &alpha,             sizeof(double),
         STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_VALUE,    &beta,              sizeof(double),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,          RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c
index 0916e8aa5..129a8afec 100644
--- a/runtime/starpu/codelets/codelet_zlascal.c
+++ b/runtime/starpu/codelets/codelet_zlascal.c
@@ -45,12 +45,20 @@ static void cl_zlascal_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func)
 
-void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
-                        cham_uplo_t uplo,
-                        int m, int n, int nb,
-                        CHAMELEON_Complex64_t alpha,
-                        const CHAM_desc_t *A, int Am, int An)
+void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo,
+                          int m, int n, int nb,
+                          CHAMELEON_Complex64_t alpha,
+                          const CHAM_desc_t *A, int Am, int An)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlaset( options, uplo, m, n,
+                                   alpha, alpha, A, Am, An );
+    }
+    else if ( alpha == 1. ) {
+        return;
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zlascal;
     void (*callback)(void*) = options->profiling ? cl_zlascal_callback : NULL;
@@ -67,7 +75,7 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
         STARPU_VALUE,    &m,                  sizeof(int),
         STARPU_VALUE,    &n,                  sizeof(int),
         STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_RW,        RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c
index 689be9b62..6bc2d770d 100644
--- a/runtime/starpu/codelets/codelet_zsymm.c
+++ b/runtime/starpu/codelets/codelet_zsymm.c
@@ -107,11 +107,17 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
                       const CHAM_desc_t *B, int Bm, int Bn,
                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zsymm;
     void (*callback)(void*) = options->profiling ? cl_zsymm_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -129,7 +135,7 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
         STARPU_R,               RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_R,               RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_VALUE,    &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,               RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,                RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c
index 86037a377..d14520cbe 100644
--- a/runtime/starpu/codelets/codelet_zsyr2k.c
+++ b/runtime/starpu/codelets/codelet_zsyr2k.c
@@ -103,11 +103,17 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
                        const CHAM_desc_t *B, int Bm, int Bn,
                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, uplo, n, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zsyr2k;
     void (*callback)(void*) = options->profiling ? cl_zsyr2k_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -125,7 +131,7 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
         STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,                  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index 66782975f..148c836e9 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -99,11 +99,17 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, uplo, n, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_zsyrk;
     void (*callback)(void*) = options->profiling ? cl_zsyrk_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessC = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -119,7 +125,7 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
         STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
         STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        accessC,                  RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c
index e19dd2636..d4469556c 100644
--- a/runtime/starpu/codelets/codelet_ztradd.c
+++ b/runtime/starpu/codelets/codelet_ztradd.c
@@ -112,10 +112,16 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                          CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, uplo, m, n, nb,
+                                    beta, B, Bm, Bn );
+    }
+
     struct starpu_codelet *codelet = &cl_ztradd;
     void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
     starpu_option_request_t* schedopt = (starpu_option_request_t *)(options->request->schedopt);
     int workerid = (schedopt == NULL) ? -1 : schedopt->workerid;
+    int accessB = ( beta == 0. ) ? STARPU_W : STARPU_RW;
 
     CHAMELEON_BEGIN_ACCESS_DECLARATION;
     CHAMELEON_ACCESS_R(A, Am, An);
@@ -131,7 +137,7 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
         STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
         STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_VALUE,    &beta,               sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,        RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        accessB,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_PRIORITY,  options->priority,
         STARPU_CALLBACK,  callback,
         STARPU_EXECUTE_ON_WORKER, workerid,
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index a1d24cf95..e8ed6d596 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -102,6 +102,11 @@ void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                       const CHAM_desc_t *B, int Bm, int Bn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlaset( options, ChamUpperLower, m, n,
+                                   alpha, alpha, B, Bm, Bn );
+    }
+
     (void)nb;
     struct starpu_codelet *codelet = &cl_ztrmm;
     void (*callback)(void*) = options->profiling ? cl_ztrmm_callback : NULL;
@@ -122,7 +127,7 @@ void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
         STARPU_VALUE,         &m,                        sizeof(int),
         STARPU_VALUE,         &n,                        sizeof(int),
         STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_R,                  RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
         STARPU_RW,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
         STARPU_PRIORITY,    options->priority,
         STARPU_CALLBACK,    callback,
diff --git a/runtime/starpu/include/runtime_codelets.h b/runtime/starpu/include/runtime_codelets.h
index 9b9fdc5b1..8c66bf799 100644
--- a/runtime/starpu/include/runtime_codelets.h
+++ b/runtime/starpu/include/runtime_codelets.h
@@ -47,7 +47,7 @@
         .cpu_func  = ((cpu_func_name)),                                 \
         CODELET_CUDA_FLAGS(cuda_flags)                                  \
         .cuda_func = ((cuda_func_name)),                                \
-        .nbuffers  = ((_nbuffers)),                                     \
+        .nbuffers  = STARPU_VARIABLE_NBUFFERS,                          \
         .model     = &cl_##cl_name##_model,                             \
         .name      = #cl_name                                           \
     };                                                                  \
-- 
GitLab