diff --git a/runtime/quark/codelets/codelet_zaxpy.c b/runtime/quark/codelets/codelet_zaxpy.c
index 04cc7fad949fa01e50a62563081ce4b8cc1617f8..bb3357b8c18c70d3d55ba926782ad1458c33c5ff 100644
--- a/runtime/quark/codelets/codelet_zaxpy.c
+++ b/runtime/quark/codelets/codelet_zaxpy.c
@@ -39,6 +39,10 @@ void INSERT_TASK_zaxpy(const RUNTIME_option_t *options,
                       const CHAM_desc_t *A, int Am, int An, int incA,
                       const CHAM_desc_t *B, int Bm, int Bn, int incB)
 {
+    if ( alpha == 0. ) {
+        return;
+    }
+
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_AXPY;
     QUARK_Insert_Task(opt->quark, CORE_zaxpy_quark, (Quark_Task_Flags*)opt,
diff --git a/runtime/quark/codelets/codelet_zgeadd.c b/runtime/quark/codelets/codelet_zgeadd.c
index d95e443814b94c21a81990aceceae59751155f2b..6fa2cfb2b32c8063e66e584ca42e51ddbbe2e1b6 100644
--- a/runtime/quark/codelets/codelet_zgeadd.c
+++ b/runtime/quark/codelets/codelet_zgeadd.c
@@ -38,65 +38,19 @@ void CORE_zgeadd_quark(Quark *quark)
     return;
 }
 
-/**
- ******************************************************************************
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- * @brief Adds two general matrices together as in PBLAS pzgeadd.
- *
- *       B <- alpha * op(A)  + beta * B,
- *
- * where op(X) = X, X', or conj(X')
- *
- *******************************************************************************
- *
- * @param[in] trans
- *          Specifies whether the matrix A is non-transposed, transposed, or
- *          conjugate transposed
- *          = ChamNoTrans:   op(A) = A
- *          = ChamTrans:     op(A) = A'
- *          = ChamConjTrans: op(A) = conj(A')
- *
- * @param[in] M
- *          Number of rows of the matrices op(A) and B.
- *
- * @param[in] N
- *          Number of columns of the matrices op(A) and B.
- *
- * @param[in] alpha
- *          Scalar factor of A.
- *
- * @param[in] A
- *          Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M
- *          otherwise.
- *
- * @param[in] LDA
- *          Leading dimension of the array A. LDA >= max(1,k), with k=M, if
- *          trans = ChamNoTrans, and k=N otherwise.
- *
- * @param[in] beta
- *          Scalar factor of B.
- *
- * @param[in,out] B
- *          Matrix of size LDB-by-N.
- *          On exit, B = alpha * op(A) + beta * B
- *
- * @param[in] LDB
- *          Leading dimension of the array B. LDB >= max(1,M)
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
                          cham_trans_t trans, int m, int n, int nb,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                          CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb,
+                                    beta, B, Bm, Bn );
+    }
+
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    int accessB = ( beta == 0. ) ? OUTPUT : INOUT;
+
     DAG_CORE_GEADD;
     QUARK_Insert_Task(opt->quark, CORE_zgeadd_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                 &trans, VALUE,
@@ -105,7 +59,7 @@ void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
         sizeof(CHAMELEON_Complex64_t),         &alpha, VALUE,
         sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INPUT,
         sizeof(CHAMELEON_Complex64_t),         &beta,   VALUE,
-        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             INOUT,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             accessB,
         0);
 
     (void)nb;
diff --git a/runtime/quark/codelets/codelet_zgemm.c b/runtime/quark/codelets/codelet_zgemm.c
index 6def09b52ccce39402397f42c9faaa069329e5e7..9b513766324a29f3c01ccc6e71d80e5875070b5d 100644
--- a/runtime/quark/codelets/codelet_zgemm.c
+++ b/runtime/quark/codelets/codelet_zgemm.c
@@ -41,8 +41,7 @@ void CORE_zgemm_quark(Quark *quark)
     quark_unpack_args_10(quark, transA, transB, m, n, k, alpha, tileA, tileB, beta, tileC);
     TCORE_zgemm( transA, transB,
                  m, n, k,
-                 alpha, tileA,
-                        tileB,
+                 alpha, tileA, tileB,
                  beta,  tileC );
 }
 
@@ -50,10 +49,17 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
                       cham_trans_t transA, cham_trans_t transB,
                       int m, int n, int k, int nb,
                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
-                      const CHAM_desc_t *B, int Bm, int Bn,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
+                                                   const CHAM_desc_t *B, int Bm, int Bn,
+                      CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    int accessC = ( beta == 0. ) ? OUTPUT : INOUT;
+
     DAG_CORE_GEMM;
     QUARK_Insert_Task(opt->quark, CORE_zgemm_quark, (Quark_Task_Flags*)opt,
                       sizeof(int),                &transA,    VALUE,
@@ -65,6 +71,6 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
                       sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
                       sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INPUT,
                       sizeof(CHAMELEON_Complex64_t),         &beta,      VALUE,
-                      sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
+                      sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 accessC,
                       0);
 }
diff --git a/runtime/quark/codelets/codelet_zhe2ge.c b/runtime/quark/codelets/codelet_zhe2ge.c
index 7b4a425665848e1c19035b7a46a5c1d1991ff9e9..e8aefce457b3d255e15fa87e1877b1cc1ab56b9f 100644
--- a/runtime/quark/codelets/codelet_zhe2ge.c
+++ b/runtime/quark/codelets/codelet_zhe2ge.c
@@ -21,11 +21,6 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_ztile.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 static inline void CORE_zhe2ge_quark(Quark *quark)
 {
     cham_uplo_t uplo;
@@ -38,12 +33,11 @@ static inline void CORE_zhe2ge_quark(Quark *quark)
     TCORE_zhe2ge(uplo, M, N, tileA, tileB);
 }
 
-
-void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo,
-                       int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo,
+                         int m, int n, int mb,
+                         const CHAM_desc_t *A, int Am, int An,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LACPY;
diff --git a/runtime/quark/codelets/codelet_zhemm.c b/runtime/quark/codelets/codelet_zhemm.c
index 5ab6412228c9aa9ab0e8e891b436368684356a73..c55fa6901d5f0361faa0e4455733e541c80f1b4c 100644
--- a/runtime/quark/codelets/codelet_zhemm.c
+++ b/runtime/quark/codelets/codelet_zhemm.c
@@ -52,7 +52,14 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
                       const CHAM_desc_t *B, int Bm, int Bn,
                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    int accessC = ( beta == 0. ) ? OUTPUT : INOUT;
+
     DAG_CORE_HEMM;
     QUARK_Insert_Task(opt->quark, CORE_zhemm_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                &side,    VALUE,
@@ -63,7 +70,6 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
         sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),               INPUT,
         sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),               INPUT,
         sizeof(CHAMELEON_Complex64_t),         &beta,    VALUE,
-        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),               INOUT,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),               accessC,
         0);
 }
-
diff --git a/runtime/quark/codelets/codelet_zher2k.c b/runtime/quark/codelets/codelet_zher2k.c
index bd6437c53ec7861534693eb58655fe487f3b65b9..05b46cd1fbb1e0073e13cec5d5a0d57c6a399cc9 100644
--- a/runtime/quark/codelets/codelet_zher2k.c
+++ b/runtime/quark/codelets/codelet_zher2k.c
@@ -42,14 +42,22 @@ void CORE_zher2k_quark(Quark *quark)
                 n, k, alpha, tileA, tileB, beta, tileC);
 }
 
-void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans,
-                       int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
-                       const CHAM_desc_t *B, int Bm, int Bn,
-                       double beta, const CHAM_desc_t *C, int Cm, int Cn)
+void
+INSERT_TASK_zher2k( const RUNTIME_option_t *options,
+                    cham_uplo_t uplo, cham_trans_t trans,
+                    int n, int k, int nb,
+                    CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                                                 const CHAM_desc_t *B, int Bm, int Bn,
+                    double beta,                 const CHAM_desc_t *C, int Cm, int Cn )
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, uplo, n, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    int accessC = ( beta == 0. ) ? OUTPUT : INOUT;
+
     DAG_CORE_HER2K;
     QUARK_Insert_Task(opt->quark, CORE_zher2k_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                &uplo,      VALUE,
@@ -60,6 +68,6 @@ void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
         sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
         sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INPUT,
         sizeof(double),                     &beta,      VALUE,
-        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 accessC,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zherk.c b/runtime/quark/codelets/codelet_zherk.c
index 3d47a8e59f9f12df5fd2282c473e46e04e34f45a..7d11dfb52c1f181befa3f7c917e3699a7cd010f7 100644
--- a/runtime/quark/codelets/codelet_zherk.c
+++ b/runtime/quark/codelets/codelet_zherk.c
@@ -49,7 +49,14 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options,
                       double alpha, const CHAM_desc_t *A, int Am, int An,
                       double beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, uplo, n, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    int accessC = ( beta == 0. ) ? OUTPUT : INOUT;
+
     DAG_CORE_HERK;
     QUARK_Insert_Task(opt->quark, CORE_zherk_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                &uplo,      VALUE,
@@ -59,6 +66,6 @@ void INSERT_TASK_zherk(const RUNTIME_option_t *options,
         sizeof(double),                     &alpha,     VALUE,
         sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
         sizeof(double),                     &beta,      VALUE,
-        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 accessC,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zlascal.c b/runtime/quark/codelets/codelet_zlascal.c
index 716c85c6bf2560bd5c70e6027e509fe68a4a023d..67cdcb149bb78d466dc8ec5c411c701c176ed1a7 100644
--- a/runtime/quark/codelets/codelet_zlascal.c
+++ b/runtime/quark/codelets/codelet_zlascal.c
@@ -43,6 +43,14 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
                         CHAMELEON_Complex64_t alpha,
                         const CHAM_desc_t *A, int Am, int An)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlaset( options, uplo, m, n,
+                                   alpha, alpha, A, Am, An );
+    }
+    else if ( alpha == 1. ) {
+        return;
+    }
+
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LASCAL;
     QUARK_Insert_Task(opt->quark, CORE_zlascal_quark, (Quark_Task_Flags*)opt,
@@ -53,5 +61,3 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
         sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), INOUT,
         0);
 }
-
-
diff --git a/runtime/quark/codelets/codelet_zsymm.c b/runtime/quark/codelets/codelet_zsymm.c
index 6bccc1deebdca6e376843a60c66c28c5027aef02..71658b68c44370afe45810220bfee3794c67b0b6 100644
--- a/runtime/quark/codelets/codelet_zsymm.c
+++ b/runtime/quark/codelets/codelet_zsymm.c
@@ -52,7 +52,14 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
                       const CHAM_desc_t *B, int Bm, int Bn,
                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, ChamUpperLower, m, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    int accessC = ( beta == 0. ) ? OUTPUT : INOUT;
+
     DAG_CORE_SYMM;
     QUARK_Insert_Task(opt->quark, CORE_zsymm_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                &side,    VALUE,
@@ -63,6 +70,6 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
         sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),               INPUT,
         sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),               INPUT,
         sizeof(CHAMELEON_Complex64_t),         &beta,    VALUE,
-        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),               INOUT,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),               accessC,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zsyr2k.c b/runtime/quark/codelets/codelet_zsyr2k.c
index 0e41e44fa6c9f1e2241d2fb1490fb606b8056a25..d172bc35971ffa261fb37914fd7c19eda2980623 100644
--- a/runtime/quark/codelets/codelet_zsyr2k.c
+++ b/runtime/quark/codelets/codelet_zsyr2k.c
@@ -39,7 +39,7 @@ void CORE_zsyr2k_quark(Quark *quark)
 
     quark_unpack_args_9(quark, uplo, trans, n, k, alpha, tileA, tileB, beta, tileC);
     TCORE_zsyr2k(uplo, trans,
-                n, k, alpha, tileA, tileB, beta, tileC);
+                 n, k, alpha, tileA, tileB, beta, tileC);
 }
 
 void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
@@ -49,7 +49,14 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
                        const CHAM_desc_t *B, int Bm, int Bn,
                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, uplo, n, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    int accessC = ( beta == 0. ) ? OUTPUT : INOUT;
+
     DAG_CORE_SYR2K;
     QUARK_Insert_Task(opt->quark, CORE_zsyr2k_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                &uplo,      VALUE,
@@ -60,6 +67,6 @@ void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
         sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
         sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),                 INPUT,
         sizeof(CHAMELEON_Complex64_t),         &beta,      VALUE,
-        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 accessC,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_zsyrk.c b/runtime/quark/codelets/codelet_zsyrk.c
index d8c272f5013aba1eff3d67fcbbedc69add9c83a0..b58c022d35993f0874ffe64da6cbfc9d814774ac 100644
--- a/runtime/quark/codelets/codelet_zsyrk.c
+++ b/runtime/quark/codelets/codelet_zsyrk.c
@@ -49,7 +49,14 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn)
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, uplo, n, n, nb,
+                                    beta, C, Cm, Cn );
+    }
+
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    int accessC = ( beta == 0. ) ? OUTPUT : INOUT;
+
     DAG_CORE_SYRK;
     QUARK_Insert_Task(opt->quark, CORE_zsyrk_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                &uplo,      VALUE,
@@ -59,6 +66,6 @@ void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
         sizeof(CHAMELEON_Complex64_t),         &alpha,     VALUE,
         sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),                 INPUT,
         sizeof(CHAMELEON_Complex64_t),         &beta,      VALUE,
-        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 INOUT,
+        sizeof(void*), RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),                 accessC,
         0);
 }
diff --git a/runtime/quark/codelets/codelet_ztradd.c b/runtime/quark/codelets/codelet_ztradd.c
index f3a9e0d24dd2d72dfb866295dfaabee528f12086..6532b16008bfc22e1b5288d8cfe5289eaa7406d9 100644
--- a/runtime/quark/codelets/codelet_ztradd.c
+++ b/runtime/quark/codelets/codelet_ztradd.c
@@ -37,71 +37,19 @@ void CORE_ztradd_quark(Quark *quark)
     return;
 }
 
-/**
- ******************************************************************************
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd.
- *
- *       B <- alpha * op(A)  + beta * B,
- *
- * where op(X) = X, X', or conj(X')
- *
- *******************************************************************************
- *
- * @param[in] uplo
- *          Specifies the shape of A and B matrices:
- *          = ChamUpperLower: A and B are general matrices.
- *          = ChamUpper: op(A) and B are upper trapezoidal matrices.
- *          = ChamLower: op(A) and B are lower trapezoidal matrices.
- *
- * @param[in] trans
- *          Specifies whether the matrix A is non-transposed, transposed, or
- *          conjugate transposed
- *          = ChamNoTrans:   op(A) = A
- *          = ChamTrans:     op(A) = A'
- *          = ChamConjTrans: op(A) = conj(A')
- *
- * @param[in] M
- *          Number of rows of the matrices op(A) and B.
- *
- * @param[in] N
- *          Number of columns of the matrices op(A) and B.
- *
- * @param[in] alpha
- *          Scalar factor of A.
- *
- * @param[in] A
- *          Matrix of size LDA-by-N, if trans = ChamNoTrans, LDA-by-M
- *          otherwise.
- *
- * @param[in] LDA
- *          Leading dimension of the array A. LDA >= max(1,k), with k=M, if
- *          trans = ChamNoTrans, and k=N otherwise.
- *
- * @param[in] beta
- *          Scalar factor of B.
- *
- * @param[in,out] B
- *          Matrix of size LDB-by-N.
- *          On exit, B = alpha * op(A) + beta * B
- *
- * @param[in] LDB
- *          Leading dimension of the array B. LDB >= max(1,M)
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
                          CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn )
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlascal( options, uplo, m, n, nb,
+                                    beta, B, Bm, Bn );
+    }
+
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
+    int accessB = ( beta == 0. ) ? OUTPUT : INOUT;
+
     DAG_CORE_GEADD;
     QUARK_Insert_Task(opt->quark, CORE_ztradd_quark, (Quark_Task_Flags*)opt,
         sizeof(int),                 &uplo,  VALUE,
@@ -111,7 +59,7 @@ void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
         sizeof(CHAMELEON_Complex64_t),         &alpha, VALUE,
         sizeof(void*), RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),             INPUT,
         sizeof(CHAMELEON_Complex64_t),         &beta,   VALUE,
-        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             INOUT,
+        sizeof(void*), RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),             accessB,
         0);
 
     (void)nb;
diff --git a/runtime/quark/codelets/codelet_ztrmm.c b/runtime/quark/codelets/codelet_ztrmm.c
index 56d6afada16d189dcf06463cc2c93ef73cb958b3..df18b77bebac6a95f34d3aff32353d16fbf5d241 100644
--- a/runtime/quark/codelets/codelet_ztrmm.c
+++ b/runtime/quark/codelets/codelet_ztrmm.c
@@ -45,12 +45,17 @@ void CORE_ztrmm_quark(Quark *quark)
         tileB);
 }
 
-void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
-                      cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
-                      int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
-                      const CHAM_desc_t *B, int Bm, int Bn)
+void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
+                        cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
+                        int m, int n, int nb,
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
+                        const CHAM_desc_t *B, int Bm, int Bn )
 {
+    if ( alpha == 0. ) {
+        return INSERT_TASK_zlaset( options, ChamUpperLower, m, n,
+                                   alpha, alpha, B, Bm, Bn );
+    }
+
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_TRMM;
     QUARK_Insert_Task(opt->quark, CORE_ztrmm_quark, (Quark_Task_Flags*)opt,
diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c
index bd027eff028f460333ab83651600783c3d085389..65c82231e8205f3e666fb579135ae92f1ad9c660 100644
--- a/runtime/starpu/codelets/codelet_zgeadd.c
+++ b/runtime/starpu/codelets/codelet_zgeadd.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zgeadd StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Emmanuel Agullo
  * @author Cedric Castagnede
@@ -86,59 +84,6 @@ CODELETS(zgeadd, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
 CODELETS_CPU(zgeadd, cl_zgeadd_cpu_func)
 #endif
 
-/**
- ******************************************************************************
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- * @brief Adds two general matrices together as in PBLAS pzgeadd.
- *
- *       B <- alpha * op(A)  + beta * B,
- *
- * where op(X) = X, X', or conj(X')
- *
- *******************************************************************************
- *
- * @param[in] trans
- *          Specifies whether the matrix A is non-transposed, transposed, or
- *          conjugate transposed
- *          = ChamNoTrans:   op(A) = A
- *          = ChamTrans:     op(A) = A'
- *          = ChamConjTrans: op(A) = conj(A')
- *
- * @param[in] M
- *          Number of rows of the matrices op(A) and B.
- *
- * @param[in] N
- *          Number of columns of the matrices op(A) and B.
- *
- * @param[in] alpha
- *          Scalar factor of A.
- *
- * @param[in] A
- *          Matrix of size ldA-by-N, if trans = ChamNoTrans, ldA-by-M
- *          otherwise.
- *
- * @param[in] ldA
- *          Leading dimension of the array A. ldA >= max(1,k), with k=M, if
- *          trans = ChamNoTrans, and k=N otherwise.
- *
- * @param[in] beta
- *          Scalar factor of B.
- *
- * @param[in,out] B
- *          Matrix of size ldB-by-N.
- *          On exit, B = alpha * op(A) + beta * B
- *
- * @param[in] ldB
- *          Leading dimension of the array B. ldB >= max(1,M)
- *
- *******************************************************************************
- *
- * @retval CHAMELEON_SUCCESS successful exit
- * @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
                          cham_trans_t trans, int m, int n, int nb,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 54e6256b57d2b0447d579bfbaf59d075f4c33bc4..42bd6609ab848313fce0892dd463f4c94f502a08 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zgemm StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c
index fe1f9eb291209851ecfb84e1dd7039cfdd560d1a..203544170939a742ccaef1aa05b9066e4185d0a9 100644
--- a/runtime/starpu/codelets/codelet_zhe2ge.c
+++ b/runtime/starpu/codelets/codelet_zhe2ge.c
@@ -44,11 +44,6 @@ static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(zhe2ge, cl_zhe2ge_cpu_func)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options,
                          cham_uplo_t uplo,
                          int m, int n, int mb,
diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c
index 5c90271ece923555a05f3c3d9d2374b9ba41b000..f4963cacfd70f21410f743fc182eff072886976f 100644
--- a/runtime/starpu/codelets/codelet_zhemm.c
+++ b/runtime/starpu/codelets/codelet_zhemm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zhemm StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c
index 0e93a35c99f0b9469839b5c5e8e17d8428598bcd..e652db505a1da7519059c5a52ad6ffd154f900fe 100644
--- a/runtime/starpu/codelets/codelet_zher2k.c
+++ b/runtime/starpu/codelets/codelet_zher2k.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zher2k StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index 915cc9b77d4a13cdc43b4ba14c67c4871a49dd37..ec0f985b533c5e39a340095ab5301afd660e8cab 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zherk StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
@@ -88,11 +86,6 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
  */
 CODELETS(zherk, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC)
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
 void INSERT_TASK_zherk(const RUNTIME_option_t *options,
                       cham_uplo_t uplo, cham_trans_t trans,
                       int n, int k, int nb,
diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c
index d1bfc3fd35a70e56451b16e70e714433a49dbd1d..0142c39ec967856284333e28c97c5b7b8df3b6e9 100644
--- a/runtime/starpu/codelets/codelet_zlascal.c
+++ b/runtime/starpu/codelets/codelet_zlascal.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zlascal StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Dalal Sukkari
  * @author Lucas Barros de Assis
  * @date 2020-03-03
diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c
index 40ed44bcbb3e44904160bc28faa1d9d58dfccf32..b87b3bef619ec969a7d11e88318bd45522e5679b 100644
--- a/runtime/starpu/codelets/codelet_zsymm.c
+++ b/runtime/starpu/codelets/codelet_zsymm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zsymm StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c
index 51f013036ddde9370e06e62de325c32c259cb01a..82209455847a0c76a6d6297b2537ddab0187c717 100644
--- a/runtime/starpu/codelets/codelet_zsyr2k.c
+++ b/runtime/starpu/codelets/codelet_zsyr2k.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zsyr2k StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index 83c51f5997d6195cc5542ab63386e235b7241f21..9795d0a982665b29d1c1d5a3e20a44898fff1275 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -12,8 +12,6 @@
  * @brief Chameleon zsyrk StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Hatem Ltaief
  * @author Jakub Kurzak
  * @author Mathieu Faverge
diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c
index ac3dc8bfaecc14657dc48399b2c10a010babe83f..fbd6a0f8e8f89e58d1f8795c82698325d88f8440 100644
--- a/runtime/starpu/codelets/codelet_ztradd.c
+++ b/runtime/starpu/codelets/codelet_ztradd.c
@@ -12,8 +12,6 @@
  * @brief Chameleon ztradd StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Mathieu Faverge
  * @author Lucas Barros de Assis
  * @date 2020-03-03
@@ -48,65 +46,6 @@ static void cl_ztradd_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(ztradd, cl_ztradd_cpu_func)
 
-/**
- ******************************************************************************
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd.
- *
- *       B <- alpha * op(A)  + beta * B,
- *
- * where op(X) = X, X', or conj(X')
- *
- *******************************************************************************
- *
- * @param[in] uplo
- *          Specifies the shape of A and B matrices:
- *          = ChamUpperLower: A and B are general matrices.
- *          = ChamUpper: op(A) and B are upper trapezoidal matrices.
- *          = ChamLower: op(A) and B are lower trapezoidal matrices.
- *
- * @param[in] trans
- *          Specifies whether the matrix A is non-transposed, transposed, or
- *          conjugate transposed
- *          = ChamNoTrans:   op(A) = A
- *          = ChamTrans:     op(A) = A'
- *          = ChamConjTrans: op(A) = conj(A')
- *
- * @param[in] M
- *          Number of rows of the matrices op(A) and B.
- *
- * @param[in] N
- *          Number of columns of the matrices op(A) and B.
- *
- * @param[in] alpha
- *          Scalar factor of A.
- *
- * @param[in] A
- *          Matrix of size ldA-by-N, if trans = ChamNoTrans, ldA-by-M
- *          otherwise.
- *
- * @param[in] ldA
- *          Leading dimension of the array A. ldA >= max(1,k), with k=M, if
- *          trans = ChamNoTrans, and k=N otherwise.
- *
- * @param[in] beta
- *          Scalar factor of B.
- *
- * @param[in,out] B
- *          Matrix of size ldB-by-N.
- *          On exit, B = alpha * op(A) + beta * B
- *
- * @param[in] ldB
- *          Leading dimension of the array B. ldB >= max(1,M)
- *
- *******************************************************************************
- *
- *          @retval CHAMELEON_SUCCESS successful exit
- *          @retval <0 if -i, the i-th argument had an illegal value
- *
- */
 void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An,
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index d1404ba960ac1991c06b84dca012c2e2b930e248..e820a6d6b6b2bf65be16e5f6e06ea0a7a643ee57 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -12,8 +12,6 @@
  * @brief Chameleon ztrmm StarPU codelet
  *
  * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 0.9.2
  * @author Julien Langou
  * @author Henricus Bouwmeester
  * @author Mathieu Faverge