From 978a5d43489c004514a76e0f56b8fb0e446a3d4e Mon Sep 17 00:00:00 2001
From: Mathieu Faverge <mathieu.faverge@inria.fr>
Date: Fri, 8 Feb 2019 13:41:44 +0100
Subject: [PATCH] Cleanup OpenMP codelet and doc

---
 compute/zbuild.c                              |   8 +-
 compute/zgeadd.c                              |   6 +-
 compute/zgelqf.c                              |   8 +-
 compute/zgelqf_param.c                        |   8 +-
 compute/zgelqs.c                              |   8 +-
 compute/zgelqs_param.c                        |   8 +-
 compute/zgels.c                               |   8 +-
 compute/zgels_param.c                         |   8 +-
 compute/zgemm.c                               |   6 +-
 compute/zgeqrf.c                              |   8 +-
 compute/zgeqrf_param.c                        |   8 +-
 compute/zgeqrs.c                              |   8 +-
 compute/zgeqrs_param.c                        |   8 +-
 compute/zgesv_incpiv.c                        |  12 +-
 compute/zgesv_nopiv.c                         |  12 +-
 compute/zgesvd.c                              |   8 +-
 compute/zgetrf_incpiv.c                       |  12 +-
 compute/zgetrf_nopiv.c                        |  12 +-
 compute/zgetrs_incpiv.c                       |   8 +-
 compute/zgetrs_nopiv.c                        |   8 +-
 compute/zheevd.c                              |  14 +-
 compute/zhemm.c                               |   6 +-
 compute/zher2k.c                              |   6 +-
 compute/zherk.c                               |   6 +-
 compute/zhetrd.c                              |  14 +-
 compute/zlacpy.c                              |   3 +-
 compute/zlange.c                              |   6 +-
 compute/zlanhe.c                              |   6 +-
 compute/zlansy.c                              |   6 +-
 compute/zlantr.c                              |   6 +-
 compute/zlascal.c                             |   6 +-
 compute/zlaset.c                              |   3 +-
 compute/zlauum.c                              |   8 +-
 compute/zplghe.c                              |   8 +-
 compute/zplgsy.c                              |   8 +-
 compute/zplrnt.c                              |   8 +-
 compute/zposv.c                               |  12 +-
 compute/zpotrf.c                              |  12 +-
 compute/zpotri.c                              |  12 +-
 compute/zpotrimm.c                            |  12 +-
 compute/zpotrs.c                              |   8 +-
 compute/zsymm.c                               |   6 +-
 compute/zsyr2k.c                              |   6 +-
 compute/zsyrk.c                               |   6 +-
 compute/zsysv.c                               |   8 +-
 compute/zsytrf.c                              |  10 +-
 compute/zsytrs.c                              |   8 +-
 compute/ztile.c                               |   6 +-
 compute/ztpgqrt.c                             |   8 +-
 compute/ztpqrt.c                              |   8 +-
 compute/ztradd.c                              |   6 +-
 compute/ztrmm.c                               |   8 +-
 compute/ztrsm.c                               |   8 +-
 compute/ztrsmpl.c                             |   8 +-
 compute/ztrtri.c                              |  12 +-
 compute/zunglq.c                              |   8 +-
 compute/zunglq_param.c                        |   8 +-
 compute/zungqr.c                              |   8 +-
 compute/zungqr_param.c                        |   8 +-
 compute/zunmlq.c                              |   8 +-
 compute/zunmlq_param.c                        |   8 +-
 compute/zunmqr.c                              |   8 +-
 compute/zunmqr_param.c                        |   8 +-
 control/async.c                               |  12 +-
 control/auxiliary.c                           |   9 +-
 control/context.c                             |  12 +-
 control/control.c                             |  15 +-
 control/descriptor.c                          |  12 +-
 control/tile.c                                |   6 +-
 control/workspace.c                           |   3 +-
 control/workspace_z.c                         |  57 ++---
 coreblas/compute/core_zaxpy.c                 |   5 +-
 coreblas/compute/core_zgeadd.c                |   5 +-
 coreblas/compute/core_zgelqt.c                |   5 +-
 coreblas/compute/core_zgeqrt.c                |   5 +-
 coreblas/compute/core_zgesplit.c              |   5 +-
 coreblas/compute/core_zgessm.c                |   5 +-
 coreblas/compute/core_zgessq.c                |   5 +-
 coreblas/compute/core_zgetf2_nopiv.c          |   7 +-
 coreblas/compute/core_zgetrf_incpiv.c         |   7 +-
 coreblas/compute/core_zgetrf_nopiv.c          |   7 +-
 coreblas/compute/core_zherfb.c                |   5 +-
 coreblas/compute/core_zhessq.c                |   5 +-
 coreblas/compute/core_zlascal.c               |   5 +-
 coreblas/compute/core_zlatro.c                |   5 +-
 coreblas/compute/core_zpamm.c                 |   5 +-
 coreblas/compute/core_zparfb.c                |   5 +-
 coreblas/compute/core_zpemv.c                 |   5 +-
 coreblas/compute/core_zssssm.c                |   5 +-
 coreblas/compute/core_zsyssq.c                |   5 +-
 coreblas/compute/core_ztplqt.c                |   5 +-
 coreblas/compute/core_ztpmqrt.c               |   5 +-
 coreblas/compute/core_ztpqrt.c                |   5 +-
 coreblas/compute/core_ztradd.c                |   5 +-
 coreblas/compute/core_ztrssq.c                |   5 +-
 coreblas/compute/core_ztslqt.c                |   5 +-
 coreblas/compute/core_ztsmlq.c                |   5 +-
 coreblas/compute/core_ztsmlq_hetra1.c         |   5 +-
 coreblas/compute/core_ztsmqr.c                |   5 +-
 coreblas/compute/core_ztsmqr_hetra1.c         |   5 +-
 coreblas/compute/core_ztsqrt.c                |   5 +-
 coreblas/compute/core_ztstrf.c                |   7 +-
 coreblas/compute/core_zttlqt.c                |   5 +-
 coreblas/compute/core_zttmlq.c                |   5 +-
 coreblas/compute/core_zttmqr.c                |   5 +-
 coreblas/compute/core_zttqrt.c                |   5 +-
 coreblas/compute/core_zunmlq.c                |   5 +-
 coreblas/compute/core_zunmqr.c                |   5 +-
 .../eztrace_module/coreblas_eztrace_module    |   6 -
 cudablas/compute/cuda_zgeadd.c                |   5 +-
 include/chameleon/tasks_z.h                   | 181 +++-----------
 runtime/CMakeLists.txt                        |   1 -
 runtime/openmp/codelets/codelet_zgeadd.c      |  15 +-
 runtime/openmp/codelets/codelet_zgelqt.c      |   5 +-
 runtime/openmp/codelets/codelet_zgemm.c       |   2 +-
 runtime/openmp/codelets/codelet_zgeqrt.c      |   5 +-
 runtime/openmp/codelets/codelet_zgessm.c      |   7 +-
 runtime/openmp/codelets/codelet_zgetrf.c      |   2 +-
 .../openmp/codelets/codelet_zgetrf_incpiv.c   |   9 +-
 .../openmp/codelets/codelet_zgetrf_nopiv.c    |   9 +-
 runtime/openmp/codelets/codelet_zhemm.c       |   2 +-
 runtime/openmp/codelets/codelet_zhessq.c      |   2 +-
 runtime/openmp/codelets/codelet_zlacpy.c      |  20 +-
 runtime/openmp/codelets/codelet_zlag2c.c      |  23 +-
 runtime/openmp/codelets/codelet_zlascal.c     |   7 +-
 runtime/openmp/codelets/codelet_zlatro.c      |  12 +-
 runtime/openmp/codelets/codelet_zlauum.c      |   2 +-
 runtime/openmp/codelets/codelet_zplghe.c      |   8 +-
 runtime/openmp/codelets/codelet_zplgsy.c      |   6 +-
 runtime/openmp/codelets/codelet_zplrnt.c      |   6 +-
 runtime/openmp/codelets/codelet_zplssq.c      |   4 +-
 runtime/openmp/codelets/codelet_zssssm.c      |  13 +-
 runtime/openmp/codelets/codelet_zsymm.c       |   2 +-
 runtime/openmp/codelets/codelet_zsyssq.c      |   2 +-
 .../openmp/codelets/codelet_zsytrf_nopiv.c    |   2 +-
 runtime/openmp/codelets/codelet_ztile_zero.c  |  38 ---
 runtime/openmp/codelets/codelet_ztplqt.c      |  15 +-
 runtime/openmp/codelets/codelet_ztpmlqt.c     |  23 +-
 runtime/openmp/codelets/codelet_ztpmqrt.c     |  17 +-
 runtime/openmp/codelets/codelet_ztpqrt.c      |  17 +-
 runtime/openmp/codelets/codelet_ztradd.c      |  17 +-
 runtime/openmp/codelets/codelet_ztstrf.c      |   7 +-
 runtime/openmp/codelets/codelet_zunmlq.c      |   5 +-
 runtime/openmp/codelets/codelet_zunmqr.c      |   5 +-
 runtime/parsec/codelets/codelet_zgeadd.c      |  15 +-
 runtime/parsec/codelets/codelet_zgelqt.c      |   5 +-
 runtime/parsec/codelets/codelet_zgeqrt.c      |   5 +-
 runtime/parsec/codelets/codelet_zgessm.c      |   5 +-
 .../parsec/codelets/codelet_zgetrf_incpiv.c   |   7 +-
 .../parsec/codelets/codelet_zgetrf_nopiv.c    |   7 +-
 runtime/parsec/codelets/codelet_zlacpy.c      |  19 +-
 runtime/parsec/codelets/codelet_zpamm.c       | 224 ------------------
 runtime/parsec/codelets/codelet_zplssq.c      |  58 ++---
 runtime/parsec/codelets/codelet_ztile_zero.c  |  61 -----
 runtime/parsec/codelets/codelet_ztradd.c      |  15 +-
 runtime/quark/codelets/codelet_zgeadd.c       |  15 +-
 runtime/quark/codelets/codelet_zgelqt.c       |   5 +-
 runtime/quark/codelets/codelet_zgeqrt.c       |   5 +-
 runtime/quark/codelets/codelet_zgessm.c       |   5 +-
 .../quark/codelets/codelet_zgetrf_incpiv.c    |   7 +-
 runtime/quark/codelets/codelet_zgetrf_nopiv.c |   7 +-
 runtime/quark/codelets/codelet_zlacpy.c       |  20 +-
 runtime/quark/codelets/codelet_zpamm.c        | 220 -----------------
 runtime/quark/codelets/codelet_zplssq.c       |   8 +-
 runtime/quark/codelets/codelet_zssssm.c       |   5 +-
 runtime/quark/codelets/codelet_ztile_zero.c   |  58 -----
 runtime/quark/codelets/codelet_ztradd.c       |  15 +-
 runtime/quark/codelets/codelet_ztstrf.c       |   7 +-
 runtime/quark/codelets/codelet_zunmlq.c       |   5 +-
 runtime/quark/codelets/codelet_zunmqr.c       |   5 +-
 runtime/starpu/codelets/codelet_zasum.c       |  55 +++--
 runtime/starpu/codelets/codelet_zaxpy.c       |  53 ++---
 runtime/starpu/codelets/codelet_zbuild.c      |  57 +++--
 runtime/starpu/codelets/codelet_zgeadd.c      | 144 ++++++-----
 runtime/starpu/codelets/codelet_zgelqt.c      |   5 +-
 runtime/starpu/codelets/codelet_zgemm.c       |  90 +++----
 runtime/starpu/codelets/codelet_zgeqrt.c      |   5 +-
 runtime/starpu/codelets/codelet_zgessm.c      |  70 +++---
 runtime/starpu/codelets/codelet_zgessq.c      |  49 ++--
 runtime/starpu/codelets/codelet_zgetrf.c      |  61 +++--
 .../starpu/codelets/codelet_zgetrf_incpiv.c   |  72 +++---
 .../starpu/codelets/codelet_zgetrf_nopiv.c    |  71 +++---
 runtime/starpu/codelets/codelet_zhe2ge.c      |  46 ++--
 runtime/starpu/codelets/codelet_zhemm.c       |  89 ++++---
 runtime/starpu/codelets/codelet_zher2k.c      |  89 ++++---
 runtime/starpu/codelets/codelet_zherfb.c      |  89 ++++---
 runtime/starpu/codelets/codelet_zherk.c       |  80 +++----
 runtime/starpu/codelets/codelet_zhessq.c      |  43 ++--
 runtime/starpu/codelets/codelet_zlacpy.c      |  66 +++---
 runtime/starpu/codelets/codelet_zlag2c.c      |  66 +++---
 runtime/starpu/codelets/codelet_zlange.c      |  70 +++---
 runtime/starpu/codelets/codelet_zlanhe.c      |  48 ++--
 runtime/starpu/codelets/codelet_zlansy.c      |  56 ++---
 runtime/starpu/codelets/codelet_zlantr.c      |  56 ++---
 runtime/starpu/codelets/codelet_zlascal.c     |  51 ++--
 runtime/starpu/codelets/codelet_zlaset.c      |  44 ++--
 runtime/starpu/codelets/codelet_zlaset2.c     |  42 ++--
 runtime/starpu/codelets/codelet_zlatro.c      |  58 ++---
 runtime/starpu/codelets/codelet_zlauum.c      |  45 ++--
 runtime/starpu/codelets/codelet_zplghe.c      |  57 +++--
 runtime/starpu/codelets/codelet_zplgsy.c      |  52 ++--
 runtime/starpu/codelets/codelet_zplrnt.c      |  52 ++--
 runtime/starpu/codelets/codelet_zplssq.c      |  72 +++---
 runtime/starpu/codelets/codelet_zpotrf.c      |  58 +++--
 runtime/starpu/codelets/codelet_zssssm.c      |  88 ++++---
 runtime/starpu/codelets/codelet_zsymm.c       |  89 ++++---
 runtime/starpu/codelets/codelet_zsyr2k.c      |  89 ++++---
 runtime/starpu/codelets/codelet_zsyrk.c       |  81 ++++---
 runtime/starpu/codelets/codelet_zsyssq.c      |  49 ++--
 .../starpu/codelets/codelet_zsytrf_nopiv.c    |  51 ++--
 runtime/starpu/codelets/codelet_ztile_zero.c  |  84 -------
 runtime/starpu/codelets/codelet_ztplqt.c      |  11 +-
 runtime/starpu/codelets/codelet_ztpmlqt.c     |  15 +-
 runtime/starpu/codelets/codelet_ztpmqrt.c     |  15 +-
 runtime/starpu/codelets/codelet_ztpqrt.c      |  11 +-
 runtime/starpu/codelets/codelet_ztradd.c      |  70 +++---
 runtime/starpu/codelets/codelet_ztrasm.c      |  59 +++--
 runtime/starpu/codelets/codelet_ztrmm.c       |  83 ++++---
 runtime/starpu/codelets/codelet_ztrsm.c       |  83 ++++---
 runtime/starpu/codelets/codelet_ztrssq.c      |  55 +++--
 runtime/starpu/codelets/codelet_ztrtri.c      |  69 +++---
 .../starpu/codelets/codelet_ztsmlq_hetra1.c   |  98 ++++----
 .../starpu/codelets/codelet_ztsmqr_hetra1.c   |  98 ++++----
 runtime/starpu/codelets/codelet_ztstrf.c      | 114 +++++----
 runtime/starpu/codelets/codelet_zunmlq.c      | 157 ++++++------
 runtime/starpu/codelets/codelet_zunmqr.c      | 157 ++++++------
 runtime/starpu/include/runtime_codelet_z.h    |   5 -
 227 files changed, 2359 insertions(+), 3476 deletions(-)
 delete mode 100644 runtime/openmp/codelets/codelet_ztile_zero.c
 delete mode 100644 runtime/parsec/codelets/codelet_zpamm.c
 delete mode 100644 runtime/parsec/codelets/codelet_ztile_zero.c
 delete mode 100644 runtime/quark/codelets/codelet_zpamm.c
 delete mode 100644 runtime/quark/codelets/codelet_ztile_zero.c
 delete mode 100644 runtime/starpu/codelets/codelet_ztile_zero.c

diff --git a/compute/zbuild.c b/compute/zbuild.c
index 22f2676df..6ec2419ca 100644
--- a/compute/zbuild.c
+++ b/compute/zbuild.c
@@ -66,9 +66,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -175,8 +174,7 @@ int CHAMELEON_zbuild( cham_uplo_t uplo, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgeadd.c b/compute/zgeadd.c
index cc14238e9..e2674316a 100644
--- a/compute/zgeadd.c
+++ b/compute/zgeadd.c
@@ -75,8 +75,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -211,8 +210,7 @@ int CHAMELEON_zgeadd( cham_trans_t trans, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgelqf.c b/compute/zgelqf.c
index 8c0cc4bec..975250334 100644
--- a/compute/zgelqf.c
+++ b/compute/zgelqf.c
@@ -56,9 +56,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -165,8 +164,7 @@ int CHAMELEON_zgelqf( int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgelqf_param.c b/compute/zgelqf_param.c
index fecf0f20f..c507463ef 100644
--- a/compute/zgelqf_param.c
+++ b/compute/zgelqf_param.c
@@ -54,9 +54,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -164,8 +163,7 @@ int CHAMELEON_zgelqf_param( const libhqr_tree_t *qrtree, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgelqs.c b/compute/zgelqs.c
index 08ec0dd3d..cc9b89fef 100644
--- a/compute/zgelqs.c
+++ b/compute/zgelqs.c
@@ -62,9 +62,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -186,8 +185,7 @@ int CHAMELEON_zgelqs( int M, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgelqs_param.c b/compute/zgelqs_param.c
index b594dd602..720ab5d4d 100644
--- a/compute/zgelqs_param.c
+++ b/compute/zgelqs_param.c
@@ -64,9 +64,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -192,8 +191,7 @@ int CHAMELEON_zgelqs_param( const libhqr_tree_t *qrtree, int M, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgels.c b/compute/zgels.c
index 99d7914da..276c4e3a6 100644
--- a/compute/zgels.c
+++ b/compute/zgels.c
@@ -89,9 +89,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -244,8 +243,7 @@ int CHAMELEON_zgels( cham_trans_t trans, int M, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \return CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgels_param.c b/compute/zgels_param.c
index ea23c9a49..34ab5c609 100644
--- a/compute/zgels_param.c
+++ b/compute/zgels_param.c
@@ -92,9 +92,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -250,8 +249,7 @@ int CHAMELEON_zgels_param( const libhqr_tree_t *qrtree, cham_trans_t trans, int
  *
  *******************************************************************************
  *
- * @return
- *          \return CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgemm.c b/compute/zgemm.c
index 8d7cfba19..e266039ad 100644
--- a/compute/zgemm.c
+++ b/compute/zgemm.c
@@ -114,8 +114,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -283,8 +282,7 @@ int CHAMELEON_zgemm( cham_trans_t transA, cham_trans_t transB, int M, int N, int
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgeqrf.c b/compute/zgeqrf.c
index f365fc510..9ccd619c9 100644
--- a/compute/zgeqrf.c
+++ b/compute/zgeqrf.c
@@ -55,9 +55,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -164,8 +163,7 @@ int CHAMELEON_zgeqrf( int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgeqrf_param.c b/compute/zgeqrf_param.c
index 36cb65d2a..d83e3f447 100644
--- a/compute/zgeqrf_param.c
+++ b/compute/zgeqrf_param.c
@@ -59,9 +59,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -173,8 +172,7 @@ int CHAMELEON_zgeqrf_param( const libhqr_tree_t *qrtree, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgeqrs.c b/compute/zgeqrs.c
index ee2d2bc6d..7af82e43f 100644
--- a/compute/zgeqrs.c
+++ b/compute/zgeqrs.c
@@ -62,9 +62,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -185,8 +184,7 @@ int CHAMELEON_zgeqrs( int M, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgeqrs_param.c b/compute/zgeqrs_param.c
index 15a5ff0d6..7fe000a50 100644
--- a/compute/zgeqrs_param.c
+++ b/compute/zgeqrs_param.c
@@ -58,9 +58,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -182,8 +181,7 @@ int CHAMELEON_zgeqrs_param( const libhqr_tree_t *qrtree, int M, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgesv_incpiv.c b/compute/zgesv_incpiv.c
index 6dd3073f6..275b6dc6a 100644
--- a/compute/zgesv_incpiv.c
+++ b/compute/zgesv_incpiv.c
@@ -67,10 +67,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, so the solution could not be computed.
  *
  *******************************************************************************
@@ -189,9 +188,8 @@ int CHAMELEON_zgesv_incpiv( int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, so the solution could not be computed.
  *
  *******************************************************************************
diff --git a/compute/zgesv_nopiv.c b/compute/zgesv_nopiv.c
index 7dbf73caa..f7dfbb880 100644
--- a/compute/zgesv_nopiv.c
+++ b/compute/zgesv_nopiv.c
@@ -66,10 +66,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, so the solution could not be computed.
  *
  *******************************************************************************
@@ -179,9 +178,8 @@ int CHAMELEON_zgesv_nopiv( int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, so the solution could not be computed.
  *
  *******************************************************************************
diff --git a/compute/zgesvd.c b/compute/zgesvd.c
index a9ba03d41..5b3cf9bd6 100644
--- a/compute/zgesvd.c
+++ b/compute/zgesvd.c
@@ -129,9 +129,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -318,8 +317,7 @@ int CHAMELEON_zgesvd( cham_job_t jobu, cham_job_t jobvt,
  *
  *******************************************************************************
  *
- * @return
- *          \return CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgetrf_incpiv.c b/compute/zgetrf_incpiv.c
index f990126eb..273b247f3 100644
--- a/compute/zgetrf_incpiv.c
+++ b/compute/zgetrf_incpiv.c
@@ -56,10 +56,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, and division by zero will occur
  *               if it is used to solve a system of equations.
  *
@@ -166,9 +165,8 @@ int CHAMELEON_zgetrf_incpiv( int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, and division by zero will occur
  *               if it is used to solve a system of equations.
  *
diff --git a/compute/zgetrf_nopiv.c b/compute/zgetrf_nopiv.c
index f99d3d9e1..0e1004c43 100644
--- a/compute/zgetrf_nopiv.c
+++ b/compute/zgetrf_nopiv.c
@@ -50,10 +50,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been
  *               completed, but the factor U is exactly singular, and division
  *               by zero will occur if it is used to solve a system of
  *               equations.
@@ -151,9 +150,8 @@ int CHAMELEON_zgetrf_nopiv( int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, U(i,i) is exactly zero. The factorization has been completed,
  *               but the factor U is exactly singular, and division by zero will occur
  *               if it is used to solve a system of equations.
  *
diff --git a/compute/zgetrs_incpiv.c b/compute/zgetrs_incpiv.c
index f0cf32cb5..8d9aa3630 100644
--- a/compute/zgetrs_incpiv.c
+++ b/compute/zgetrs_incpiv.c
@@ -69,9 +69,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \return <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -194,8 +193,7 @@ int CHAMELEON_zgetrs_incpiv( cham_trans_t trans, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zgetrs_nopiv.c b/compute/zgetrs_nopiv.c
index fb8ac0722..33b3cf70b 100644
--- a/compute/zgetrs_nopiv.c
+++ b/compute/zgetrs_nopiv.c
@@ -64,9 +64,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \return <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -181,8 +180,7 @@ int CHAMELEON_zgetrs_nopiv( cham_trans_t trans, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zheevd.c b/compute/zheevd.c
index 7f1a8b497..1291e43a8 100644
--- a/compute/zheevd.c
+++ b/compute/zheevd.c
@@ -79,10 +79,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if INFO = i, the algorithm failed to converge; i
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if INFO = i, the algorithm failed to converge; i
  *               off-diagonal elements of an intermediate tridiagonal
  *               form did not converge to zero.
  *
@@ -219,10 +218,9 @@ int CHAMELEON_zheevd( cham_job_t jobz, cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if INFO = i, the algorithm failed to converge; i
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if INFO = i, the algorithm failed to converge; i
  *               off-diagonal elements of an intermediate tridiagonal
  *               form did not converge to zero.
  *
diff --git a/compute/zhemm.c b/compute/zhemm.c
index fd968b42d..43f123975 100644
--- a/compute/zhemm.c
+++ b/compute/zhemm.c
@@ -90,8 +90,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -248,8 +247,7 @@ int CHAMELEON_zhemm( cham_side_t side, cham_uplo_t uplo, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zher2k.c b/compute/zher2k.c
index 216de17a5..fc8a746a3 100644
--- a/compute/zher2k.c
+++ b/compute/zher2k.c
@@ -92,8 +92,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -249,8 +248,7 @@ int CHAMELEON_zher2k( cham_uplo_t uplo, cham_trans_t trans, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zherk.c b/compute/zherk.c
index 13e59ce4c..ff3b21ddc 100644
--- a/compute/zherk.c
+++ b/compute/zherk.c
@@ -82,8 +82,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -225,8 +224,7 @@ int CHAMELEON_zherk( cham_uplo_t uplo, cham_trans_t trans, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zhetrd.c b/compute/zhetrd.c
index 0815e1dd6..f0686a16c 100644
--- a/compute/zhetrd.c
+++ b/compute/zhetrd.c
@@ -92,10 +92,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if INFO = i, the algorithm failed to converge; i
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if INFO = i, the algorithm failed to converge; i
  *               off-diagonal elements of an intermediate tridiagonal
  *               form did not converge to zero.
  *
@@ -245,10 +244,9 @@ int CHAMELEON_zhetrd( cham_job_t jobz, cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if INFO = i, the algorithm failed to converge; i
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if INFO = i, the algorithm failed to converge; i
  *               off-diagonal elements of an intermediate tridiagonal
  *               form did not converge to zero.
  *
diff --git a/compute/zlacpy.c b/compute/zlacpy.c
index 7bd169637..73ad779e6 100644
--- a/compute/zlacpy.c
+++ b/compute/zlacpy.c
@@ -180,8 +180,7 @@ int CHAMELEON_zlacpy( cham_uplo_t uplo, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlange.c b/compute/zlange.c
index 522c8c204..b1e9269d2 100644
--- a/compute/zlange.c
+++ b/compute/zlange.c
@@ -66,8 +66,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval the norm described above.
+ * @return the norm described above.
  *
  *******************************************************************************
  *
@@ -176,8 +175,7 @@ double CHAMELEON_zlange(cham_normtype_t norm, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlanhe.c b/compute/zlanhe.c
index e2dad154c..50f3d1f92 100644
--- a/compute/zlanhe.c
+++ b/compute/zlanhe.c
@@ -66,8 +66,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval the norm described above.
+ * @return the norm described above.
  *
  *******************************************************************************
  *
@@ -180,8 +179,7 @@ double CHAMELEON_zlanhe(cham_normtype_t norm, cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlansy.c b/compute/zlansy.c
index dc9b1236e..c7e39a453 100644
--- a/compute/zlansy.c
+++ b/compute/zlansy.c
@@ -66,8 +66,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval the norm described above.
+ * @return the norm described above.
  *
  *******************************************************************************
  *
@@ -180,8 +179,7 @@ double CHAMELEON_zlansy(cham_normtype_t norm, cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlantr.c b/compute/zlantr.c
index 6721a9b9f..005fea133 100644
--- a/compute/zlantr.c
+++ b/compute/zlantr.c
@@ -78,8 +78,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval the norm described above.
+ * @return the norm described above.
  *
  *******************************************************************************
  *
@@ -202,8 +201,7 @@ double CHAMELEON_zlantr(cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlascal.c b/compute/zlascal.c
index bae3815fb..0d0ff18b6 100644
--- a/compute/zlascal.c
+++ b/compute/zlascal.c
@@ -57,8 +57,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -168,8 +167,7 @@ int CHAMELEON_zlascal( cham_uplo_t uplo, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlaset.c b/compute/zlaset.c
index 7001e66a2..0ab77a34e 100644
--- a/compute/zlaset.c
+++ b/compute/zlaset.c
@@ -167,8 +167,7 @@ int CHAMELEON_zlaset( cham_uplo_t uplo, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zlauum.c b/compute/zlauum.c
index 9907d0b08..254eb2b18 100644
--- a/compute/zlauum.c
+++ b/compute/zlauum.c
@@ -58,9 +58,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -165,8 +164,7 @@ int CHAMELEON_zlauum( cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zplghe.c b/compute/zplghe.c
index 3fd07d511..ceb0a138b 100644
--- a/compute/zplghe.c
+++ b/compute/zplghe.c
@@ -56,9 +56,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -160,8 +159,7 @@ int CHAMELEON_zplghe( double bump, cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zplgsy.c b/compute/zplgsy.c
index 809e2a224..ff033d819 100644
--- a/compute/zplgsy.c
+++ b/compute/zplgsy.c
@@ -56,9 +56,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -160,8 +159,7 @@ int CHAMELEON_zplgsy( CHAMELEON_Complex64_t bump, cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zplrnt.c b/compute/zplrnt.c
index 3e15ea36a..56a3cedaf 100644
--- a/compute/zplrnt.c
+++ b/compute/zplrnt.c
@@ -49,9 +49,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -150,8 +149,7 @@ int CHAMELEON_zplrnt( int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zposv.c b/compute/zposv.c
index 055f17e88..668fec3c5 100644
--- a/compute/zposv.c
+++ b/compute/zposv.c
@@ -75,10 +75,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, the leading minor of order i of A is not positive definite, so the
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, the leading minor of order i of A is not positive definite, so the
  *               factorization could not be completed, and the solution has not been computed.
  *
  *******************************************************************************
@@ -206,9 +205,8 @@ int CHAMELEON_zposv( cham_uplo_t uplo, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, the leading minor of order i of A is not positive definite, so the
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, the leading minor of order i of A is not positive definite, so the
  *               factorization could not be completed, and the solution has not been computed.
  *
  *******************************************************************************
diff --git a/compute/zpotrf.c b/compute/zpotrf.c
index bb8485337..d7054e42d 100644
--- a/compute/zpotrf.c
+++ b/compute/zpotrf.c
@@ -62,10 +62,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, the leading minor of order i of A is not positive definite, so the
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, the leading minor of order i of A is not positive definite, so the
  *               factorization could not be completed, and the solution has not been computed.
  *
  *******************************************************************************
@@ -175,9 +174,8 @@ int CHAMELEON_zpotrf( cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, the leading minor of order i of A is not positive definite, so the
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, the leading minor of order i of A is not positive definite, so the
  *               factorization could not be completed, and the solution has not been computed.
  *
  *******************************************************************************
diff --git a/compute/zpotri.c b/compute/zpotri.c
index d903bda64..2de905c8d 100644
--- a/compute/zpotri.c
+++ b/compute/zpotri.c
@@ -53,10 +53,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, the (i,i) element of the factor U or L is
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, the (i,i) element of the factor U or L is
  *                zero, and the inverse could not be computed.
  *
  *******************************************************************************
@@ -162,9 +161,8 @@ int CHAMELEON_zpotri( cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, the leading minor of order i of A is not
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, the leading minor of order i of A is not
  *               positive definite, so the factorization could not be
  *               completed, and the solution has not been computed.
  *
diff --git a/compute/zpotrimm.c b/compute/zpotrimm.c
index 0f3d8146e..ca57f4962 100644
--- a/compute/zpotrimm.c
+++ b/compute/zpotrimm.c
@@ -53,10 +53,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, the (i,i) element of the factor U or L is
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, the (i,i) element of the factor U or L is
  *                zero, and the inverse could not be computed.
  *
  *******************************************************************************
@@ -184,9 +183,8 @@ int CHAMELEON_zpotrimm( cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, the leading minor of order i of A is not
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, the leading minor of order i of A is not
  *               positive definite, so the factorization could not be
  *               completed, and the solution has not been computed.
  *
diff --git a/compute/zpotrs.c b/compute/zpotrs.c
index 3e242d114..7cce83910 100644
--- a/compute/zpotrs.c
+++ b/compute/zpotrs.c
@@ -61,9 +61,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -183,8 +182,7 @@ int CHAMELEON_zpotrs( cham_uplo_t uplo, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zsymm.c b/compute/zsymm.c
index 4a64f907e..13221e335 100644
--- a/compute/zsymm.c
+++ b/compute/zsymm.c
@@ -90,8 +90,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -248,8 +247,7 @@ int CHAMELEON_zsymm( cham_side_t side, cham_uplo_t uplo, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zsyr2k.c b/compute/zsyr2k.c
index 9ef352142..0fe3e6f9e 100644
--- a/compute/zsyr2k.c
+++ b/compute/zsyr2k.c
@@ -92,8 +92,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -249,8 +248,7 @@ int CHAMELEON_zsyr2k( cham_uplo_t uplo, cham_trans_t trans, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zsyrk.c b/compute/zsyrk.c
index e1c6db986..91f4627b8 100644
--- a/compute/zsyrk.c
+++ b/compute/zsyrk.c
@@ -82,8 +82,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -225,8 +224,7 @@ int CHAMELEON_zsyrk( cham_uplo_t uplo, cham_trans_t trans, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zsysv.c b/compute/zsysv.c
index 5b40d66e5..baf78e90e 100644
--- a/compute/zsysv.c
+++ b/compute/zsysv.c
@@ -76,9 +76,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -203,8 +202,7 @@ int CHAMELEON_zsysv( cham_uplo_t uplo, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zsytrf.c b/compute/zsytrf.c
index 508f93a9a..44ea078c6 100644
--- a/compute/zsytrf.c
+++ b/compute/zsytrf.c
@@ -57,10 +57,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, the leading minor of order i of A is not positive definite, so the
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, the leading minor of order i of A is not positive definite, so the
  *               factorization could not be completed, and the solution has not been computed.
  *
  *******************************************************************************
@@ -169,8 +168,7 @@ int CHAMELEON_zsytrf( cham_uplo_t uplo, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zsytrs.c b/compute/zsytrs.c
index 90256661b..4eeb3d7d3 100644
--- a/compute/zsytrs.c
+++ b/compute/zsytrs.c
@@ -63,9 +63,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -182,8 +181,7 @@ int CHAMELEON_zsytrs( cham_uplo_t uplo, int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztile.c b/compute/ztile.c
index d95a729b7..3cafc9b7b 100644
--- a/compute/ztile.c
+++ b/compute/ztile.c
@@ -45,8 +45,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -122,8 +121,7 @@ int CHAMELEON_zLapack_to_Tile( CHAMELEON_Complex64_t *Af77, int LDA, CHAM_desc_t
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztpgqrt.c b/compute/ztpgqrt.c
index 4be3c8293..b9d07b870 100644
--- a/compute/ztpgqrt.c
+++ b/compute/ztpgqrt.c
@@ -115,9 +115,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -258,8 +257,7 @@ int CHAMELEON_ztpgqrt( int M, int N, int K, int L,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztpqrt.c b/compute/ztpqrt.c
index 8847d9235..04e7ddfa6 100644
--- a/compute/ztpqrt.c
+++ b/compute/ztpqrt.c
@@ -110,9 +110,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -235,8 +234,7 @@ int CHAMELEON_ztpqrt( int M, int N, int L,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztradd.c b/compute/ztradd.c
index f5f2d8217..b5e85ec81 100644
--- a/compute/ztradd.c
+++ b/compute/ztradd.c
@@ -81,8 +81,7 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
@@ -227,8 +226,7 @@ int CHAMELEON_ztradd( cham_uplo_t uplo, cham_trans_t trans, int M, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztrmm.c b/compute/ztrmm.c
index 96ef0f7e3..3380900f6 100644
--- a/compute/ztrmm.c
+++ b/compute/ztrmm.c
@@ -82,9 +82,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -249,8 +248,7 @@ int CHAMELEON_ztrmm( cham_side_t side, cham_uplo_t uplo,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztrsm.c b/compute/ztrsm.c
index abcdf8e9e..cc76ab7bd 100644
--- a/compute/ztrsm.c
+++ b/compute/ztrsm.c
@@ -83,9 +83,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -247,8 +246,7 @@ int CHAMELEON_ztrsm( cham_side_t side, cham_uplo_t uplo,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztrsmpl.c b/compute/ztrsmpl.c
index dd7859cf8..2cac2da62 100644
--- a/compute/ztrsmpl.c
+++ b/compute/ztrsmpl.c
@@ -61,9 +61,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -179,8 +178,7 @@ int CHAMELEON_ztrsmpl( int N, int NRHS,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/ztrtri.c b/compute/ztrtri.c
index cb19dffa9..6a2f8f321 100644
--- a/compute/ztrtri.c
+++ b/compute/ztrtri.c
@@ -61,10 +61,9 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *          \retval >0 if i, A(i,i) is exactly zero.  The triangular
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
+ * @retval >0 if i, A(i,i) is exactly zero.  The triangular
  *               matrix is singular and its inverse can not be computed.
  *
  *******************************************************************************
@@ -182,9 +181,8 @@ int CHAMELEON_ztrtri( cham_uplo_t uplo, cham_diag_t diag, int N,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval >0 if i, A(i,i) is exactly zero.  The triangular
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval >0 if i, A(i,i) is exactly zero.  The triangular
  *               matrix is singular and its inverse can not be computed.
  *
  *******************************************************************************
diff --git a/compute/zunglq.c b/compute/zunglq.c
index 410154646..ef284b861 100644
--- a/compute/zunglq.c
+++ b/compute/zunglq.c
@@ -62,9 +62,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval CHAMELEON_SUCCESS <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -181,8 +180,7 @@ int CHAMELEON_zunglq( int M, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zunglq_param.c b/compute/zunglq_param.c
index e6d369542..09f40a29f 100644
--- a/compute/zunglq_param.c
+++ b/compute/zunglq_param.c
@@ -60,9 +60,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval CHAMELEON_SUCCESS <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -180,8 +179,7 @@ int CHAMELEON_zunglq_param( const libhqr_tree_t *qrtree, int M, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zungqr.c b/compute/zungqr.c
index c51539616..6ae056b2d 100644
--- a/compute/zungqr.c
+++ b/compute/zungqr.c
@@ -62,9 +62,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -180,8 +179,7 @@ int CHAMELEON_zungqr( int M, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zungqr_param.c b/compute/zungqr_param.c
index 5b46d66ff..9ed032da7 100644
--- a/compute/zungqr_param.c
+++ b/compute/zungqr_param.c
@@ -60,9 +60,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -181,8 +180,7 @@ int CHAMELEON_zungqr_param( const libhqr_tree_t *qrtree,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zunmlq.c b/compute/zunmlq.c
index f460e12e4..f3948bf39 100644
--- a/compute/zunmlq.c
+++ b/compute/zunmlq.c
@@ -86,9 +86,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -234,8 +233,7 @@ int CHAMELEON_zunmlq( cham_side_t side, cham_trans_t trans, int M, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zunmlq_param.c b/compute/zunmlq_param.c
index 4c0a72358..46372cef9 100644
--- a/compute/zunmlq_param.c
+++ b/compute/zunmlq_param.c
@@ -86,9 +86,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -233,8 +232,7 @@ int CHAMELEON_zunmlq_param( const libhqr_tree_t *qrtree, cham_side_t side, cham_
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zunmqr.c b/compute/zunmqr.c
index 6271ed98e..78be51f52 100644
--- a/compute/zunmqr.c
+++ b/compute/zunmqr.c
@@ -88,9 +88,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -236,8 +235,7 @@ int CHAMELEON_zunmqr( cham_side_t side, cham_trans_t trans, int M, int N, int K,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/compute/zunmqr_param.c b/compute/zunmqr_param.c
index 5674ba090..434c16a04 100644
--- a/compute/zunmqr_param.c
+++ b/compute/zunmqr_param.c
@@ -89,9 +89,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  *******************************************************************************
  *
@@ -239,8 +238,7 @@ int CHAMELEON_zunmqr_param( const libhqr_tree_t *qrtree,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  *******************************************************************************
  *
diff --git a/control/async.c b/control/async.c
index 5e65c2b09..55351f6b6 100644
--- a/control/async.c
+++ b/control/async.c
@@ -86,8 +86,7 @@ int chameleon_sequence_wait(CHAM_context_t *chamctxt, RUNTIME_sequence_t *sequen
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Sequence_Create(RUNTIME_sequence_t **sequence)
@@ -117,8 +116,7 @@ int CHAMELEON_Sequence_Create(RUNTIME_sequence_t **sequence)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Sequence_Destroy(RUNTIME_sequence_t *sequence)
@@ -152,8 +150,7 @@ int CHAMELEON_Sequence_Destroy(RUNTIME_sequence_t *sequence)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Sequence_Wait(RUNTIME_sequence_t *sequence)
@@ -190,8 +187,7 @@ int CHAMELEON_Sequence_Wait(RUNTIME_sequence_t *sequence)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Sequence_Flush(RUNTIME_sequence_t *sequence, RUNTIME_request_t *request)
diff --git a/control/auxiliary.c b/control/auxiliary.c
index 032dc0684..5f90b85af 100644
--- a/control/auxiliary.c
+++ b/control/auxiliary.c
@@ -139,8 +139,7 @@ int chameleon_tune(cham_tasktype_t func, int M, int N, int NRHS)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Version(int *ver_major, int *ver_minor, int *ver_micro)
@@ -180,8 +179,7 @@ int CHAMELEON_Version(int *ver_major, int *ver_minor, int *ver_micro)
  *
  ******************************************************************************
  *
- * @return
- *          \retval Element size in bytes
+ * @return Element size in bytes
  *
  */
 int CHAMELEON_Element_Size(int type)
@@ -209,8 +207,7 @@ int CHAMELEON_Element_Size(int type)
  *
  ******************************************************************************
  *
- * @return
- *          \retval MPI rank
+ * @return MPI rank
  *
  */
 int CHAMELEON_My_Mpi_Rank(void)
diff --git a/control/context.c b/control/context.c
index 881abe974..fa0dcd250 100644
--- a/control/context.c
+++ b/control/context.c
@@ -123,8 +123,7 @@ int chameleon_context_destroy(){
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Enable(int option)
@@ -192,8 +191,7 @@ int CHAMELEON_Enable(int option)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Disable(int option)
@@ -256,8 +254,7 @@ int CHAMELEON_Disable(int option)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Set( int param, int value )
@@ -350,8 +347,7 @@ int CHAMELEON_Set( int param, int value )
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Get(int param, int *value)
diff --git a/control/control.c b/control/control.c
index 8a8de8717..08765ca2d 100644
--- a/control/control.c
+++ b/control/control.c
@@ -154,8 +154,7 @@ int __chameleon_finalize(void)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Pause(void)
@@ -178,8 +177,7 @@ int CHAMELEON_Pause(void)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Resume(void)
@@ -201,8 +199,7 @@ int CHAMELEON_Resume(void)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Distributed_start(void)
@@ -224,8 +221,7 @@ int CHAMELEON_Distributed_start(void)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Distributed_stop(void)
@@ -294,8 +290,7 @@ int CHAMELEON_Comm_rank()
  *
  ******************************************************************************
  *
- * @return
- *          \retval The number of CPU workers started
+ * @return The number of CPU workers started
  *
  */
 int CHAMELEON_GetThreadNbr( )
diff --git a/control/descriptor.c b/control/descriptor.c
index 06e52cdec..f32800a28 100644
--- a/control/descriptor.c
+++ b/control/descriptor.c
@@ -413,8 +413,7 @@ int chameleon_desc_check(const CHAM_desc_t *desc)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz,
@@ -476,8 +475,7 @@ int CHAMELEON_Desc_Create( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Desc_Create_User( CHAM_desc_t **descptr, void *mat, cham_flttype_t dtyp, int mb, int nb, int bsiz,
@@ -560,8 +558,7 @@ int CHAMELEON_Desc_Create_User( CHAM_desc_t **descptr, void *mat, cham_flttype_t
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz,
@@ -621,8 +618,7 @@ int CHAMELEON_Desc_Create_OOC_User(CHAM_desc_t **descptr, cham_flttype_t dtyp, i
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Desc_Create_OOC(CHAM_desc_t **descptr, cham_flttype_t dtyp, int mb, int nb, int bsiz,
diff --git a/control/tile.c b/control/tile.c
index 960c88f5d..560f5dd7d 100644
--- a/control/tile.c
+++ b/control/tile.c
@@ -44,8 +44,7 @@
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Lapack_to_Tile(void *Af77, int LDA, CHAM_desc_t *A)
@@ -86,8 +85,7 @@ int CHAMELEON_Lapack_to_Tile(void *Af77, int LDA, CHAM_desc_t *A)
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Tile_to_Lapack(CHAM_desc_t *A, void *Af77, int LDA)
diff --git a/control/workspace.c b/control/workspace.c
index 8039447fb..4a8b078e3 100644
--- a/control/workspace.c
+++ b/control/workspace.c
@@ -138,8 +138,7 @@ int chameleon_alloc_ipiv(int M, int N, cham_tasktype_t func, int type, CHAM_desc
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Dealloc_Workspace(CHAM_desc_t **desc)
diff --git a/control/workspace_z.c b/control/workspace_z.c
index 6009bac50..732d86fe0 100644
--- a/control/workspace_z.c
+++ b/control/workspace_z.c
@@ -45,8 +45,7 @@
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgeev(int N, CHAM_desc_t **descT, int p, int q) {
@@ -70,8 +69,7 @@ int CHAMELEON_Alloc_Workspace_zgeev(int N, CHAM_desc_t **descT, int p, int q) {
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgehrd(int N, CHAM_desc_t **descT, int p, int q) {
@@ -97,8 +95,7 @@ int CHAMELEON_Alloc_Workspace_zgehrd(int N, CHAM_desc_t **descT, int p, int q) {
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgebrd(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -125,8 +122,7 @@ int CHAMELEON_Alloc_Workspace_zgebrd(int M, int N, CHAM_desc_t **descT, int p, i
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgels(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -159,8 +155,7 @@ int CHAMELEON_Alloc_Workspace_zgels(int M, int N, CHAM_desc_t **descT, int p, in
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgels_Tile(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -193,8 +188,7 @@ int CHAMELEON_Alloc_Workspace_zgels_Tile(int M, int N, CHAM_desc_t **descT, int
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgeqrf(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -227,8 +221,7 @@ int CHAMELEON_Alloc_Workspace_zgeqrf(int M, int N, CHAM_desc_t **descT, int p, i
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgeqrf_Tile(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -261,8 +254,7 @@ int CHAMELEON_Alloc_Workspace_zgeqrf_Tile(int M, int N, CHAM_desc_t **descT, int
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgelqf(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -294,8 +286,7 @@ int CHAMELEON_Alloc_Workspace_zgelqf(int M, int N, CHAM_desc_t **descT, int p, i
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgelqf_Tile(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -324,8 +315,7 @@ int CHAMELEON_Alloc_Workspace_zgelqf_Tile(int M, int N, CHAM_desc_t **descT, int
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgesv_incpiv(int N, CHAM_desc_t **descL, int **IPIV, int p, int q) {
@@ -354,8 +344,7 @@ int CHAMELEON_Alloc_Workspace_zgesv_incpiv(int N, CHAM_desc_t **descL, int **IPI
  *
  ******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgesv_incpiv_Tile(int N, CHAM_desc_t **descL, int **IPIV, int p, int q)
@@ -388,8 +377,7 @@ int CHAMELEON_Alloc_Workspace_zgesv_incpiv_Tile(int N, CHAM_desc_t **descL, int
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgesvd(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -421,8 +409,7 @@ int CHAMELEON_Alloc_Workspace_zgesvd(int M, int N, CHAM_desc_t **descT, int p, i
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  ******************************************************************************
  *
@@ -456,8 +443,7 @@ int CHAMELEON_Alloc_Workspace_zgetrf_incpiv(int M, int N, CHAM_desc_t **descL, i
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zgetrf_incpiv_Tile(int N, CHAM_desc_t **descL, int **IPIV, int p, int q) {
@@ -488,8 +474,7 @@ int CHAMELEON_Alloc_Workspace_zgetrf_incpiv_Tile(int N, CHAM_desc_t **descL, int
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zheev(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -520,8 +505,7 @@ int CHAMELEON_Alloc_Workspace_zheev(int M, int N, CHAM_desc_t **descT, int p, in
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zheevd(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -552,8 +536,7 @@ int CHAMELEON_Alloc_Workspace_zheevd(int M, int N, CHAM_desc_t **descT, int p, i
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zhegv(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -584,8 +567,7 @@ int CHAMELEON_Alloc_Workspace_zhegv(int M, int N, CHAM_desc_t **descT, int p, in
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zhegvd(int M, int N, CHAM_desc_t **descT, int p, int q) {
@@ -616,8 +598,7 @@ int CHAMELEON_Alloc_Workspace_zhegvd(int M, int N, CHAM_desc_t **descT, int p, i
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
+ * @retval CHAMELEON_SUCCESS successful exit
  *
  */
 int CHAMELEON_Alloc_Workspace_zhetrd(int M, int N, CHAM_desc_t **descT, int p, int q) {
diff --git a/coreblas/compute/core_zaxpy.c b/coreblas/compute/core_zaxpy.c
index d3477032a..a982aaafe 100644
--- a/coreblas/compute/core_zaxpy.c
+++ b/coreblas/compute/core_zaxpy.c
@@ -47,9 +47,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zgeadd.c b/coreblas/compute/core_zgeadd.c
index a85bec68a..5afb5a770 100644
--- a/coreblas/compute/core_zgeadd.c
+++ b/coreblas/compute/core_zgeadd.c
@@ -71,9 +71,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 #if defined(CHAMELEON_HAVE_WEAK)
diff --git a/coreblas/compute/core_zgelqt.c b/coreblas/compute/core_zgelqt.c
index 7a2a74ca0..7793a76df 100644
--- a/coreblas/compute/core_zgelqt.c
+++ b/coreblas/compute/core_zgelqt.c
@@ -82,9 +82,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zgeqrt.c b/coreblas/compute/core_zgeqrt.c
index 76fcfdfc2..ab5681866 100644
--- a/coreblas/compute/core_zgeqrt.c
+++ b/coreblas/compute/core_zgeqrt.c
@@ -83,9 +83,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zgesplit.c b/coreblas/compute/core_zgesplit.c
index 5255442c5..0f30ae816 100644
--- a/coreblas/compute/core_zgesplit.c
+++ b/coreblas/compute/core_zgesplit.c
@@ -56,9 +56,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zgessm.c b/coreblas/compute/core_zgessm.c
index 9757800cd..c395a30ff 100644
--- a/coreblas/compute/core_zgessm.c
+++ b/coreblas/compute/core_zgessm.c
@@ -68,9 +68,8 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zgessq.c b/coreblas/compute/core_zgessq.c
index 55bbf091f..e6462f979 100644
--- a/coreblas/compute/core_zgessq.c
+++ b/coreblas/compute/core_zgessq.c
@@ -82,9 +82,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -k, the k-th argument had an illegal value
  *
  */
 int CORE_zgessq(int M, int N,
diff --git a/coreblas/compute/core_zgetf2_nopiv.c b/coreblas/compute/core_zgetf2_nopiv.c
index 91c313430..18836b6e1 100644
--- a/coreblas/compute/core_zgetf2_nopiv.c
+++ b/coreblas/compute/core_zgetf2_nopiv.c
@@ -58,10 +58,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/coreblas/compute/core_zgetrf_incpiv.c b/coreblas/compute/core_zgetrf_incpiv.c
index b47084b3c..b1355e645 100644
--- a/coreblas/compute/core_zgetrf_incpiv.c
+++ b/coreblas/compute/core_zgetrf_incpiv.c
@@ -71,10 +71,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/coreblas/compute/core_zgetrf_nopiv.c b/coreblas/compute/core_zgetrf_nopiv.c
index b7661ba5c..fbd34a128 100644
--- a/coreblas/compute/core_zgetrf_nopiv.c
+++ b/coreblas/compute/core_zgetrf_nopiv.c
@@ -60,10 +60,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/coreblas/compute/core_zherfb.c b/coreblas/compute/core_zherfb.c
index d3653d55b..d1f952bf7 100644
--- a/coreblas/compute/core_zherfb.c
+++ b/coreblas/compute/core_zherfb.c
@@ -85,9 +85,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 #if defined(CHAMELEON_HAVE_WEAK)
diff --git a/coreblas/compute/core_zhessq.c b/coreblas/compute/core_zhessq.c
index 250962aba..d5b968515 100644
--- a/coreblas/compute/core_zhessq.c
+++ b/coreblas/compute/core_zhessq.c
@@ -88,9 +88,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -k, the k-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zlascal.c b/coreblas/compute/core_zlascal.c
index 645bc6714..50654a63b 100644
--- a/coreblas/compute/core_zlascal.c
+++ b/coreblas/compute/core_zlascal.c
@@ -52,9 +52,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 int
diff --git a/coreblas/compute/core_zlatro.c b/coreblas/compute/core_zlatro.c
index 2bdcbfc31..c22ac72ab 100644
--- a/coreblas/compute/core_zlatro.c
+++ b/coreblas/compute/core_zlatro.c
@@ -72,9 +72,8 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 #if defined(CHAMELEON_HAVE_WEAK)
diff --git a/coreblas/compute/core_zpamm.c b/coreblas/compute/core_zpamm.c
index 35c8e0490..2dd190e9c 100644
--- a/coreblas/compute/core_zpamm.c
+++ b/coreblas/compute/core_zpamm.c
@@ -174,9 +174,8 @@ static inline int CORE_zpamm_w(cham_side_t side, cham_trans_t trans, cham_uplo_t
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 int
diff --git a/coreblas/compute/core_zparfb.c b/coreblas/compute/core_zparfb.c
index a359402d6..05d07f72e 100644
--- a/coreblas/compute/core_zparfb.c
+++ b/coreblas/compute/core_zparfb.c
@@ -132,9 +132,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 /* This kernel is never traced so return type on previous line for convert2eztrace.pl script */
diff --git a/coreblas/compute/core_zpemv.c b/coreblas/compute/core_zpemv.c
index 62213c723..6b8fc9ed6 100644
--- a/coreblas/compute/core_zpemv.c
+++ b/coreblas/compute/core_zpemv.c
@@ -113,9 +113,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zssssm.c b/coreblas/compute/core_zssssm.c
index 87d18d295..ef5bd6a17 100644
--- a/coreblas/compute/core_zssssm.c
+++ b/coreblas/compute/core_zssssm.c
@@ -91,9 +91,8 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zsyssq.c b/coreblas/compute/core_zsyssq.c
index a2c19544b..8bce64cec 100644
--- a/coreblas/compute/core_zsyssq.c
+++ b/coreblas/compute/core_zsyssq.c
@@ -88,9 +88,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -k, the k-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztplqt.c b/coreblas/compute/core_ztplqt.c
index b2794dc6d..e80f80a96 100644
--- a/coreblas/compute/core_ztplqt.c
+++ b/coreblas/compute/core_ztplqt.c
@@ -77,9 +77,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 int CORE_ztplqt( int M, int N, int L, int IB,
diff --git a/coreblas/compute/core_ztpmqrt.c b/coreblas/compute/core_ztpmqrt.c
index 5909f19ee..6584e2ba5 100644
--- a/coreblas/compute/core_ztpmqrt.c
+++ b/coreblas/compute/core_ztpmqrt.c
@@ -129,9 +129,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztpqrt.c b/coreblas/compute/core_ztpqrt.c
index ddfbb49ab..a251bed84 100644
--- a/coreblas/compute/core_ztpqrt.c
+++ b/coreblas/compute/core_ztpqrt.c
@@ -82,9 +82,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 int CORE_ztpqrt( int M, int N, int L, int IB,
diff --git a/coreblas/compute/core_ztradd.c b/coreblas/compute/core_ztradd.c
index 831ad069e..3242ae53a 100644
--- a/coreblas/compute/core_ztradd.c
+++ b/coreblas/compute/core_ztradd.c
@@ -74,9 +74,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 #if defined(CHAMELEON_HAVE_WEAK)
diff --git a/coreblas/compute/core_ztrssq.c b/coreblas/compute/core_ztrssq.c
index f01e63663..61cc2994d 100644
--- a/coreblas/compute/core_ztrssq.c
+++ b/coreblas/compute/core_ztrssq.c
@@ -82,9 +82,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -k, the k-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztslqt.c b/coreblas/compute/core_ztslqt.c
index da5b27078..156429d2b 100644
--- a/coreblas/compute/core_ztslqt.c
+++ b/coreblas/compute/core_ztslqt.c
@@ -94,9 +94,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztsmlq.c b/coreblas/compute/core_ztsmlq.c
index a9324fa04..c2238aed6 100644
--- a/coreblas/compute/core_ztsmlq.c
+++ b/coreblas/compute/core_ztsmlq.c
@@ -121,9 +121,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztsmlq_hetra1.c b/coreblas/compute/core_ztsmlq_hetra1.c
index ff7123317..fc0a5abda 100644
--- a/coreblas/compute/core_ztsmlq_hetra1.c
+++ b/coreblas/compute/core_ztsmlq_hetra1.c
@@ -108,9 +108,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 #if defined(CHAMELEON_HAVE_WEAK)
diff --git a/coreblas/compute/core_ztsmqr.c b/coreblas/compute/core_ztsmqr.c
index e4f681581..aeb35c924 100644
--- a/coreblas/compute/core_ztsmqr.c
+++ b/coreblas/compute/core_ztsmqr.c
@@ -121,9 +121,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztsmqr_hetra1.c b/coreblas/compute/core_ztsmqr_hetra1.c
index cfba422e7..40dcf9270 100644
--- a/coreblas/compute/core_ztsmqr_hetra1.c
+++ b/coreblas/compute/core_ztsmqr_hetra1.c
@@ -110,9 +110,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 #if defined(CHAMELEON_HAVE_WEAK)
diff --git a/coreblas/compute/core_ztsqrt.c b/coreblas/compute/core_ztsqrt.c
index 7564c4edf..3bbbd8f1b 100644
--- a/coreblas/compute/core_ztsqrt.c
+++ b/coreblas/compute/core_ztsqrt.c
@@ -83,9 +83,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_ztstrf.c b/coreblas/compute/core_ztstrf.c
index c0f5c9eca..6f03a2664 100644
--- a/coreblas/compute/core_ztstrf.c
+++ b/coreblas/compute/core_ztstrf.c
@@ -84,10 +84,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/coreblas/compute/core_zttlqt.c b/coreblas/compute/core_zttlqt.c
index db12242e3..b331b2871 100644
--- a/coreblas/compute/core_zttlqt.c
+++ b/coreblas/compute/core_zttlqt.c
@@ -95,9 +95,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zttmlq.c b/coreblas/compute/core_zttmlq.c
index 5b6ee0261..b2fd88691 100644
--- a/coreblas/compute/core_zttmlq.c
+++ b/coreblas/compute/core_zttmlq.c
@@ -113,9 +113,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zttmqr.c b/coreblas/compute/core_zttmqr.c
index 9342ecbe3..850f27599 100644
--- a/coreblas/compute/core_zttmqr.c
+++ b/coreblas/compute/core_zttmqr.c
@@ -112,9 +112,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zttqrt.c b/coreblas/compute/core_zttqrt.c
index c024dc959..4f127334a 100644
--- a/coreblas/compute/core_zttqrt.c
+++ b/coreblas/compute/core_zttqrt.c
@@ -95,9 +95,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zunmlq.c b/coreblas/compute/core_zunmlq.c
index c7ac26f55..3f1593883 100644
--- a/coreblas/compute/core_zunmlq.c
+++ b/coreblas/compute/core_zunmlq.c
@@ -105,9 +105,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/compute/core_zunmqr.c b/coreblas/compute/core_zunmqr.c
index 59fb4c525..347512a01 100644
--- a/coreblas/compute/core_zunmqr.c
+++ b/coreblas/compute/core_zunmqr.c
@@ -106,9 +106,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/coreblas/eztrace_module/coreblas_eztrace_module b/coreblas/eztrace_module/coreblas_eztrace_module
index dca1e36d9..4a8688192 100644
--- a/coreblas/eztrace_module/coreblas_eztrace_module
+++ b/coreblas/eztrace_module/coreblas_eztrace_module
@@ -1172,12 +1172,6 @@ int  CORE_zlatro(int uplo, int trans,
                  void *A, int LDA,
                        void *B, int LDB);
 void CORE_zlauum(int uplo, int N, void *A, int LDA);
-int CORE_zpamm(int op, int side, int storev,
-               int M, int N, int K, int L,
-               void *A1, int LDA1,
-                     void *A2, int LDA2,
-               void *V, int LDV,
-                     void *W, int LDW);
 int  CORE_zparfb(int side, int trans, int direct, int storev,
                  int M1, int N1, int M2, int N2, int K, int L,
                        void *A1, int LDA1,
diff --git a/cudablas/compute/cuda_zgeadd.c b/cudablas/compute/cuda_zgeadd.c
index d7f86784c..e520dfb0a 100644
--- a/cudablas/compute/cuda_zgeadd.c
+++ b/cudablas/compute/cuda_zgeadd.c
@@ -72,9 +72,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 int CUDA_zgeadd(cham_trans_t trans,
diff --git a/include/chameleon/tasks_z.h b/include/chameleon/tasks_z.h
index 4fa07c2b4..954f67570 100644
--- a/include/chameleon/tasks_z.h
+++ b/include/chameleon/tasks_z.h
@@ -38,23 +38,13 @@ void INSERT_TASK_zaxpy( const RUNTIME_option_t *options,
                         int M, CHAMELEON_Complex64_t alpha,
                         const CHAM_desc_t *A, int Am, int An, int incA,
                         const CHAM_desc_t *B, int Bm, int Bn, int incB );
+void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         void *user_data, void* user_build_callback );
 void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
                          cham_trans_t trans, int m, int n, int nb,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                          CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb );
-void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
-                          cham_uplo_t uplo,
-                          int m, int n, int nb,
-                          CHAMELEON_Complex64_t alpha,
-                          const CHAM_desc_t *A, int Am, int An, int lda );
-void INSERT_TASK_zbrdalg( const RUNTIME_option_t *options,
-                          cham_uplo_t uplo,
-                          int N, int NB,
-                          const CHAM_desc_t *A,
-                          const CHAM_desc_t *C, int Cm, int Cn,
-                          const CHAM_desc_t *S, int Sm, int Sn,
-                          int i, int j, int m, int grsiz, int BAND,
-                          int *PCOL, int *ACOL, int *MCOL );
 void INSERT_TASK_zgelqt( const RUNTIME_option_t *options,
                          int m, int n, int ib, int nb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -65,39 +55,6 @@ void INSERT_TASK_zgemm( const RUNTIME_option_t *options,
                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
                         CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
-void INSERT_TASK_zgemm2( const RUNTIME_option_t *options,
-                         cham_trans_t transA, cham_trans_t transB,
-                         int m, int n, int k, int nb,
-                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                         CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
-void INSERT_TASK_zgemm_f2( const RUNTIME_option_t *options,
-                           cham_trans_t transA, cham_trans_t transB,
-                           int m, int n, int k, int nb,
-                           CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                           const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                           CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc,
-                           const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1,
-                           const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 );
-void INSERT_TASK_zgemm_p2( const RUNTIME_option_t *options,
-                           cham_trans_t transA, cham_trans_t transB,
-                           int m, int n, int k, int nb,
-                           CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                           const CHAMELEON_Complex64_t **B, int ldb,
-                           CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
-void INSERT_TASK_zgemm_p2f1( const RUNTIME_option_t *options,
-                             cham_trans_t transA, cham_trans_t transB,
-                             int m, int n, int k, int nb,
-                             CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                             const CHAMELEON_Complex64_t **B, int ldb,
-                             CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc,
-                             const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1 );
-void INSERT_TASK_zgemm_p3( const RUNTIME_option_t *options,
-                           cham_trans_t transA, cham_trans_t transB,
-                           int m, int n, int k, int nb,
-                           CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                           const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                           CHAMELEON_Complex64_t beta, CHAMELEON_Complex64_t **C, int ldc );
 void INSERT_TASK_zgeqrt( const RUNTIME_option_t *options,
                          int m, int n, int ib, int nb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -126,28 +83,6 @@ void INSERT_TASK_zgetrf_incpiv( const RUNTIME_option_t *options,
 void INSERT_TASK_zgetrf_nopiv( const RUNTIME_option_t *options,
                                int m, int n, int ib, int nb,
                                const CHAM_desc_t *A, int Am, int An, int lda, int iinfo );
-void INSERT_TASK_zgetrf_reclap( const RUNTIME_option_t *options,
-                                int m, int n, int nb,
-                                const CHAM_desc_t *A, int Am, int An, int lda,
-                                int *IPIV,
-
-                                cham_bool_t check_info, int iinfo,
-                                int nbthread );
-void INSERT_TASK_zgetrf_rectil( const RUNTIME_option_t *options,
-                                const CHAM_desc_t A, const CHAM_desc_t *Amn, int Amnm, int Amnn, int size,
-                                int *IPIV,
-
-                                cham_bool_t check_info, int iinfo,
-                                int nbthread );
-void INSERT_TASK_zgetrip( const RUNTIME_option_t *options,
-                          int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA );
-void INSERT_TASK_zgetrip_f1( const RUNTIME_option_t *options,
-                             int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA,
-                             const CHAM_desc_t *fake, int fakem, int faken, int szeF, int paramF );
-void INSERT_TASK_zgetrip_f2( const RUNTIME_option_t *options,
-                             int m, int n, const CHAM_desc_t *A, int Am, int An, int szeA,
-                             const CHAM_desc_t *fake1, int fake1m, int fake1n, int szeF1, int paramF1,
-                             const CHAM_desc_t *fake2, int fake2m, int fake2n, int szeF2, int paramF2 );
 void INSERT_TASK_zhe2ge( const RUNTIME_option_t *options,
                          cham_uplo_t uplo,
                          int m, int n, int mb,
@@ -159,16 +94,6 @@ void INSERT_TASK_zhemm( const RUNTIME_option_t *options,
                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
                         CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
-void INSERT_TASK_zhegst( const RUNTIME_option_t *options,
-                         int itype, cham_uplo_t uplo, int N,
-                         const CHAM_desc_t *A, int Am, int An, int LDA,
-                         const CHAM_desc_t *B, int Bm, int Bn, int LDB,
-                         int iinfo );
-void INSERT_TASK_zherk( const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, cham_trans_t trans,
-                        int n, int k, int nb,
-                        double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                        double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
 void INSERT_TASK_zher2k( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans,
                          int n, int k, int nb,
@@ -181,6 +106,15 @@ void INSERT_TASK_zherfb( const RUNTIME_option_t *options,
                          const CHAM_desc_t *A, int Am, int An, int lda,
                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
                          const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+void INSERT_TASK_zherk( const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, cham_trans_t trans,
+                        int n, int k, int nb,
+                        double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                        double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int n,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
 void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int m, int n, int mb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -196,10 +130,6 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options,
 void INSERT_TASK_zlange_max( const RUNTIME_option_t *options,
                              const CHAM_desc_t *A, int Am, int An,
                              const CHAM_desc_t *B, int Bm, int Bn );
-void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int n,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn );
 void INSERT_TASK_zlanhe( const RUNTIME_option_t *options,
                          cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
                          const CHAM_desc_t *A, int Am, int An, int LDA,
@@ -213,31 +143,18 @@ void INSERT_TASK_zlantr( const RUNTIME_option_t *options,
                          int M, int N, int NB,
                          const CHAM_desc_t *A, int Am, int An, int LDA,
                          const CHAM_desc_t *B, int Bm, int Bn );
+void INSERT_TASK_zlascal( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo,
+                          int m, int n, int nb,
+                          CHAMELEON_Complex64_t alpha,
+                          const CHAM_desc_t *A, int Am, int An, int lda );
 void INSERT_TASK_zlaset( const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha,
-                         CHAMELEON_Complex64_t beta, const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea );
+                         cham_uplo_t uplo, int n1, int n2,
+                         CHAMELEON_Complex64_t alpha, CHAMELEON_Complex64_t beta,
+                         const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea );
 void INSERT_TASK_zlaset2( const RUNTIME_option_t *options,
                           cham_uplo_t uplo, int n1, int n2, CHAMELEON_Complex64_t alpha,
                           const CHAM_desc_t *tileA, int tileAm, int tileAn, int ldtilea );
-void INSERT_TASK_zlaswp( const RUNTIME_option_t *options,
-                         int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                         int i1,  int i2, int *ipiv, int inc );
-void INSERT_TASK_zlaswp_f2( const RUNTIME_option_t *options,
-                            int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                            int i1,  int i2, int *ipiv, int inc,
-                            const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1,
-                            const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 );
-void INSERT_TASK_zlaswp_ontile( const RUNTIME_option_t *options,
-                                const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An,
-                                int i1,  int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel );
-void INSERT_TASK_zlaswp_ontile_f2( const RUNTIME_option_t *options,
-                                   const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An,
-                                   int i1,  int i2, int *ipiv, int inc,
-                                   const CHAM_desc_t *fake1, int fake1m, int fake1n, int szefake1, int flag1,
-                                   const CHAM_desc_t *fake2, int fake2m, int fake2n, int szefake2, int flag2 );
-void INSERT_TASK_zlaswpc_ontile( const RUNTIME_option_t *options,
-                                 const CHAM_desc_t descA, const CHAM_desc_t *A, int Am, int An,
-                                 int i1,  int i2, int *ipiv, int inc, CHAMELEON_Complex64_t *fakepanel );
 void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int mb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -254,17 +171,15 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
                          int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
                          int bigM, int m0, int n0, unsigned long long int seed );
+void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
+                         const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn );
+void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
+                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn );
 void INSERT_TASK_zpotrf( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int n, int nb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
-
                          int iinfo );
-void INSERT_TASK_zshift( const RUNTIME_option_t *options,
-                         int s, int m, int n, int L,
-                         CHAMELEON_Complex64_t *A );
-void INSERT_TASK_zshiftw( const RUNTIME_option_t *options,
-                          int s, int cl, int m, int n, int L,
-                          const CHAM_desc_t *A, int Am, int An, CHAMELEON_Complex64_t *W );
 void INSERT_TASK_zssssm( const RUNTIME_option_t *options,
                          int m1, int n1, int m2, int n2, int k, int ib, int nb,
                          const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
@@ -278,17 +193,17 @@ void INSERT_TASK_zsymm( const RUNTIME_option_t *options,
                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
                         CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
-void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, cham_trans_t trans,
-                        int n, int k, int nb,
-                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
 void INSERT_TASK_zsyr2k( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans,
                          int n, int k, int nb,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                          const CHAM_desc_t *B, int Bm, int Bn, int LDB,
                          CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
+void INSERT_TASK_zsyrk( const RUNTIME_option_t *options,
+                        cham_uplo_t uplo, cham_trans_t trans,
+                        int n, int k, int nb,
+                        CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                        CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc );
 void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, int n,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -297,13 +212,6 @@ void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options,
                                cham_uplo_t uplo, int n, int nb,
                                const CHAM_desc_t *A, int Am, int An, int lda,
                                int iinfo );
-void INSERT_TASK_zswpab( const RUNTIME_option_t *options,
-                         int i, int n1, int n2,
-                         const CHAM_desc_t *A, int Am, int An, int szeA );
-void INSERT_TASK_zswptr_ontile( const RUNTIME_option_t *options,
-                                const CHAM_desc_t descA, const CHAM_desc_t *Aij, int Aijm, int Aijn,
-                                int i1,  int i2, int *ipiv, int inc,
-                                const CHAM_desc_t *Akk, int Akkm, int Akkn, int ldak );
 void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
                          int m, int n, int l, int ib, int nb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -328,14 +236,6 @@ void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
                          const CHAM_desc_t *A, int Am, int An, int lda,
                          const CHAM_desc_t *B, int Bm, int Bn, int ldb,
                          const CHAM_desc_t *T, int Tm, int Tn, int ldt );
-void INSERT_TASK_ztrdalg( const RUNTIME_option_t *options,
-                          cham_uplo_t uplo,
-                          int N, int NB,
-                          const CHAM_desc_t *A,
-                          const CHAM_desc_t *C, int Cm, int Cn,
-                          const CHAM_desc_t *S, int Sm, int Sn,
-                          int i, int j, int m, int grsiz, int BAND,
-                          int *PCOL, int *ACOL, int *MCOL );
 void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
                          cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
                          CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
@@ -349,11 +249,6 @@ void INSERT_TASK_ztrmm( const RUNTIME_option_t *options,
                         int m, int n, int nb,
                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
                         const CHAM_desc_t *B, int Bm, int Bn, int ldb );
-void INSERT_TASK_ztrmm_p2( const RUNTIME_option_t *options,
-                           cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
-                           int m, int n, int nb,
-                           CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                           CHAMELEON_Complex64_t **B, int ldb );
 void INSERT_TASK_ztrsm( const RUNTIME_option_t *options,
                         cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
                         int m, int n, int nb,
@@ -390,18 +285,6 @@ void INSERT_TASK_ztstrf( const RUNTIME_option_t *options,
                          const CHAM_desc_t *L, int Lm, int Ln, int ldl,
                          int *IPIV,
                          cham_bool_t check_info, int iinfo );
-void INSERT_TASK_zpamm( const RUNTIME_option_t *options,
-                        int op, cham_side_t side, cham_store_t storev,
-                        int m, int n, int k, int l,
-                        const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                        const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                        const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                        const CHAM_desc_t *W, int Wm, int Wn, int ldw );
-void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
-                         const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn );
-void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
-                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn );
 void INSERT_TASK_zunmlq( const RUNTIME_option_t *options,
                          cham_side_t side, cham_trans_t trans,
                          int m, int n, int ib,  int nb, int k,
@@ -414,10 +297,6 @@ void INSERT_TASK_zunmqr( const RUNTIME_option_t *options,
                          const CHAM_desc_t *A, int Am, int An, int lda,
                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
                          const CHAM_desc_t *C, int Cm, int Cn, int ldc );
-void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *A, int Am, int An, int lda,
-                         void *user_data, void* user_build_callback );
-
 
 /**
  * Keep these insert_task for retro-compatibility
diff --git a/runtime/CMakeLists.txt b/runtime/CMakeLists.txt
index cabe559c2..309dea896 100644
--- a/runtime/CMakeLists.txt
+++ b/runtime/CMakeLists.txt
@@ -28,7 +28,6 @@
 # List of codelets required by all runtimes
 # -----------------------------------------
 set(CODELETS_ZSRC
-    codelets/codelet_ztile_zero.c
     codelets/codelet_zasum.c
     ##################
     # BLAS 1
diff --git a/runtime/openmp/codelets/codelet_zgeadd.c b/runtime/openmp/codelets/codelet_zgeadd.c
index 1d18ff18f..2ceeb8159 100644
--- a/runtime/openmp/codelets/codelet_zgeadd.c
+++ b/runtime/openmp/codelets/codelet_zgeadd.c
@@ -31,7 +31,7 @@
  *
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
- *  INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd.
+ * @brief Adds two general matrices together as in PBLAS pzgeadd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -75,15 +75,14 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_zgeadd(const RUNTIME_option_t *options,
-                       cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
+                         cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
diff --git a/runtime/openmp/codelets/codelet_zgelqt.c b/runtime/openmp/codelets/codelet_zgelqt.c
index 3341a8f01..8dd282d62 100644
--- a/runtime/openmp/codelets/codelet_zgelqt.c
+++ b/runtime/openmp/codelets/codelet_zgelqt.c
@@ -84,9 +84,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/runtime/openmp/codelets/codelet_zgemm.c b/runtime/openmp/codelets/codelet_zgemm.c
index 68aec8de4..b2737c388 100644
--- a/runtime/openmp/codelets/codelet_zgemm.c
+++ b/runtime/openmp/codelets/codelet_zgemm.c
@@ -43,7 +43,7 @@ void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
     CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(transA, transB, m, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn])
+#pragma omp task firstprivate(transA, transB, m, n, k, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
     CORE_zgemm(transA, transB,
         m, n, k,
         alpha, ptrA, lda,
diff --git a/runtime/openmp/codelets/codelet_zgeqrt.c b/runtime/openmp/codelets/codelet_zgeqrt.c
index 6428375b2..f8bf811af 100644
--- a/runtime/openmp/codelets/codelet_zgeqrt.c
+++ b/runtime/openmp/codelets/codelet_zgeqrt.c
@@ -85,9 +85,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/runtime/openmp/codelets/codelet_zgessm.c b/runtime/openmp/codelets/codelet_zgessm.c
index cd24a4ac0..2ed15696a 100644
--- a/runtime/openmp/codelets/codelet_zgessm.c
+++ b/runtime/openmp/codelets/codelet_zgessm.c
@@ -68,9 +68,8 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 
@@ -83,6 +82,6 @@ void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
 {
     CHAMELEON_Complex64_t *ptrD = RTBLKADDR(D, CHAMELEON_Complex64_t, Dm, Dn);
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda) depend(in:ptrD[0:Dm*Dn]) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda) depend(in:ptrD[0]) depend(inout:ptrA[0])
     CORE_zgessm(m, n, k, ib, IPIV, ptrD, ldd, ptrA, lda);
 }
diff --git a/runtime/openmp/codelets/codelet_zgetrf.c b/runtime/openmp/codelets/codelet_zgetrf.c
index d7cc9fe75..ab9869f7e 100644
--- a/runtime/openmp/codelets/codelet_zgetrf.c
+++ b/runtime/openmp/codelets/codelet_zgetrf.c
@@ -34,6 +34,6 @@ void INSERT_TASK_zgetrf(const RUNTIME_option_t *options,
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     int info = 0;
-#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(inout:ptrA[0])
+#pragma omp task firstprivate(m, n, ptrA, lda, IPIV, info) depend(out:IPIV[0]) depend(inout:ptrA[0])
     CORE_zgetrf( m, n, ptrA, lda, IPIV, &info );
 }
diff --git a/runtime/openmp/codelets/codelet_zgetrf_incpiv.c b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c
index 20b5e92d3..9f26a7064 100644
--- a/runtime/openmp/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/openmp/codelets/codelet_zgetrf_incpiv.c
@@ -73,10 +73,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
@@ -92,6 +91,6 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     int info = 0;
-#pragma omp task firstprivate(m, n, ib, ptrA, lda, IPIV, info) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(m, n, ib, ptrA, lda, IPIV, info) depend(out:IPIV[0]) depend(inout:ptrA[0])
     CORE_zgetrf_incpiv(m, n, ib, ptrA, lda, IPIV, &info);
 }
diff --git a/runtime/openmp/codelets/codelet_zgetrf_nopiv.c b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c
index 5f26b76e9..829b5473e 100644
--- a/runtime/openmp/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/openmp/codelets/codelet_zgetrf_nopiv.c
@@ -63,10 +63,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
@@ -80,6 +79,6 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     int info = 0;
-#pragma omp task firstprivate(m, n, ib, ptrA, lda, info) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(m, n, ib, ptrA, lda, info) depend(inout:ptrA[0])
     CORE_zgetrf_nopiv(m, n, ib, ptrA, lda, &info);
 }
diff --git a/runtime/openmp/codelets/codelet_zhemm.c b/runtime/openmp/codelets/codelet_zhemm.c
index 331459e47..4d632655a 100644
--- a/runtime/openmp/codelets/codelet_zhemm.c
+++ b/runtime/openmp/codelets/codelet_zhemm.c
@@ -43,7 +43,7 @@ void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
     CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn])
+#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
     CORE_zhemm(side, uplo,
         m, n,
         alpha, ptrA, lda,
diff --git a/runtime/openmp/codelets/codelet_zhessq.c b/runtime/openmp/codelets/codelet_zhessq.c
index 46cd0f5c7..409d413dd 100644
--- a/runtime/openmp/codelets/codelet_zhessq.c
+++ b/runtime/openmp/codelets/codelet_zhessq.c
@@ -31,6 +31,7 @@ void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     double *ptrScaleSum = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
-#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrScaleSum) depend(in:ptrScaleSum[0:SCALESUMSQm*SCALESUMSQn]) depend(inout:ptrA[0:Am*An])
+    /* A is the input tile; the (scale, sumsq) pair is updated through ptrScaleSum, as in INSERT_TASK_zsyssq. */
+#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrScaleSum) depend(in:ptrA[0]) depend(inout:ptrScaleSum[0])
     CORE_zhessq( uplo, n, ptrA, lda, &ptrScaleSum[0], &ptrScaleSum[1] );
 }
diff --git a/runtime/openmp/codelets/codelet_zlacpy.c b/runtime/openmp/codelets/codelet_zlacpy.c
index 74e420c31..44ea300eb 100644
--- a/runtime/openmp/codelets/codelet_zlacpy.c
+++ b/runtime/openmp/codelets/codelet_zlacpy.c
@@ -33,10 +33,10 @@
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
  */
-void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, int m, int n, int nb,
-                        int displA, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo, int m, int n, int nb,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A + displA, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B + displB, CHAMELEON_Complex64_t, Bm, Bn);
@@ -44,12 +44,12 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
     CORE_zlacpy(uplo, m, n, ptrA, lda, ptrB, ldb);
 }
 
-void INSERT_TASK_zlacpy(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                        0, A, Am, An, lda,
-                        0, B, Bm, Bn, ldb );
+                         0, A, Am, An, lda,
+                         0, B, Bm, Bn, ldb );
 }
diff --git a/runtime/openmp/codelets/codelet_zlag2c.c b/runtime/openmp/codelets/codelet_zlag2c.c
index b65a938fe..26a024cd0 100644
--- a/runtime/openmp/codelets/codelet_zlag2c.c
+++ b/runtime/openmp/codelets/codelet_zlag2c.c
@@ -31,13 +31,24 @@
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
  */
-void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
-                       int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlag2c( const RUNTIME_option_t *options,
+                         int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0:Am*An]) depend(inout:ptrB[0:Bm*Bn])
+    CHAMELEON_Complex32_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex32_t, Bm, Bn);
+#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
     CORE_zlag2c( m, n, ptrA, lda, ptrB, ldb);
 }
+
+void INSERT_TASK_clag2z( const RUNTIME_option_t *options,
+                         int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+{
+    CHAMELEON_Complex32_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex32_t, Am, An);
+    CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
+#pragma omp task firstprivate(m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
+    CORE_clag2z( m, n, ptrA, lda, ptrB, ldb);
+}
diff --git a/runtime/openmp/codelets/codelet_zlascal.c b/runtime/openmp/codelets/codelet_zlascal.c
index d579bb39a..2aa990418 100644
--- a/runtime/openmp/codelets/codelet_zlascal.c
+++ b/runtime/openmp/codelets/codelet_zlascal.c
@@ -51,9 +51,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
@@ -64,6 +63,6 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
                         const CHAM_desc_t *A, int Am, int An, int lda)
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, m, n, alpha, ptrA, lda) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(uplo, m, n, alpha, ptrA, lda) depend(inout:ptrA[0])
     CORE_zlascal(uplo, m, n, alpha, ptrA, lda);
 }
diff --git a/runtime/openmp/codelets/codelet_zlatro.c b/runtime/openmp/codelets/codelet_zlatro.c
index 6f7ba5fa5..ec50bb9cf 100644
--- a/runtime/openmp/codelets/codelet_zlatro.c
+++ b/runtime/openmp/codelets/codelet_zlatro.c
@@ -33,14 +33,14 @@
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
  */
-void INSERT_TASK_zlatro(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans,
-                       int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_trans_t trans,
+                         int m, int n, int mb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
-#pragma omp task firstprivate(uplo, trans, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0:Am*An]) depend(inout:ptrB[0:Bm*Bn])
+#pragma omp task firstprivate(uplo, trans, m, n, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
     CORE_zlatro(uplo, trans, m, n, ptrA, lda, ptrB, ldb);
 }
diff --git a/runtime/openmp/codelets/codelet_zlauum.c b/runtime/openmp/codelets/codelet_zlauum.c
index 7ab7c8b99..70030fc19 100644
--- a/runtime/openmp/codelets/codelet_zlauum.c
+++ b/runtime/openmp/codelets/codelet_zlauum.c
@@ -38,6 +38,6 @@ void INSERT_TASK_zlauum(const RUNTIME_option_t *options,
                        const CHAM_desc_t *A, int Am, int An, int lda)
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0])
     CORE_zlauum(uplo, n, ptrA, lda);
 }
diff --git a/runtime/openmp/codelets/codelet_zplghe.c b/runtime/openmp/codelets/codelet_zplghe.c
index 06e890a45..a785b19c2 100644
--- a/runtime/openmp/codelets/codelet_zplghe.c
+++ b/runtime/openmp/codelets/codelet_zplghe.c
@@ -28,13 +28,11 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/*   INSERT_TASK_zplghe - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */
-
 void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
-                        double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int bigM, int m0, int n0, unsigned long long int seed )
+                         double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         int bigM, int m0, int n0, unsigned long long int seed )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0])
     CORE_zplghe( bump, m, n, ptrA, lda, bigM, m0, n0, seed );
 }
diff --git a/runtime/openmp/codelets/codelet_zplgsy.c b/runtime/openmp/codelets/codelet_zplgsy.c
index 5269d5276..4a3cea2f6 100644
--- a/runtime/openmp/codelets/codelet_zplgsy.c
+++ b/runtime/openmp/codelets/codelet_zplgsy.c
@@ -28,11 +28,9 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/*   INSERT_TASK_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */
-
 void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
-                        CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int bigM, int m0, int n0, unsigned long long int seed )
+                         CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         int bigM, int m0, int n0, unsigned long long int seed )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
 #pragma omp task firstprivate(bump, m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0])
diff --git a/runtime/openmp/codelets/codelet_zplrnt.c b/runtime/openmp/codelets/codelet_zplrnt.c
index 35cb6300c..139f44c8b 100644
--- a/runtime/openmp/codelets/codelet_zplrnt.c
+++ b/runtime/openmp/codelets/codelet_zplrnt.c
@@ -28,11 +28,9 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
-/*   INSERT_TASK_zplrnt - Generate a tile for random matrix. */
-
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
-                        int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int bigM, int m0, int n0, unsigned long long int seed )
+                         int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         int bigM, int m0, int n0, unsigned long long int seed )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
 #pragma omp task firstprivate(m, n, ptrA, lda, bigM, m0, n0, seed) depend(inout:ptrA[0])
diff --git a/runtime/openmp/codelets/codelet_zplssq.c b/runtime/openmp/codelets/codelet_zplssq.c
index 7ee45f66d..cec083dca 100644
--- a/runtime/openmp/codelets/codelet_zplssq.c
+++ b/runtime/openmp/codelets/codelet_zplssq.c
@@ -29,7 +29,7 @@
  *
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
- *  INSERT_TASK_zplssq returns: scl * sqrt(ssq)
+ * @brief Compute sqrt( sum( a_ij ^ 2 ) ) = scl * sqrt(ssq)
  *
  * with scl and ssq such that
  *
@@ -74,7 +74,7 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
 }
 
 void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
+                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
 {
     CHAMELEON_Complex64_t *res = RTBLKADDR(RESULT, CHAMELEON_Complex64_t, RESULTm, RESULTn);
 
diff --git a/runtime/openmp/codelets/codelet_zssssm.c b/runtime/openmp/codelets/codelet_zssssm.c
index 38d9ad5e3..db82b480e 100644
--- a/runtime/openmp/codelets/codelet_zssssm.c
+++ b/runtime/openmp/codelets/codelet_zssssm.c
@@ -91,9 +91,8 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 
@@ -109,10 +108,8 @@ void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
     CHAMELEON_Complex64_t *ptrA2 = RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n);
     CHAMELEON_Complex64_t *ptrL1 = RTBLKADDR(L1, CHAMELEON_Complex64_t, L1m, L1n);
     CHAMELEON_Complex64_t *ptrL2 = RTBLKADDR(L2, CHAMELEON_Complex64_t, L2m, L2n);
-#pragma omp task firstprivate(m1, n1, m2, n2, k, ib, ptrA1, ptrA2, ptrL1, ptrL2, lda1, lda2, ldl1, ldl2, IPIV)\
-    depend(inout:ptrA1[0])\
-    depend(inout:ptrA2[0])\
-    depend(in:ptrL1[0])\
-    depend(in:ptrL2[0])
+
+#pragma omp task firstprivate(m1, n1, m2, n2, k, ib, ptrA1, ptrA2, ptrL1, ptrL2, lda1, lda2, ldl1, ldl2, IPIV) \
+    depend(inout:ptrA1[0], ptrA2[0]) depend(in:ptrL1[0], ptrL2[0])
     CORE_zssssm(m1, n1, m2, n2, k, ib, ptrA1, lda1, ptrA2, lda2, ptrL1, ldl1, ptrL2, ldl2, IPIV);
 }
diff --git a/runtime/openmp/codelets/codelet_zsymm.c b/runtime/openmp/codelets/codelet_zsymm.c
index efe71b425..76d6ec7b7 100644
--- a/runtime/openmp/codelets/codelet_zsymm.c
+++ b/runtime/openmp/codelets/codelet_zsymm.c
@@ -41,7 +41,7 @@ void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
     CHAMELEON_Complex64_t *ptrC = RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn);
-#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0:Am*An], ptrB[0:Bm*Bn]) depend(inout:ptrC[0:Cm*Cn])
+#pragma omp task firstprivate(side, uplo, m, n, alpha, ptrA, lda, ptrB, ldb, beta, ptrC, ldc) depend(in:ptrA[0], ptrB[0]) depend(inout:ptrC[0])
     CORE_zsymm(side, uplo,
         m, n,
         alpha, ptrA, lda,
diff --git a/runtime/openmp/codelets/codelet_zsyssq.c b/runtime/openmp/codelets/codelet_zsyssq.c
index c2d69dc57..86b58eb00 100644
--- a/runtime/openmp/codelets/codelet_zsyssq.c
+++ b/runtime/openmp/codelets/codelet_zsyssq.c
@@ -29,6 +29,6 @@ void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     double *ptrSCALESUMSQ = RTBLKADDR(SCALESUMSQ, double, SCALESUMSQm, SCALESUMSQn);
-#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrSCALESUMSQ) depend(in:ptrA[0:Am*An]) depend(inout:ptrSCALESUMSQ[0])
+#pragma omp task firstprivate(uplo, n, ptrA, lda, ptrSCALESUMSQ) depend(in:ptrA[0]) depend(inout:ptrSCALESUMSQ[0])
     CORE_zsyssq( uplo, n, ptrA, lda, &ptrSCALESUMSQ[0], &ptrSCALESUMSQ[1] );
 }
diff --git a/runtime/openmp/codelets/codelet_zsytrf_nopiv.c b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c
index 1ebd1aa08..73032cf98 100644
--- a/runtime/openmp/codelets/codelet_zsytrf_nopiv.c
+++ b/runtime/openmp/codelets/codelet_zsytrf_nopiv.c
@@ -32,6 +32,6 @@ void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options,
                              int iinfo)
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0:Am*An])
+#pragma omp task firstprivate(uplo, n, ptrA, lda) depend(inout:ptrA[0])
     CORE_zsytf2_nopiv(uplo, n, ptrA, lda);
 }
diff --git a/runtime/openmp/codelets/codelet_ztile_zero.c b/runtime/openmp/codelets/codelet_ztile_zero.c
deleted file mode 100644
index 96ef911bf..000000000
--- a/runtime/openmp/codelets/codelet_ztile_zero.c
+++ /dev/null
@@ -1,38 +0,0 @@
-/**
- *
- * @file openmp/codelet_ztile_zero.c
- *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon ztile_zero StarPU codelet
- *
- * @version 1.0.0
- * @author Hatem Ltaief
- * @author Mathieu Faverge
- * @author Jakub Kurzak
- * @date 2010-11-15
- * @precisions normal z -> c d s
- *
- */
-
-#include "chameleon_openmp.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas.h"
-/**
- *
- */
-void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options,
-                            int X1, int X2, int Y1, int Y2,
-                            const CHAM_desc_t *A, int Am, int An, int lda )
-{
-    CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
-    int x, y;
-    for (x = X1; x < X2; x++)
-        for (y = Y1; y < Y2; y++)
-            ptrA[lda*x+y] = 0.0;
-}
diff --git a/runtime/openmp/codelets/codelet_ztplqt.c b/runtime/openmp/codelets/codelet_ztplqt.c
index 4bb4f16f0..367e437a7 100644
--- a/runtime/openmp/codelets/codelet_ztplqt.c
+++ b/runtime/openmp/codelets/codelet_ztplqt.c
@@ -20,12 +20,12 @@
 
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-void
-INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
-                   int M, int N, int L, int ib, int nb,
-                   const CHAM_desc_t *A, int Am, int An, int lda,
-                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                   const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+
+void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
+                         int M, int N, int L, int ib, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
@@ -36,8 +36,7 @@ INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
     {
       CHAMELEON_Complex64_t work[ws_size];
 
-      CORE_zlaset( ChamUpperLower, ib, M, 0., 0., ptrT, ldt);
-
+      CORE_zlaset( ChamUpperLower, ib, M, 0., 0., ptrT, ldt );
       CORE_ztplqt( M, N, L, ib,
                    ptrA, lda, ptrB, ldb, ptrT, ldt, work );
     }
diff --git a/runtime/openmp/codelets/codelet_ztpmlqt.c b/runtime/openmp/codelets/codelet_ztpmlqt.c
index 543704822..a5da0f533 100644
--- a/runtime/openmp/codelets/codelet_ztpmlqt.c
+++ b/runtime/openmp/codelets/codelet_ztpmlqt.c
@@ -17,24 +17,25 @@
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-void
-INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
-                    cham_side_t side, cham_trans_t trans,
-                    int M, int N, int K, int L, int ib, int nb,
-                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                    const CHAM_desc_t *A, int Am, int An, int lda,
-                    const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+
+void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
+                          cham_side_t side, cham_trans_t trans,
+                          int M, int N, int K, int L, int ib, int nb,
+                          const CHAM_desc_t *V, int Vm, int Vn, int ldv,
+                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                          const CHAM_desc_t *A, int Am, int An, int lda,
+                          const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
     CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
     CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
     int ws_size = options->ws_wsize;
+
 #pragma omp task firstprivate(ws_size, side, trans, M, N, K, L, ib, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0])
     {
-      CHAMELEON_Complex64_t work[ws_size];
-      CORE_ztpmlqt( side, trans, M, N, K, L, ib,
-                    ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work );
+        CHAMELEON_Complex64_t work[ws_size];
+        CORE_ztpmlqt( side, trans, M, N, K, L, ib,
+                      ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb, work );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_ztpmqrt.c b/runtime/openmp/codelets/codelet_ztpmqrt.c
index 4f3262221..5378a2a5b 100644
--- a/runtime/openmp/codelets/codelet_ztpmqrt.c
+++ b/runtime/openmp/codelets/codelet_ztpmqrt.c
@@ -17,20 +17,21 @@
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-void
-INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
-                    cham_side_t side, cham_trans_t trans,
-                    int M, int N, int K, int L, int ib, int nb,
-                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                    const CHAM_desc_t *A, int Am, int An, int lda,
-                    const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+
+void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
+                          cham_side_t side, cham_trans_t trans,
+                          int M, int N, int K, int L, int ib, int nb,
+                          const CHAM_desc_t *V, int Vm, int Vn, int ldv,
+                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                          const CHAM_desc_t *A, int Am, int An, int lda,
+                          const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
     CHAMELEON_Complex64_t *ptrT = RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn);
     CHAMELEON_Complex64_t *ptrV = RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn);
     int ws_size = options->ws_wsize;
+
 #pragma omp task firstprivate(ws_size, side, trans, M, N, K, L, ib, nb, ptrV, ldv, ptrT, ldt, ptrA, lda, ptrB, ldb) depend(in:ptrV[0], ptrT[0]) depend(inout:ptrA[0], ptrB[0])
     {
         CHAMELEON_Complex64_t tmp[ws_size];
diff --git a/runtime/openmp/codelets/codelet_ztpqrt.c b/runtime/openmp/codelets/codelet_ztpqrt.c
index 7381f6ebd..755de21bd 100644
--- a/runtime/openmp/codelets/codelet_ztpqrt.c
+++ b/runtime/openmp/codelets/codelet_ztpqrt.c
@@ -19,12 +19,12 @@
  */
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
-void
-INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
-                   int M, int N, int L, int ib, int nb,
-                   const CHAM_desc_t *A, int Am, int An, int lda,
-                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                   const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+
+void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
+                         int M, int N, int L, int ib, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
@@ -35,9 +35,8 @@ INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
     {
       CHAMELEON_Complex64_t tmp[ws_size];
 
-      CORE_zlaset( ChamUpperLower, ib, N, 0., 0., ptrT, ldt);
-
+      CORE_zlaset( ChamUpperLower, ib, N, 0., 0., ptrT, ldt );
       CORE_ztpqrt( M, N, L, ib,
-          ptrA, lda, ptrB, ldb, ptrT, ldt, tmp );
+                   ptrA, lda, ptrB, ldb, ptrT, ldt, tmp );
     }
 }
diff --git a/runtime/openmp/codelets/codelet_ztradd.c b/runtime/openmp/codelets/codelet_ztradd.c
index 9a39aaf56..384ba192d 100644
--- a/runtime/openmp/codelets/codelet_ztradd.c
+++ b/runtime/openmp/codelets/codelet_ztradd.c
@@ -22,12 +22,13 @@
 
 #include "chameleon_openmp.h"
 #include "chameleon/tasks_z.h"
+
 /**
  ******************************************************************************
  *
  * @ingroup CORE_CHAMELEON_Complex64_t
  *
- *  INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd.
+ * @brief Adds two trapezoidal matrices together as in PBLAS pztradd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -77,18 +78,18 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_ztradd(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
 {
     CHAMELEON_Complex64_t *ptrA = RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An);
     CHAMELEON_Complex64_t *ptrB = RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn);
+
 #pragma omp task firstprivate(uplo, trans, m, n, alpha, ptrA, lda, ptrB, ldb) depend(in:ptrA[0]) depend(inout:ptrB[0])
     CORE_ztradd(uplo, trans, m, n, alpha, ptrA, lda, beta, ptrB, ldb);
 }
diff --git a/runtime/openmp/codelets/codelet_ztstrf.c b/runtime/openmp/codelets/codelet_ztstrf.c
index cb612cb6f..4072b5d68 100644
--- a/runtime/openmp/codelets/codelet_ztstrf.c
+++ b/runtime/openmp/codelets/codelet_ztstrf.c
@@ -83,10 +83,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/runtime/openmp/codelets/codelet_zunmlq.c b/runtime/openmp/codelets/codelet_zunmlq.c
index 348b290a1..0dd8f263d 100644
--- a/runtime/openmp/codelets/codelet_zunmlq.c
+++ b/runtime/openmp/codelets/codelet_zunmlq.c
@@ -105,9 +105,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/runtime/openmp/codelets/codelet_zunmqr.c b/runtime/openmp/codelets/codelet_zunmqr.c
index 427654703..ed40c4211 100644
--- a/runtime/openmp/codelets/codelet_zunmqr.c
+++ b/runtime/openmp/codelets/codelet_zunmqr.c
@@ -105,9 +105,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 
diff --git a/runtime/parsec/codelets/codelet_zgeadd.c b/runtime/parsec/codelets/codelet_zgeadd.c
index 7ad41db2f..7d937857f 100644
--- a/runtime/parsec/codelets/codelet_zgeadd.c
+++ b/runtime/parsec/codelets/codelet_zgeadd.c
@@ -52,7 +52,7 @@ CORE_zgeadd_parsec( parsec_execution_stream_t *context,
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd.
+ * @brief Adds two general matrices together as in PBLAS pzgeadd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -96,15 +96,14 @@ CORE_zgeadd_parsec( parsec_execution_stream_t *context,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_zgeadd(const RUNTIME_option_t *options,
-                       cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
+                         cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
 
diff --git a/runtime/parsec/codelets/codelet_zgelqt.c b/runtime/parsec/codelets/codelet_zgelqt.c
index 4ef5b5b7a..32a7dfaa9 100644
--- a/runtime/parsec/codelets/codelet_zgelqt.c
+++ b/runtime/parsec/codelets/codelet_zgelqt.c
@@ -76,9 +76,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 static inline int
diff --git a/runtime/parsec/codelets/codelet_zgeqrt.c b/runtime/parsec/codelets/codelet_zgeqrt.c
index 53ac8ac04..3aaaf84cf 100644
--- a/runtime/parsec/codelets/codelet_zgeqrt.c
+++ b/runtime/parsec/codelets/codelet_zgeqrt.c
@@ -77,9 +77,8 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 static inline int
diff --git a/runtime/parsec/codelets/codelet_zgessm.c b/runtime/parsec/codelets/codelet_zgessm.c
index a7f62dc4b..a4762cfa2 100644
--- a/runtime/parsec/codelets/codelet_zgessm.c
+++ b/runtime/parsec/codelets/codelet_zgessm.c
@@ -60,9 +60,8 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 static inline int
diff --git a/runtime/parsec/codelets/codelet_zgetrf_incpiv.c b/runtime/parsec/codelets/codelet_zgetrf_incpiv.c
index 09ef6c401..55a1fe635 100644
--- a/runtime/parsec/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/parsec/codelets/codelet_zgetrf_incpiv.c
@@ -65,10 +65,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/runtime/parsec/codelets/codelet_zgetrf_nopiv.c b/runtime/parsec/codelets/codelet_zgetrf_nopiv.c
index ab7f49bb1..0aadb3c90 100644
--- a/runtime/parsec/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/parsec/codelets/codelet_zgetrf_nopiv.c
@@ -58,10 +58,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/runtime/parsec/codelets/codelet_zlacpy.c b/runtime/parsec/codelets/codelet_zlacpy.c
index 64c777e38..d79617ccb 100644
--- a/runtime/parsec/codelets/codelet_zlacpy.c
+++ b/runtime/parsec/codelets/codelet_zlacpy.c
@@ -49,11 +49,10 @@ CORE_zlacpyx_parsec( parsec_execution_stream_t *context,
 }
 
 void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int m, int n, int nb,
-                         int displA, const CHAM_desc_t *A, int Am, int An, int lda,
-                         int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+                          cham_uplo_t uplo, int m, int n, int nb,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
-
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
 
     parsec_dtd_taskpool_insert_task(
@@ -71,12 +70,12 @@ void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
     (void)nb;
 }
 
-void INSERT_TASK_zlacpy(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                        0, A, Am, An, lda,
-                        0, B, Bm, Bn, ldb );
+                         0, A, Am, An, lda,
+                         0, B, Bm, Bn, ldb );
 }
diff --git a/runtime/parsec/codelets/codelet_zpamm.c b/runtime/parsec/codelets/codelet_zpamm.c
deleted file mode 100644
index 3d075b014..000000000
--- a/runtime/parsec/codelets/codelet_zpamm.c
+++ /dev/null
@@ -1,224 +0,0 @@
-/**
- *
- * @file parsec/codelet_zpamm.c
- *
- * @copyright 2009-2015 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon zpamm PaRSEC codelet
- *
- * @version 1.0.0
- * @author Reazul Hoque
- * @precisions normal z -> c d s
- *
- */
-#include "chameleon_parsec.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  ZPAMM  performs one of the matrix-matrix operations
- *
- *                    LEFT                      RIGHT
- *     OP ChameleonW  :  W  = A1 + op(V) * A2  or  W  = A1 + A2 * op(V)
- *     OP ChameleonA2 :  A2 = A2 - op(V) * W   or  A2 = A2 - W * op(V)
- *
- *  where  op( V ) is one of
- *
- *     op( V ) = V   or   op( V ) = V**T   or   op( V ) = V**H,
- *
- *  A1, A2 and W are general matrices, and V is:
- *
- *        l = k: rectangle + triangle
- *        l < k: rectangle + trapezoid
- *        l = 0: rectangle
- *
- *  Size of V, both rowwise and columnwise, is:
- *
- *         ----------------------
- *          side   trans    size
- *         ----------------------
- *          left     N     M x K
- *                   T     K x M
- *          right    N     K x N
- *                   T     N x K
- *         ----------------------
- *
- *  LEFT (columnwise and rowwise):
- *
- *              |    K    |                 |         M         |
- *           _  __________   _              _______________        _
- *              |    |    |                 |             | \
- *     V:       |    |    |            V':  |_____________|___\    K
- *              |    |    | M-L             |                  |
- *           M  |    |    |                 |__________________|   _
- *              |____|    |  _
- *              \    |    |                 |    M - L    | L  |
- *                \  |    |  L
- *           _      \|____|  _
- *
- *  RIGHT (columnwise and rowwise):
- *
- *          |         K         |                   |    N    |
- *          _______________        _             _  __________   _
- *          |             | \                       |    |    |
- *     V':  |_____________|___\    N        V:      |    |    |
- *          |                  |                    |    |    | K-L
- *          |__________________|   _             K  |    |    |
- *                                                  |____|    |  _
- *          |    K - L    | L  |                    \    |    |
- *                                                    \  |    |  L
- *                                               _      \|____|  _
- *
- *  Arguments
- *  ==========
- *
- * @param[in] op
- *
- *         OP specifies which operation to perform:
- *
- *         @arg ChameleonW  : W  = A1 + op(V) * A2  or  W  = A1 + A2 * op(V)
- *         @arg ChameleonA2 : A2 = A2 - op(V) * W   or  A2 = A2 - W * op(V)
- *
- * @param[in] side
- *
- *         SIDE specifies whether  op( V ) multiplies A2
- *         or W from the left or right as follows:
- *
- *         @arg ChamLeft  : multiply op( V ) from the left
- *                            OP ChameleonW  :  W  = A1 + op(V) * A2
- *                            OP ChameleonA2 :  A2 = A2 - op(V) * W
- *
- *         @arg ChamRight : multiply op( V ) from the right
- *                            OP ChameleonW  :  W  = A1 + A2 * op(V)
- *                            OP ChameleonA2 :  A2 = A2 - W * op(V)
- *
- * @param[in] storev
- *
- *         Indicates how the vectors which define the elementary
- *         reflectors are stored in V:
- *
- *         @arg ChamColumnwise
- *         @arg ChamRowwise
- *
- * @param[in] M
- *         The number of rows of the A1, A2 and W
- *         If SIDE is ChamLeft, the number of rows of op( V )
- *
- * @param[in] N
- *         The number of columns of the A1, A2 and W
- *         If SIDE is ChamRight, the number of columns of op( V )
- *
- * @param[in] K
- *         If SIDE is ChamLeft, the number of columns of op( V )
- *         If SIDE is ChamRight, the number of rows of op( V )
- *
- * @param[in] L
- *         The size of the triangular part of V
- *
- * @param[in] A1
- *         On entry, the M-by-N tile A1.
- *
- * @param[in] LDA1
- *         The leading dimension of the array A1. LDA1 >= max(1,M).
- *
- * @param[in,out] A2
- *         On entry, the M-by-N tile A2.
- *         On exit, if OP is ChameleonA2 A2 is overwritten
- *
- * @param[in] LDA2
- *         The leading dimension of the tile A2. LDA2 >= max(1,M).
- *
- * @param[in] V
- *         The matrix V as described above.
- *         If SIDE is ChamLeft : op( V ) is M-by-K
- *         If SIDE is ChamRight: op( V ) is K-by-N
- *
- * @param[in] LDV
- *         The leading dimension of the array V.
- *
- * @param[in,out] W
- *         On entry, the M-by-N matrix W.
- *         On exit, W is overwritten either if OP is ChameleonA2 or ChameleonW.
- *         If OP is ChameleonA2, W is an input and is used as a workspace.
- *
- * @param[in] LDW
- *         The leading dimension of array WORK.
- *
- *******************************************************************************
- *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *
- */
-
-
-/**/
-
-static inline int
-CORE_zpamm_parsec( parsec_execution_stream_t *context,
-                    parsec_task_t             *this_task )
-{
-    int op;
-    cham_side_t side;
-    cham_store_t storev;
-    int M;
-    int N;
-    int K;
-    int L;
-    CHAMELEON_Complex64_t *A1;
-    int LDA1;
-    CHAMELEON_Complex64_t *A2;
-    int LDA2;
-    CHAMELEON_Complex64_t *V;
-    int LDV;
-    CHAMELEON_Complex64_t *W;
-    int LDW;
-
-    parsec_dtd_unpack_args(
-        this_task, &op, &side, &storev, &M, &N, &K, &L, &A1, &LDA1, &A2, &LDA2, &V, &LDV, &W, &LDW );
-
-    CORE_zpamm( op, side, storev, M, N, K, L, A1, LDA1, A2, LDA2, V, LDV, W, LDW );
-
-    (void)context;
-    return PARSEC_HOOK_RETURN_DONE;
-}
-
-void
-INSERT_TASK_zpamm(const RUNTIME_option_t *options,
-                 int op, cham_side_t side, cham_store_t storev,
-                 int m, int n, int k, int l,
-                 const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                       const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                 const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                       const CHAM_desc_t *W, int Wm, int Wn, int ldw)
-{
-    parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
-
-    parsec_dtd_taskpool_insert_task(
-        PARSEC_dtd_taskpool, CORE_zpamm_parsec, options->priority, "pamm",
-        sizeof(int),                        &op,                VALUE,
-        sizeof(int),                 &side,              VALUE,
-        sizeof(int),                 &storev,            VALUE,
-        sizeof(int),                        &m,                 VALUE,
-        sizeof(int),                        &n,                 VALUE,
-        sizeof(int),                        &k,                 VALUE,
-        sizeof(int),                        &l,                 VALUE,
-        PASSED_BY_REF,         RTBLKADDR( A1, CHAMELEON_Complex64_t, A1m, A1n ), chameleon_parsec_get_arena_index( A1 ) | INPUT,
-        sizeof(int),                        &lda1,              VALUE,
-        PASSED_BY_REF,         RTBLKADDR( A2, CHAMELEON_Complex64_t, A2m, A2n ), chameleon_parsec_get_arena_index( A2 ) | INOUT | AFFINITY,
-        sizeof(int),                        &lda2,              VALUE,
-        PASSED_BY_REF,         RTBLKADDR( V, CHAMELEON_Complex64_t, Vm, Vn ), chameleon_parsec_get_arena_index( V ) | INPUT,
-        sizeof(int),                        &ldv,               VALUE,
-        PASSED_BY_REF,         RTBLKADDR( W, CHAMELEON_Complex64_t, Wm, Wn ), chameleon_parsec_get_arena_index( W ) | INOUT,
-        sizeof(int),                        &ldw,               VALUE,
-        PARSEC_DTD_ARG_END );
-}
diff --git a/runtime/parsec/codelets/codelet_zplssq.c b/runtime/parsec/codelets/codelet_zplssq.c
index ed1fde76f..99006c131 100644
--- a/runtime/parsec/codelets/codelet_zplssq.c
+++ b/runtime/parsec/codelets/codelet_zplssq.c
@@ -21,11 +21,35 @@
 #include "chameleon/tasks_z.h"
 #include "coreblas/coreblas_z.h"
 
+static inline int
+CORE_zplssq_parsec( parsec_execution_stream_t *context,
+                    parsec_task_t             *this_task )
+{
+    double *SCLSSQ_IN;
+    double *SCLSSQ_OUT;
+
+    parsec_dtd_unpack_args(
+        this_task, &SCLSSQ_IN, &SCLSSQ_OUT );
+
+    assert( SCLSSQ_OUT[0] >= 0. );
+    if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
+        SCLSSQ_OUT[1] = SCLSSQ_IN[1]  + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
+        SCLSSQ_OUT[0] = SCLSSQ_IN[0];
+    } else {
+        if ( SCLSSQ_OUT[0] > 0 ) {
+            SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1]  * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
+        }
+    }
+
+    (void)context;
+    return PARSEC_HOOK_RETURN_DONE;
+}
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_zplssq returns: scl * sqrt(ssq)
+ * @brief Compute sqrt( sum( a_ij ^ 2 ) ) = scl * sqrt(ssq)
  *
  * with scl and ssq such that
  *
@@ -52,33 +76,9 @@
  *          On exit, result contains scl * sqrt( ssq )
  *
  */
-static inline int
-CORE_zplssq_parsec( parsec_execution_stream_t *context,
-                    parsec_task_t             *this_task )
-{
-    double *SCLSSQ_IN;
-    double *SCLSSQ_OUT;
-
-    parsec_dtd_unpack_args(
-        this_task, &SCLSSQ_IN, &SCLSSQ_OUT );
-
-    assert( SCLSSQ_OUT[0] >= 0. );
-    if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
-        SCLSSQ_OUT[1] = SCLSSQ_IN[1]  + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
-        SCLSSQ_OUT[0] = SCLSSQ_IN[0];
-    } else {
-        if ( SCLSSQ_OUT[0] > 0 ) {
-            SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1]  * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
-        }
-    }
-
-    (void)context;
-    return PARSEC_HOOK_RETURN_DONE;
-}
-
 void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
-                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
-                        const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn )
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
+                         const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
 
@@ -91,7 +91,7 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
 
 static inline int
 CORE_zplssq2_parsec( parsec_execution_stream_t *context,
-                    parsec_task_t             *this_task )
+                     parsec_task_t             *this_task )
 {
     double *RESULT;
 
@@ -105,7 +105,7 @@ CORE_zplssq2_parsec( parsec_execution_stream_t *context,
 }
 
 void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
+                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
 
diff --git a/runtime/parsec/codelets/codelet_ztile_zero.c b/runtime/parsec/codelets/codelet_ztile_zero.c
deleted file mode 100644
index e07175cb1..000000000
--- a/runtime/parsec/codelets/codelet_ztile_zero.c
+++ /dev/null
@@ -1,61 +0,0 @@
-/**
- *
- * @file parsec/codelet_ztile_zero.c
- *
- * @copyright 2009-2015 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon ztile_zero PaRSEC codelet
- *
- * @version 1.0.0
- * @author Reazul Hoque
- * @precisions normal z -> c d s
- *
- */
-#include "chameleon_parsec.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-static inline int
-CORE_ztile_zero_parsec( parsec_execution_stream_t *context,
-                        parsec_task_t             *this_task )
-{
-    int X1;
-    int X2;
-    int Y1;
-    int Y2;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int x, y;
-
-    parsec_dtd_unpack_args(
-        this_task, &X1, &X2, &Y1, &Y2, &A, &lda );
-
-    for (x = X1; x < X2; x++)
-        for (y = Y1; y < Y2; y++)
-            A[lda * x + y] = 0.0;
-
-    (void)context;
-    return PARSEC_HOOK_RETURN_DONE;
-}
-
-void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options,
-                            int X1, int X2, int Y1, int Y2,
-                            const CHAM_desc_t *A, int Am, int An, int lda )
-{
-    parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
-
-    parsec_dtd_taskpool_insert_task(
-        PARSEC_dtd_taskpool, CORE_ztile_zero_parsec, options->priority, "tile zero",
-        sizeof(int),       &X1,                       VALUE,
-        sizeof(int),       &X2,                       VALUE,
-        sizeof(int),       &Y1,                       VALUE,
-        sizeof(int),       &Y2,                       VALUE,
-        PASSED_BY_REF,     RTBLKADDR( A, CHAMELEON_Complex64_t, Am, An ), chameleon_parsec_get_arena_index( A ) | OUTPUT | AFFINITY,
-        sizeof(int),       &lda,                      VALUE,
-        PARSEC_DTD_ARG_END );
-}
diff --git a/runtime/parsec/codelets/codelet_ztradd.c b/runtime/parsec/codelets/codelet_ztradd.c
index b431f983a..267ac386a 100644
--- a/runtime/parsec/codelets/codelet_ztradd.c
+++ b/runtime/parsec/codelets/codelet_ztradd.c
@@ -52,7 +52,7 @@ CORE_ztradd_parsec( parsec_execution_stream_t *context,
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pztradd.
+ * @brief Adds two trapezoidal matrices together as in PBLAS pztradd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -102,15 +102,14 @@ CORE_ztradd_parsec( parsec_execution_stream_t *context,
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_ztradd(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     parsec_taskpool_t* PARSEC_dtd_taskpool = (parsec_taskpool_t *)(options->sequence->schedopt);
 
diff --git a/runtime/quark/codelets/codelet_zgeadd.c b/runtime/quark/codelets/codelet_zgeadd.c
index 16287953c..935bc0ba1 100644
--- a/runtime/quark/codelets/codelet_zgeadd.c
+++ b/runtime/quark/codelets/codelet_zgeadd.c
@@ -47,7 +47,7 @@ void CORE_zgeadd_quark(Quark *quark)
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd.
+ * @brief Adds two general matrices together as in PBLAS pzgeadd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -91,15 +91,14 @@ void CORE_zgeadd_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_zgeadd(const RUNTIME_option_t *options,
-                       cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
+                         cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GEADD;
diff --git a/runtime/quark/codelets/codelet_zgelqt.c b/runtime/quark/codelets/codelet_zgelqt.c
index 240773c98..45db83e55 100644
--- a/runtime/quark/codelets/codelet_zgelqt.c
+++ b/runtime/quark/codelets/codelet_zgelqt.c
@@ -100,9 +100,8 @@ void CORE_zgelqt_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
diff --git a/runtime/quark/codelets/codelet_zgeqrt.c b/runtime/quark/codelets/codelet_zgeqrt.c
index 09ed24eef..33ad21a52 100644
--- a/runtime/quark/codelets/codelet_zgeqrt.c
+++ b/runtime/quark/codelets/codelet_zgeqrt.c
@@ -101,9 +101,8 @@ void CORE_zgeqrt_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
diff --git a/runtime/quark/codelets/codelet_zgessm.c b/runtime/quark/codelets/codelet_zgessm.c
index d31d3dc9b..63b59a664 100644
--- a/runtime/quark/codelets/codelet_zgessm.c
+++ b/runtime/quark/codelets/codelet_zgessm.c
@@ -86,9 +86,8 @@ void CORE_zgessm_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
diff --git a/runtime/quark/codelets/codelet_zgetrf_incpiv.c b/runtime/quark/codelets/codelet_zgetrf_incpiv.c
index 7ba0886d1..9b9d29a7f 100644
--- a/runtime/quark/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/quark/codelets/codelet_zgetrf_incpiv.c
@@ -94,10 +94,9 @@ void CORE_zgetrf_incpiv_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/runtime/quark/codelets/codelet_zgetrf_nopiv.c b/runtime/quark/codelets/codelet_zgetrf_nopiv.c
index dfee169e6..c7115e7d5 100644
--- a/runtime/quark/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/quark/codelets/codelet_zgetrf_nopiv.c
@@ -81,10 +81,9 @@ void CORE_zgetrf_nopiv_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/runtime/quark/codelets/codelet_zlacpy.c b/runtime/quark/codelets/codelet_zlacpy.c
index 99a0dc89a..8aa18403b 100644
--- a/runtime/quark/codelets/codelet_zlacpy.c
+++ b/runtime/quark/codelets/codelet_zlacpy.c
@@ -43,10 +43,10 @@ static inline void CORE_zlacpy_quark(Quark *quark)
     CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB);
 }
 
-void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, int m, int n, int nb,
-                        int displA, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo, int m, int n, int nb,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_LACPY;
@@ -63,12 +63,12 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
         0);
 }
 
-void INSERT_TASK_zlacpy(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, int m, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
-                        0, A, Am, An, lda,
-                        0, B, Bm, Bn, ldb );
+                         0, A, Am, An, lda,
+                         0, B, Bm, Bn, ldb );
 }
diff --git a/runtime/quark/codelets/codelet_zpamm.c b/runtime/quark/codelets/codelet_zpamm.c
deleted file mode 100644
index ba9de2a29..000000000
--- a/runtime/quark/codelets/codelet_zpamm.c
+++ /dev/null
@@ -1,220 +0,0 @@
-/**
- *
- * @file quark/codelet_zpamm.c
- *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon zpamm Quark codelet
- *
- * @version 1.0.0
- * @comment This file has been automatically generated
- *          from Plasma 2.5.0 for CHAMELEON 1.0.0
- * @author Dulceneia Becker
- * @author Mathieu Faverge
- * @author Emmanuel Agullo
- * @author Cedric Castagnede
- * @date 2011-06-14
- * @precisions normal z -> c d s
- *
- */
-#include "coreblas/cblas.h"
-#include "chameleon_quark.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-void
-CORE_zpamm_quark(Quark *quark)
-{
-    int op;
-    cham_side_t side;
-    cham_store_t storev;
-    int M;
-    int N;
-    int K;
-    int L;
-    CHAMELEON_Complex64_t *A1;
-    int LDA1;
-    CHAMELEON_Complex64_t *A2;
-    int LDA2;
-    CHAMELEON_Complex64_t *V;
-    int LDV;
-    CHAMELEON_Complex64_t *W;
-    int LDW;
-
-    quark_unpack_args_15(quark, op, side, storev, M, N, K, L,
-            A1, LDA1, A2, LDA2, V, LDV, W, LDW);
-
-    CORE_zpamm( op, side, storev, M, N, K, L, A1, LDA1, A2, LDA2, V, LDV, W, LDW);
-}
-
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- *  ZPAMM  performs one of the matrix-matrix operations
- *
- *                    LEFT                      RIGHT
- *     OP ChameleonW  :  W  = A1 + op(V) * A2  or  W  = A1 + A2 * op(V)
- *     OP ChameleonA2 :  A2 = A2 - op(V) * W   or  A2 = A2 - W * op(V)
- *
- *  where  op( V ) is one of
- *
- *     op( V ) = V   or   op( V ) = V**T   or   op( V ) = V**H,
- *
- *  A1, A2 and W are general matrices, and V is:
- *
- *        l = k: rectangle + triangle
- *        l < k: rectangle + trapezoid
- *        l = 0: rectangle
- *
- *  Size of V, both rowwise and columnwise, is:
- *
- *         ----------------------
- *          side   trans    size
- *         ----------------------
- *          left     N     M x K
- *                   T     K x M
- *          right    N     K x N
- *                   T     N x K
- *         ----------------------
- *
- *  LEFT (columnwise and rowwise):
- *
- *              |    K    |                 |         M         |
- *           _  __________   _              _______________        _
- *              |    |    |                 |             | \
- *     V:       |    |    |            V':  |_____________|___\    K
- *              |    |    | M-L             |                  |
- *           M  |    |    |                 |__________________|   _
- *              |____|    |  _
- *              \    |    |                 |    M - L    | L  |
- *                \  |    |  L
- *           _      \|____|  _
- *
- *  RIGHT (columnwise and rowwise):
- *
- *          |         K         |                   |    N    |
- *          _______________        _             _  __________   _
- *          |             | \                       |    |    |
- *     V':  |_____________|___\    N        V:      |    |    |
- *          |                  |                    |    |    | K-L
- *          |__________________|   _             K  |    |    |
- *                                                  |____|    |  _
- *          |    K - L    | L  |                    \    |    |
- *                                                    \  |    |  L
- *                                               _      \|____|  _
- *
- *  Arguments
- *  ==========
- *
- * @param[in] op
- *
- *         OP specifies which operation to perform:
- *
- *         @arg ChameleonW  : W  = A1 + op(V) * A2  or  W  = A1 + A2 * op(V)
- *         @arg ChameleonA2 : A2 = A2 - op(V) * W   or  A2 = A2 - W * op(V)
- *
- * @param[in] side
- *
- *         SIDE specifies whether  op( V ) multiplies A2
- *         or W from the left or right as follows:
- *
- *         @arg ChamLeft  : multiply op( V ) from the left
- *                            OP ChameleonW  :  W  = A1 + op(V) * A2
- *                            OP ChameleonA2 :  A2 = A2 - op(V) * W
- *
- *         @arg ChamRight : multiply op( V ) from the right
- *                            OP ChameleonW  :  W  = A1 + A2 * op(V)
- *                            OP ChameleonA2 :  A2 = A2 - W * op(V)
- *
- * @param[in] storev
- *
- *         Indicates how the vectors which define the elementary
- *         reflectors are stored in V:
- *
- *         @arg ChamColumnwise
- *         @arg ChamRowwise
- *
- * @param[in] M
- *         The number of rows of the A1, A2 and W
- *         If SIDE is ChamLeft, the number of rows of op( V )
- *
- * @param[in] N
- *         The number of columns of the A1, A2 and W
- *         If SIDE is ChamRight, the number of columns of op( V )
- *
- * @param[in] K
- *         If SIDE is ChamLeft, the number of columns of op( V )
- *         If SIDE is ChamRight, the number of rows of op( V )
- *
- * @param[in] L
- *         The size of the triangular part of V
- *
- * @param[in] A1
- *         On entry, the M-by-N tile A1.
- *
- * @param[in] LDA1
- *         The leading dimension of the array A1. LDA1 >= max(1,M).
- *
- * @param[in,out] A2
- *         On entry, the M-by-N tile A2.
- *         On exit, if OP is ChameleonA2 A2 is overwritten
- *
- * @param[in] LDA2
- *         The leading dimension of the tile A2. LDA2 >= max(1,M).
- *
- * @param[in] V
- *         The matrix V as described above.
- *         If SIDE is ChamLeft : op( V ) is M-by-K
- *         If SIDE is ChamRight: op( V ) is K-by-N
- *
- * @param[in] LDV
- *         The leading dimension of the array V.
- *
- * @param[in,out] W
- *         On entry, the M-by-N matrix W.
- *         On exit, W is overwritten either if OP is ChameleonA2 or ChameleonW.
- *         If OP is ChameleonA2, W is an input and is used as a workspace.
- *
- * @param[in] LDW
- *         The leading dimension of array WORK.
- *
- *******************************************************************************
- *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
- *
- */
-void
-INSERT_TASK_zpamm(const RUNTIME_option_t *options,
-                 int op, cham_side_t side, cham_store_t storev,
-                 int m, int n, int k, int l,
-                 const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                       const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                 const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                       const CHAM_desc_t *W, int Wm, int Wn, int ldw)
-{
-    QUARK_Insert_Task(opt->quark, CORE_zpamm_quark, (Quark_Task_Flags*)opt,
-        sizeof(int),                        &op,      VALUE,
-        sizeof(int),                &side,    VALUE,
-        sizeof(int),                &storev,  VALUE,
-        sizeof(int),                        &m,       VALUE,
-        sizeof(int),                        &n,       VALUE,
-        sizeof(int),                        &k,       VALUE,
-        sizeof(int),                        &l,       VALUE,
-        sizeof(CHAMELEON_Complex64_t)*m*k,     RTBLKADDR(A1, CHAMELEON_Complex64_t, A1m, A1n),           INPUT,
-        sizeof(int),                        &lda1,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*k*n,     RTBLKADDR(A2, CHAMELEON_Complex64_t, A2m, A2n),           INOUT,
-        sizeof(int),                        &lda2,    VALUE,
-        sizeof(CHAMELEON_Complex64_t)*m*n,     RTBLKADDR(V, CHAMELEON_Complex64_t, Vm, Vn),            INPUT,
-        sizeof(int),                        &ldv,     VALUE,
-        sizeof(CHAMELEON_Complex64_t)*m*n,     RTBLKADDR(W, CHAMELEON_Complex64_t, Wm, Wn),            INOUT,
-        sizeof(int),                        &ldw,     VALUE,
-        0);
-}
diff --git a/runtime/quark/codelets/codelet_zplssq.c b/runtime/quark/codelets/codelet_zplssq.c
index 79067050a..14418c58f 100644
--- a/runtime/quark/codelets/codelet_zplssq.c
+++ b/runtime/quark/codelets/codelet_zplssq.c
@@ -46,7 +46,7 @@ void CORE_zplssq_quark(Quark *quark)
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_zplssq returns: scl * sqrt(ssq)
+ * @brief Compute sqrt( sum( a_ij ^ 2 ) ) = scl * sqrt(ssq)
  *
  * with scl and ssq such that
  *
@@ -74,8 +74,8 @@ void CORE_zplssq_quark(Quark *quark)
  *
  */
 void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
-                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
-                        const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn )
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn,
+                         const CHAM_desc_t *SCLSSQ,     int SCLSSQm,     int SCLSSQn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     QUARK_Insert_Task(opt->quark, CORE_zplssq_quark, (Quark_Task_Flags*)opt,
@@ -94,7 +94,7 @@ void CORE_zplssq2_quark(Quark *quark)
 }
 
 void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
+                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     QUARK_Insert_Task(opt->quark, CORE_zplssq2_quark, (Quark_Task_Flags*)opt,
diff --git a/runtime/quark/codelets/codelet_zssssm.c b/runtime/quark/codelets/codelet_zssssm.c
index ea1830964..878b17a56 100644
--- a/runtime/quark/codelets/codelet_zssssm.c
+++ b/runtime/quark/codelets/codelet_zssssm.c
@@ -115,9 +115,8 @@ void CORE_zssssm_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
diff --git a/runtime/quark/codelets/codelet_ztile_zero.c b/runtime/quark/codelets/codelet_ztile_zero.c
deleted file mode 100644
index 68f52d47c..000000000
--- a/runtime/quark/codelets/codelet_ztile_zero.c
+++ /dev/null
@@ -1,58 +0,0 @@
-/**
- *
- * @file quark/codelet_ztile_zero.c
- *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon ztile_zero Quark codelet
- *
- * @version 1.0.0
- * @author Hatem Ltaief
- * @author Mathieu Faverge
- * @author Jakub Kurzak
- * @date 2010-11-15
- * @precisions normal z -> c d s
- *
- */
-#include "chameleon_quark.h"
-#include "chameleon/tasks_z.h"
-#include "coreblas/coreblas_z.h"
-
-void CORE_ztile_zero_quark(Quark *quark)
-{
-    int X1;
-    int X2;
-    int Y1;
-    int Y2;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-
-    int x, y;
-
-    quark_unpack_args_6(quark, X1, X2, Y1, Y2, A, lda);
-
-    for (x = X1; x < X2; x++)
-        for (y = Y1; y < Y2; y++)
-            A[lda*x+y] = 0.0;
-
-}
-
-void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options,
-                            int X1, int X2, int Y1, int Y2,
-                            const CHAM_desc_t *A, int Am, int An, int lda )
-{
-    quark_option_t *opt = (quark_option_t*)(options->schedopt);
-    QUARK_Insert_Task(opt->quark, CORE_ztile_zero_quark, (Quark_Task_Flags*)opt,
-        sizeof(int),                       &X1,                                       VALUE,
-        sizeof(int),                       &X2,                                       VALUE,
-        sizeof(int),                       &Y1,                                       VALUE,
-        sizeof(int),                       &Y2,                                       VALUE,
-        sizeof(CHAMELEON_Complex64_t)*A->bsiz,  RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),  OUTPUT | LOCALITY,
-        sizeof(int),                       &lda,                                      VALUE,
-        0);
-}
diff --git a/runtime/quark/codelets/codelet_ztradd.c b/runtime/quark/codelets/codelet_ztradd.c
index d18aa1db8..8c42160bc 100644
--- a/runtime/quark/codelets/codelet_ztradd.c
+++ b/runtime/quark/codelets/codelet_ztradd.c
@@ -46,7 +46,7 @@ void CORE_ztradd_quark(Quark *quark)
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd.
+ * @brief Adds two trapezoidal matrices together as in PBLAS pztradd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -96,15 +96,14 @@ void CORE_ztradd_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_ztradd(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     quark_option_t *opt = (quark_option_t*)(options->schedopt);
     DAG_CORE_GEADD;
diff --git a/runtime/quark/codelets/codelet_ztstrf.c b/runtime/quark/codelets/codelet_ztstrf.c
index d44bb81a1..1c31704e2 100644
--- a/runtime/quark/codelets/codelet_ztstrf.c
+++ b/runtime/quark/codelets/codelet_ztstrf.c
@@ -115,10 +115,9 @@ void CORE_ztstrf_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
diff --git a/runtime/quark/codelets/codelet_zunmlq.c b/runtime/quark/codelets/codelet_zunmlq.c
index f87e193ad..af020eaec 100644
--- a/runtime/quark/codelets/codelet_zunmlq.c
+++ b/runtime/quark/codelets/codelet_zunmlq.c
@@ -129,9 +129,8 @@ void CORE_zunmlq_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 void INSERT_TASK_zunmlq(const RUNTIME_option_t *options,
diff --git a/runtime/quark/codelets/codelet_zunmqr.c b/runtime/quark/codelets/codelet_zunmqr.c
index d78a12faa..8407d8967 100644
--- a/runtime/quark/codelets/codelet_zunmqr.c
+++ b/runtime/quark/codelets/codelet_zunmqr.c
@@ -129,9 +129,8 @@ void CORE_zunmqr_quark(Quark *quark)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 void INSERT_TASK_zunmqr(const RUNTIME_option_t *options,
diff --git a/runtime/starpu/codelets/codelet_zasum.c b/runtime/starpu/codelets/codelet_zasum.c
index e8aa0aeca..6bfaf2c13 100644
--- a/runtime/starpu/codelets/codelet_zasum.c
+++ b/runtime/starpu/codelets/codelet_zasum.c
@@ -22,10 +22,33 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_dzasum(const RUNTIME_option_t *options,
-                       cham_store_t storev, cham_uplo_t uplo, int M, int N,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_store_t storev;
+    cham_uplo_t uplo;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    double *work;
+
+    A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N, &lda);
+    CORE_dzasum(storev, uplo, M, N, A, lda, work);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zasum, 2, cl_dzasum_cpu_func)
+
+void INSERT_TASK_dzasum( const RUNTIME_option_t *options,
+                         cham_store_t storev, cham_uplo_t uplo, int M, int N,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_zasum;
     void (*callback)(void*) = options->profiling ? cl_zasum_callback : NULL;
@@ -51,27 +74,3 @@ void INSERT_TASK_dzasum(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_dzasum_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_store_t storev;
-    cham_uplo_t uplo;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *work;
-
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &M, &N, &lda);
-    CORE_dzasum(storev, uplo, M, N, A, lda, work);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zasum, 2, cl_dzasum_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zaxpy.c b/runtime/starpu/codelets/codelet_zaxpy.c
index 0b70bb6f9..5280eae12 100644
--- a/runtime/starpu/codelets/codelet_zaxpy.c
+++ b/runtime/starpu/codelets/codelet_zaxpy.c
@@ -20,10 +20,32 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_zaxpy(const RUNTIME_option_t *options,
-                      int M, CHAMELEON_Complex64_t alpha,
-                      const CHAM_desc_t *A, int Am, int An, int incA,
-                      const CHAM_desc_t *B, int Bm, int Bn, int incB)
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg)
+{
+    int M;
+    CHAMELEON_Complex64_t alpha;
+    CHAMELEON_Complex64_t *A;
+    int incA;
+    CHAMELEON_Complex64_t *B;
+    int incB;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &M, &alpha, &incA, &incB);
+    CORE_zaxpy(M, alpha, A, incA, B, incB);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zaxpy, 2, cl_zaxpy_cpu_func)
+
+void INSERT_TASK_zaxpy( const RUNTIME_option_t *options,
+                        int M, CHAMELEON_Complex64_t alpha,
+                        const CHAM_desc_t *A, int Am, int An, int incA,
+                        const CHAM_desc_t *B, int Bm, int Bn, int incB )
 {
     struct starpu_codelet *codelet = &cl_zaxpy;
     void (*callback)(void*) = options->profiling ? cl_zaxpy_callback : NULL;
@@ -48,26 +70,3 @@ void INSERT_TASK_zaxpy(const RUNTIME_option_t *options,
 #endif
             0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zaxpy_cpu_func(void *descr[], void *cl_arg)
-{
-    int M;
-    CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int incA;
-    CHAMELEON_Complex64_t *B;
-    int incB;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &M, &alpha, &incA, &incB);
-    CORE_zaxpy(M, alpha, A, incA, B, incB);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zaxpy, 2, cl_zaxpy_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zbuild.c b/runtime/starpu/codelets/codelet_zbuild.c
index 4aa45bf5b..a2610a9bc 100644
--- a/runtime/starpu/codelets/codelet_zbuild.c
+++ b/runtime/starpu/codelets/codelet_zbuild.c
@@ -27,9 +27,35 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
+{ /* CPU kernel of the zbuild codelet: delegates the filling of one tile to a user-supplied callback. */
+  CHAMELEON_Complex64_t *A;
+  int ld;
+  void *user_data;
+  void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ;
+  int row_min, row_max, col_min, col_max;
+  /* descr[0] supplies the tile buffer; bounds, ld, user_data and the callback pointer travel in cl_arg. */
+  A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+  starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &ld, &user_data, &user_build_callback );
+
+  /* The callback 'user_build_callback' is expected to build the block of the matrix [row_min, row_max] x [col_min, col_max]
+   * (both bounds are inclusive; indices start at 0 as in C, NOT at 1 as in Fortran)
+   * and to store it at the address 'buffer' (here the tile pointer A) with leading dimension 'ld'.
+   */
+  user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data);
+
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func)
+
 void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
-                        void *user_data, void* user_build_callback )
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         void *user_data, void* user_build_callback )
 {
 
   struct starpu_codelet *codelet = &cl_zbuild;
@@ -61,30 +87,3 @@ void INSERT_TASK_zbuild( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zbuild_cpu_func(void *descr[], void *cl_arg)
-{
-  CHAMELEON_Complex64_t *A;
-  int ld;
-  void *user_data;
-  void (*user_build_callback)(int row_min, int row_max, int col_min, int col_max, void *buffer, int ld, void *user_data) ;
-  int row_min, row_max, col_min, col_max;
-
-  A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-  starpu_codelet_unpack_args(cl_arg, &row_min, &row_max, &col_min, &col_max, &ld, &user_data, &user_build_callback );
-
-  /* The callback 'user_build_callback' is expected to build the block of matrix [row_min, row_max] x [col_min, col_max]
-   * (with both min and max values included in the intervals, index start at 0 like in C, NOT 1 like in Fortran)
-   * and store it at the address 'buffer' with leading dimension 'ld'
-   */
-  user_build_callback(row_min, row_max, col_min, col_max, A, ld, user_data);
-
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zbuild, 1, cl_zbuild_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zgeadd.c b/runtime/starpu/codelets/codelet_zgeadd.c
index 7c296d750..eb0adebe5 100644
--- a/runtime/starpu/codelets/codelet_zgeadd.c
+++ b/runtime/starpu/codelets/codelet_zgeadd.c
@@ -24,12 +24,76 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
+{   /* CPU kernel of the zgeadd codelet: forwards the task to CORE_zgeadd. */
+    cham_trans_t trans;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t alpha;
+    const CHAMELEON_Complex64_t *A;
+    int LDA;
+    CHAMELEON_Complex64_t beta;
+    CHAMELEON_Complex64_t *B;
+    int LDB;
+    /* A (descr[0]) is accessed through a const pointer; B (descr[1]) is the non-const operand. */
+    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB); /* order must mirror the packing done at task insertion */
+    CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB);
+    return;
+}
+
+#ifdef CHAMELEON_USE_CUBLAS_V2
+static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
+{   /* CUDA kernel of the zgeadd codelet: same argument list as the CPU version, typed as cuDoubleComplex. */
+    cham_trans_t trans;
+    int M;
+    int N;
+    cuDoubleComplex alpha;
+    const cuDoubleComplex *A;
+    int lda;
+    cuDoubleComplex beta;
+    cuDoubleComplex *B;
+    int ldb;
+    /* The descr[] pointers are handed to CUDA_zgeadd below — presumably device addresses; TODO confirm. */
+    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb);
+    /* 'stream' is not declared in this function: RUNTIME_getStream is expected to be a macro that declares it — verify. */
+    RUNTIME_getStream( stream );
+    /* alpha and beta are passed by address, unlike in the CPU kernel. */
+    CUDA_zgeadd(
+        trans,
+        M, N,
+        &alpha, A, lda,
+        &beta,  B, ldb,
+        stream);
+    /* Wait for the stream only when STARPU_CUDA_ASYNC is not defined. */
+#ifndef STARPU_CUDA_ASYNC
+    cudaStreamSynchronize( stream ); /* NOTE(review): the returned status is ignored */
+#endif
+
+    return;
+}
+#endif /* defined(CHAMELEON_USE_CUBLAS_V2) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+#if defined(CHAMELEON_USE_CUBLAS_V2)
+CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
+#else
+CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
+#endif
+
 /**
  ******************************************************************************
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_zgeadd adds two general matrices together as in PBLAS pzgeadd.
+ * @brief Adds two general matrices together as in PBLAS pzgeadd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -73,15 +137,14 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-void INSERT_TASK_zgeadd(const RUNTIME_option_t *options,
-                       cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zgeadd( const RUNTIME_option_t *options,
+                         cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     struct starpu_codelet *codelet = &cl_zgeadd;
     void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
@@ -111,68 +174,3 @@ void INSERT_TASK_zgeadd(const RUNTIME_option_t *options,
 
     (void)nb;
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgeadd_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_trans_t trans;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t alpha;
-    const CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
-
-    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &LDA, &beta, &LDB);
-    CORE_zgeadd(trans, M, N, alpha, A, LDA, beta, B, LDB);
-    return;
-}
-
-#ifdef CHAMELEON_USE_CUBLAS_V2
-static void cl_zgeadd_cuda_func(void *descr[], void *cl_arg)
-{
-    cham_trans_t trans;
-    int M;
-    int N;
-    cuDoubleComplex alpha;
-    const cuDoubleComplex *A;
-    int lda;
-    cuDoubleComplex beta;
-    cuDoubleComplex *B;
-    int ldb;
-
-    A = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &trans, &M, &N, &alpha, &lda, &beta, &ldb);
-
-    RUNTIME_getStream( stream );
-
-    CUDA_zgeadd(
-        trans,
-        M, N,
-        &alpha, A, lda,
-        &beta,  B, ldb,
-        stream);
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-
-    return;
-}
-#endif /* defined(CHAMELEON_USE_CUBLAS_V2) */
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-#if defined(CHAMELEON_USE_CUBLAS_V2)
-CODELETS(zgeadd, 2, cl_zgeadd_cpu_func, cl_zgeadd_cuda_func, STARPU_CUDA_ASYNC)
-#else
-CODELETS_CPU(zgeadd, 2, cl_zgeadd_cpu_func)
-#endif
diff --git a/runtime/starpu/codelets/codelet_zgelqt.c b/runtime/starpu/codelets/codelet_zgelqt.c
index 8ffad6e1a..abdf1954c 100644
--- a/runtime/starpu/codelets/codelet_zgelqt.c
+++ b/runtime/starpu/codelets/codelet_zgelqt.c
@@ -112,9 +112,8 @@ CODELETS_CPU(zgelqt, 3, cl_zgelqt_cpu_func)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 void INSERT_TASK_zgelqt(const RUNTIME_option_t *options,
diff --git a/runtime/starpu/codelets/codelet_zgemm.c b/runtime/starpu/codelets/codelet_zgemm.c
index 205da5e35..fed1350a7 100644
--- a/runtime/starpu/codelets/codelet_zgemm.c
+++ b/runtime/starpu/codelets/codelet_zgemm.c
@@ -26,51 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
-                      cham_trans_t transA, cham_trans_t transB,
-                      int m, int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                                                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zgemm;
-    void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_R(B, Bm, Bn);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &transA,            sizeof(int),
-        STARPU_VALUE,    &transB,            sizeof(int),
-        STARPU_VALUE,    &m,                 sizeof(int),
-        STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_VALUE,    &k,                 sizeof(int),
-        STARPU_VALUE,    &alpha,             sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,    &lda,               sizeof(int),
-        STARPU_R,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,    &ldb,               sizeof(int),
-        STARPU_VALUE,    &beta,              sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,    &ldc,               sizeof(int),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zgemm",
-#endif
-        0);
-}
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zgemm_cpu_func(void *descr[], void *cl_arg)
 {
@@ -144,3 +99,48 @@ static void cl_zgemm_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zgemm, 3, cl_zgemm_cpu_func, cl_zgemm_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ *
+ * @ingroup INSERT_TASK_Complex64_t
+ *
+ */
+void INSERT_TASK_zgemm(const RUNTIME_option_t *options,
+                      cham_trans_t transA, cham_trans_t transB,
+                      int m, int n, int k, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                                                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                      CHAMELEON_Complex64_t beta,  const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb; /* nb is not used by this implementation */
+    struct starpu_codelet *codelet = &cl_zgemm;
+    void (*callback)(void*) = options->profiling ? cl_zgemm_callback : NULL; /* callback installed only when profiling is on */
+    /* Declare the tiles touched by the task: A and B are read, C is read and written. */
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+    /* transA/transB are packed as sizeof(int) — assumes cham_trans_t is int-sized, TODO confirm. */
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &transA,            sizeof(int),
+        STARPU_VALUE,    &transB,            sizeof(int),
+        STARPU_VALUE,    &m,                 sizeof(int),
+        STARPU_VALUE,    &n,                 sizeof(int),
+        STARPU_VALUE,    &k,                 sizeof(int),
+        STARPU_VALUE,    &alpha,             sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,    &lda,               sizeof(int),
+        STARPU_R,         RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_VALUE,    &ldb,               sizeof(int),
+        STARPU_VALUE,    &beta,              sizeof(CHAMELEON_Complex64_t),
+        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_VALUE,    &ldc,               sizeof(int),
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zgemm",
+#endif
+        0); /* 0 ends the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zgeqrt.c b/runtime/starpu/codelets/codelet_zgeqrt.c
index bee5168f9..1ff57d185 100644
--- a/runtime/starpu/codelets/codelet_zgeqrt.c
+++ b/runtime/starpu/codelets/codelet_zgeqrt.c
@@ -114,9 +114,8 @@ CODELETS_CPU(zgeqrt, 3, cl_zgeqrt_cpu_func)
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if -i, the i-th argument had an illegal value
  *
  */
 void INSERT_TASK_zgeqrt(const RUNTIME_option_t *options,
diff --git a/runtime/starpu/codelets/codelet_zgessm.c b/runtime/starpu/codelets/codelet_zgessm.c
index 2dac8366a..72736b064 100644
--- a/runtime/starpu/codelets/codelet_zgessm.c
+++ b/runtime/starpu/codelets/codelet_zgessm.c
@@ -26,6 +26,32 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
+{   /* CPU kernel of the zgessm codelet: forwards the task to CORE_zgessm. */
+    int m;
+    int n;
+    int k;
+    int ib;
+    int *IPIV;
+    int ldl; /* unpacked below but not passed to CORE_zgessm */
+    CHAMELEON_Complex64_t *D;
+    int ldd;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    /* Only descr[1] (D) and descr[2] (A) are read; descr[0] is not accessed in this function. */
+    D = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda); /* IPIV travels in cl_arg as a pointer value */
+    CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -66,18 +92,17 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
 
-void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
-                       int m, int n, int k, int ib, int nb,
-                       int *IPIV,
-                       const CHAM_desc_t *L, int Lm, int Ln, int ldl,
-                       const CHAM_desc_t *D, int Dm, int Dn, int ldd,
-                       const CHAM_desc_t *A, int Am, int An, int lda)
+void INSERT_TASK_zgessm( const RUNTIME_option_t *options,
+                         int m, int n, int k, int ib, int nb,
+                         int *IPIV,
+                         const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                         const CHAM_desc_t *D, int Dm, int Dn, int ldd,
+                         const CHAM_desc_t *A, int Am, int An, int lda )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zgessm;
@@ -109,30 +134,3 @@ void INSERT_TASK_zgessm(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgessm_cpu_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    int k;
-    int ib;
-    int *IPIV;
-    int ldl;
-    CHAMELEON_Complex64_t *D;
-    int ldd;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-
-    D = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &k, &ib, &IPIV, &ldl, &ldd, &lda);
-    CORE_zgessm(m, n, k, ib, IPIV, D, ldd, A, lda);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zgessm, 3, cl_zgessm_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zgessq.c b/runtime/starpu/codelets/codelet_zgessq.c
index a9cdc2ff8..e22f803bc 100644
--- a/runtime/starpu/codelets/codelet_zgessq.c
+++ b/runtime/starpu/codelets/codelet_zgessq.c
@@ -22,10 +22,31 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgessq_cpu_func(void *descr[], void *cl_arg)
+{   /* CPU kernel of the zgessq codelet: forwards the task to CORE_zgessq. */
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    double *SCALESUMSQ; /* indexed at [0] and [1] below, so at least two doubles are expected */
+    /* descr[0] supplies A and descr[1] supplies the SCALESUMSQ buffer; sizes travel in cl_arg. */
+    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda);
+    CORE_zgessq( m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); /* presumably (scale, sumsq), as the name suggests — confirm */
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func)
+
 void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
-                        int m, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
+                         int m, int n,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     struct starpu_codelet *codelet = &cl_zgessq;
     void (*callback)(void*) = options->profiling ? cl_zgessq_callback : NULL;
@@ -49,25 +70,3 @@ void INSERT_TASK_zgessq( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgessq_cpu_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *SCALESUMSQ;
-
-    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda);
-    CORE_zgessq( m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] );
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zgessq, 2, cl_zgessq_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zgetrf.c b/runtime/starpu/codelets/codelet_zgetrf.c
index b6bf892c4..977e8c2c5 100644
--- a/runtime/starpu/codelets/codelet_zgetrf.c
+++ b/runtime/starpu/codelets/codelet_zgetrf.c
@@ -24,6 +24,36 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg)
+{   /* CPU kernel of the zgetrf codelet: runs CORE_zgetrf and reports a non-zero info to the sequence. */
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int *IPIV;
+    cham_bool_t check_info; /* unpacked below but not otherwise used */
+    int iinfo;
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+    int info = 0;
+    /* descr[0] supplies the tile buffer; everything else, including IPIV, travels in cl_arg. */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &IPIV, &check_info, &iinfo, &sequence, &request);
+    CORE_zgetrf( m, n, A, lda, IPIV, &info );
+    /* Flush with iinfo+info only if info is non-zero and the sequence status is still CHAMELEON_SUCCESS. */
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
+        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func)
+
 void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
                          int m, int n, int nb,
                          const CHAM_desc_t *A, int Am, int An, int lda,
@@ -56,34 +86,3 @@ void INSERT_TASK_zgetrf( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgetrf_cpu_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int *IPIV;
-    cham_bool_t check_info;
-    int iinfo;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
-    int info = 0;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &IPIV, &check_info, &iinfo, &sequence, &request);
-    CORE_zgetrf( m, n, A, lda, IPIV, &info );
-
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
-    }
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zgetrf, 1, cl_zgetrf_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
index 94113ee7d..66a5201ca 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_incpiv.c
@@ -26,6 +26,38 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
+{   /* CPU kernel of the zgetrf_incpiv codelet: runs CORE_zgetrf_incpiv and reports a non-zero info. */
+    CHAMELEON_starpu_ws_t *h_work; /* unpacked below but not otherwise used */
+    int m;
+    int n;
+    int ib;
+    CHAMELEON_Complex64_t *A;
+    int lda, ldl; /* ldl is unpacked below but not passed to the kernel */
+    int *IPIV;
+    cham_bool_t check_info; /* unpacked below but not otherwise used */
+    int iinfo;
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+    int info = 0;
+    /* Only descr[0] is read in this function; everything else, including IPIV, travels in cl_arg. */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work, &sequence, &request);
+    CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info);
+    /* Flush with iinfo+info only if info is non-zero and the sequence status is still CHAMELEON_SUCCESS. */
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
+        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -71,10 +103,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
@@ -122,36 +153,3 @@ void INSERT_TASK_zgetrf_incpiv(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgetrf_incpiv_cpu_func(void *descr[], void *cl_arg)
-{
-    CHAMELEON_starpu_ws_t *h_work;
-    int m;
-    int n;
-    int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda, ldl;
-    int *IPIV;
-    cham_bool_t check_info;
-    int iinfo;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
-    int info = 0;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &ldl, &IPIV, &check_info, &iinfo, &h_work, &sequence, &request);
-    CORE_zgetrf_incpiv(m, n, ib, A, lda, IPIV, &info);
-
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
-    }
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zgetrf_incpiv, 3, cl_zgetrf_incpiv_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
index 9f3a0a8d2..3efbe362e 100644
--- a/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zgetrf_nopiv.c
@@ -23,6 +23,38 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+/*
+ * Codelet CPU
+ */
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
+{   /* CPU kernel of the zgetrf_nopiv codelet: runs CORE_zgetrf_nopiv and reports a non-zero info. */
+    int m;
+    int n;
+    int ib;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int iinfo;
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+    int info = 0;
+    /* descr[0] supplies the tile buffer; sizes, iinfo, sequence and request travel in cl_arg. */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo, &sequence, &request);
+    CORE_zgetrf_nopiv(m, n, ib, A, lda, &info);
+    /* Flush with iinfo+info only if info is non-zero and the sequence status is still CHAMELEON_SUCCESS. */
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
+        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -61,10 +93,9 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
@@ -101,35 +132,3 @@ void INSERT_TASK_zgetrf_nopiv(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-/*
- * Codelet CPU
- */
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zgetrf_nopiv_cpu_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    int ib;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int iinfo;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
-    int info = 0;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &lda, &iinfo, &sequence, &request);
-    CORE_zgetrf_nopiv(m, n, ib, A, lda, &info);
-
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
-    }
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zgetrf_nopiv, 1, cl_zgetrf_nopiv_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zhe2ge.c b/runtime/starpu/codelets/codelet_zhe2ge.c
index 4c562fbeb..11ce21834 100644
--- a/runtime/starpu/codelets/codelet_zhe2ge.c
+++ b/runtime/starpu/codelets/codelet_zhe2ge.c
@@ -18,6 +18,29 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg)
+{   /* CPU kernel of the zhe2ge codelet: forwards the task to CORE_zhe2ge. */
+    cham_uplo_t uplo;
+    int M;
+    int N;
+    const CHAMELEON_Complex64_t *A;
+    int LDA;
+    CHAMELEON_Complex64_t *B;
+    int LDB;
+    /* A (descr[0]) is accessed through a const pointer; B (descr[1]) is the non-const operand. */
+    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &LDA, &LDB); /* order must mirror the packing done at task insertion */
+    CORE_zhe2ge(uplo, M, N, A, LDA, B, LDB);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -54,26 +77,3 @@ void INSERT_TASK_zhe2ge(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zhe2ge_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int M;
-    int N;
-    const CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
-
-    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &LDA, &LDB);
-    CORE_zhe2ge(uplo, M, N, A, LDA, B, LDB);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zhe2ge, 2, cl_zhe2ge_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zhemm.c b/runtime/starpu/codelets/codelet_zhemm.c
index 9396feebc..4f7698352 100644
--- a/runtime/starpu/codelets/codelet_zhemm.c
+++ b/runtime/starpu/codelets/codelet_zhemm.c
@@ -26,51 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
-                      cham_side_t side, cham_uplo_t uplo,
-                      int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zhemm;
-    void (*callback)(void*) = options->profiling ? cl_zhemm_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_R(B, Bm, Bn);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &side,                sizeof(int),
-        STARPU_VALUE,    &uplo,                sizeof(int),
-        STARPU_VALUE,       &m,                        sizeof(int),
-        STARPU_VALUE,       &n,                        sizeof(int),
-        STARPU_VALUE,   &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,               RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,     &lda,                        sizeof(int),
-        STARPU_R,               RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,     &ldb,                        sizeof(int),
-        STARPU_VALUE,    &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,               RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,     &ldc,                        sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zhemm",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zhemm_cpu_func(void *descr[], void *cl_arg)
 {
@@ -142,3 +97,47 @@ static void cl_zhemm_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zhemm, 3, cl_zhemm_cpu_func, cl_zhemm_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * Submit a zhemm task to StarPU: tiles A and B are read, tile C is read-write.
+ * @ingroup INSERT_TASK_Complex64_t
+ *
+ */
+void INSERT_TASK_zhemm(const RUNTIME_option_t *options,
+                      cham_side_t side, cham_uplo_t uplo,
+                      int m, int n, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb; /* nb is accepted for interface symmetry but not used here */
+    struct starpu_codelet *codelet = &cl_zhemm;
+    void (*callback)(void*) = options->profiling ? cl_zhemm_callback : NULL; /* NULL unless profiling is on */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task( /* NOTE(review): value order presumably must match the unpack in cl_zhemm_*_func - confirm */
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &side,                sizeof(int), /* enum packed with sizeof(int) */
+        STARPU_VALUE,    &uplo,                sizeof(int), /* enum packed with sizeof(int) */
+        STARPU_VALUE,       &m,                        sizeof(int),
+        STARPU_VALUE,       &n,                        sizeof(int),
+        STARPU_VALUE,   &alpha,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,               RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), /* tile A(Am, An), read-only */
+        STARPU_VALUE,     &lda,                        sizeof(int),
+        STARPU_R,               RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), /* tile B(Bm, Bn), read-only */
+        STARPU_VALUE,     &ldb,                        sizeof(int),
+        STARPU_VALUE,    &beta,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_RW,               RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), /* tile C(Cm, Cn), read-write */
+        STARPU_VALUE,     &ldc,                        sizeof(int),
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zhemm",
+#endif
+        0); /* 0 ends the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zher2k.c b/runtime/starpu/codelets/codelet_zher2k.c
index 668ee4246..24553aba4 100644
--- a/runtime/starpu/codelets/codelet_zher2k.c
+++ b/runtime/starpu/codelets/codelet_zher2k.c
@@ -26,51 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans,
-                       int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                       double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zher2k;
-    void (*callback)(void*) = options->profiling ? cl_zher2k_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_R(B, Bm, Bn);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,      &uplo,                sizeof(int),
-        STARPU_VALUE,     &trans,                sizeof(int),
-        STARPU_VALUE,         &n,                        sizeof(int),
-        STARPU_VALUE,         &k,                        sizeof(int),
-        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,       &lda,                        sizeof(int),
-        STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,       &ldb,                        sizeof(int),
-        STARPU_VALUE,      &beta,                     sizeof(double),
-        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,       &ldc,                        sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zher2k",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zher2k_cpu_func(void *descr[], void *cl_arg)
 {
@@ -135,3 +90,47 @@ static void cl_zher2k_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zher2k, 3, cl_zher2k_cpu_func, cl_zher2k_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * Submit a zher2k task to StarPU: tiles A and B are read, tile C is read-write.
+ * @ingroup INSERT_TASK_Complex64_t
+ *
+ */
+void INSERT_TASK_zher2k(const RUNTIME_option_t *options,
+                       cham_uplo_t uplo, cham_trans_t trans,
+                       int n, int k, int nb,
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                       double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb; /* nb is accepted for interface symmetry but not used here */
+    struct starpu_codelet *codelet = &cl_zher2k;
+    void (*callback)(void*) = options->profiling ? cl_zher2k_callback : NULL; /* NULL unless profiling is on */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task( /* NOTE(review): value order presumably must match the unpack in cl_zher2k_*_func - confirm */
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,      &uplo,                sizeof(int), /* enum packed with sizeof(int) */
+        STARPU_VALUE,     &trans,                sizeof(int), /* enum packed with sizeof(int) */
+        STARPU_VALUE,         &n,                        sizeof(int),
+        STARPU_VALUE,         &k,                        sizeof(int),
+        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t), /* alpha is complex ... */
+        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), /* tile A(Am, An), read-only */
+        STARPU_VALUE,       &lda,                        sizeof(int),
+        STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn), /* tile B(Bm, Bn), read-only */
+        STARPU_VALUE,       &ldb,                        sizeof(int),
+        STARPU_VALUE,      &beta,                     sizeof(double), /* ... while beta is a real double */
+        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), /* tile C(Cm, Cn), read-write */
+        STARPU_VALUE,       &ldc,                        sizeof(int),
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zher2k",
+#endif
+        0); /* 0 ends the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zherfb.c b/runtime/starpu/codelets/codelet_zherfb.c
index 21c97dcf8..d83314923 100644
--- a/runtime/starpu/codelets/codelet_zherfb.c
+++ b/runtime/starpu/codelets/codelet_zherfb.c
@@ -20,51 +20,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo,
-                       int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    struct starpu_codelet *codelet = &cl_zherfb;
-    void (*callback)(void*) = options->profiling ? cl_zherfb_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_R(T, Tm, Tn);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &uplo,              sizeof(int),
-        STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_VALUE,    &k,                 sizeof(int),
-        STARPU_VALUE,    &ib,                sizeof(int),
-        STARPU_VALUE,    &nb,                sizeof(int),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,    &lda,               sizeof(int),
-        STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn),
-        STARPU_VALUE,    &ldt,               sizeof(int),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,    &ldc,               sizeof(int),
-        STARPU_SCRATCH,   options->ws_worker,
-        STARPU_VALUE,    &nb,                sizeof(int),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zherfb",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zherfb_cpu_func(void *descr[], void *cl_arg)
 {
@@ -131,3 +86,47 @@ static void cl_zherfb_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zherfb, 4, cl_zherfb_cpu_func, cl_zherfb_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * Submit a zherfb task to StarPU: tiles A and T are read, tile C is read-write.
+ * @ingroup INSERT_TASK_Complex64_t
+ *
+ */
+void INSERT_TASK_zherfb(const RUNTIME_option_t *options,
+                       cham_uplo_t uplo,
+                       int n, int k, int ib, int nb,
+                       const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    struct starpu_codelet *codelet = &cl_zherfb;
+    void (*callback)(void*) = options->profiling ? cl_zherfb_callback : NULL; /* NULL unless profiling is on */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(T, Tm, Tn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task( /* NOTE(review): value order presumably must match the unpack in cl_zherfb_*_func - confirm */
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &uplo,              sizeof(int), /* enum packed with sizeof(int) */
+        STARPU_VALUE,    &n,                 sizeof(int),
+        STARPU_VALUE,    &k,                 sizeof(int),
+        STARPU_VALUE,    &ib,                sizeof(int),
+        STARPU_VALUE,    &nb,                sizeof(int), /* first copy of nb */
+        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), /* tile A(Am, An), read-only */
+        STARPU_VALUE,    &lda,               sizeof(int),
+        STARPU_R,         RTBLKADDR(T, CHAMELEON_Complex64_t, Tm, Tn), /* tile T(Tm, Tn), read-only */
+        STARPU_VALUE,    &ldt,               sizeof(int),
+        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), /* tile C(Cm, Cn), read-write */
+        STARPU_VALUE,    &ldc,               sizeof(int),
+        STARPU_SCRATCH,   options->ws_worker, /* scratch workspace: the 4th buffer of the codelet */
+        STARPU_VALUE,    &nb,                sizeof(int), /* nb packed again; NOTE(review): presumably the workspace leading dimension - confirm */
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zherfb",
+#endif
+        0); /* 0 ends the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zherk.c b/runtime/starpu/codelets/codelet_zherk.c
index 101eef19c..d8709bac1 100644
--- a/runtime/starpu/codelets/codelet_zherk.c
+++ b/runtime/starpu/codelets/codelet_zherk.c
@@ -26,46 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zherk(const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, cham_trans_t trans,
-                      int n, int k, int nb,
-                      double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zherk;
-    void (*callback)(void*) = options->profiling ? cl_zherk_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &uplo,              sizeof(int),
-        STARPU_VALUE,    &trans,             sizeof(int),
-        STARPU_VALUE,    &n,                 sizeof(int),
-        STARPU_VALUE,    &k,                 sizeof(int),
-        STARPU_VALUE,    &alpha,             sizeof(double),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,    &lda,               sizeof(int),
-        STARPU_VALUE,    &beta,              sizeof(double),
-        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,    &ldc,               sizeof(int),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zherk",
-#endif
-        0);
-}
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zherk_cpu_func(void *descr[], void *cl_arg)
 {
@@ -129,3 +89,43 @@ static void cl_zherk_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zherk, 2, cl_zherk_cpu_func, cl_zherk_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * Submit a zherk task to StarPU: tile A is read, tile C is read-write.
+ * @ingroup INSERT_TASK_Complex64_t
+ *
+ */
+void INSERT_TASK_zherk(const RUNTIME_option_t *options,
+                      cham_uplo_t uplo, cham_trans_t trans,
+                      int n, int k, int nb,
+                      double alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      double beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb; /* nb is accepted for interface symmetry but not used here */
+    struct starpu_codelet *codelet = &cl_zherk;
+    void (*callback)(void*) = options->profiling ? cl_zherk_callback : NULL; /* NULL unless profiling is on */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task( /* NOTE(review): value order presumably must match the unpack in cl_zherk_*_func - confirm */
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &uplo,              sizeof(int), /* enum packed with sizeof(int) */
+        STARPU_VALUE,    &trans,             sizeof(int), /* enum packed with sizeof(int) */
+        STARPU_VALUE,    &n,                 sizeof(int),
+        STARPU_VALUE,    &k,                 sizeof(int),
+        STARPU_VALUE,    &alpha,             sizeof(double), /* both scalars are real doubles */
+        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An), /* tile A(Am, An), read-only */
+        STARPU_VALUE,    &lda,               sizeof(int),
+        STARPU_VALUE,    &beta,              sizeof(double),
+        STARPU_RW,        RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn), /* tile C(Cm, Cn), read-write */
+        STARPU_VALUE,    &ldc,               sizeof(int),
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zherk",
+#endif
+        0); /* 0 ends the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zhessq.c b/runtime/starpu/codelets/codelet_zhessq.c
index c47e0871d..c2ebde6af 100644
--- a/runtime/starpu/codelets/codelet_zhessq.c
+++ b/runtime/starpu/codelets/codelet_zhessq.c
@@ -22,6 +22,27 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zhessq_cpu_func(void *descr[], void *cl_arg)
+{
+    /* descr[0] is the matrix tile; descr[1] is a double array whose elements 0 and 1 go to the kernel. */
+    CHAMELEON_Complex64_t *tile   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    double                *result = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    cham_uplo_t            part;
+    int                    n;
+    int                    ld;
+
+    /* Scalars are unpacked in this order: uplo, n, lda. */
+    starpu_codelet_unpack_args(cl_arg, &part, &n, &ld);
+    CORE_zhessq( part, n, tile, ld, result, result + 1 );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zhessq, 2, cl_zhessq_cpu_func)
+
 void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
                         cham_uplo_t uplo, int n,
                         const CHAM_desc_t *A, int Am, int An, int lda,
@@ -49,25 +70,3 @@ void INSERT_TASK_zhessq( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zhessq_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *SCALESUMSQ;
-
-    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda);
-    CORE_zhessq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] );
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zhessq, 2, cl_zhessq_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlacpy.c b/runtime/starpu/codelets/codelet_zlacpy.c
index 0d2426400..9c53e6e4e 100644
--- a/runtime/starpu/codelets/codelet_zlacpy.c
+++ b/runtime/starpu/codelets/codelet_zlacpy.c
@@ -26,15 +26,40 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
+{
+    /* descr[0] is the const (input) tile A, descr[1] the writable tile B. */
+    const CHAMELEON_Complex64_t *src = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    CHAMELEON_Complex64_t       *dst = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    cham_uplo_t uplo;
+    int m, n;
+    int src_offset, src_ld;
+    int dst_offset, dst_ld;
+
+    /* Unpack order: uplo, M, N, displA, LDA, displB, LDB. */
+    starpu_codelet_unpack_args(cl_arg, &uplo, &m, &n,
+                               &src_offset, &src_ld,
+                               &dst_offset, &dst_ld);
+    /* Each base pointer is advanced by its element displacement before the call. */
+    CORE_zlacpy(uplo, m, n, src + src_offset, src_ld, dst + dst_offset, dst_ld);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlacpy, 2, cl_zlacpy_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
-                         cham_uplo_t uplo, int m, int n, int nb,
-                         int displA, const CHAM_desc_t *A, int Am, int An, int lda,
-                         int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpyx( const RUNTIME_option_t *options,
+                          cham_uplo_t uplo, int m, int n, int nb,
+                          int displA, const CHAM_desc_t *A, int Am, int An, int lda,
+                          int displB, const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zlacpy;
@@ -64,37 +89,12 @@ void INSERT_TASK_zlacpyx(const RUNTIME_option_t *options,
         0);
 }
 
-void INSERT_TASK_zlacpy(const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, int m, int n, int nb,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlacpy( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int m, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     INSERT_TASK_zlacpyx( options, uplo, m, n, nb,
                          0, A, Am, An, lda,
                          0, B, Bm, Bn, ldb );
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlacpy_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int M;
-    int N;
-    int displA;
-    int displB;
-    const CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
-
-    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &displA, &LDA, &displB, &LDB);
-    CORE_zlacpy(uplo, M, N, A + displA, LDA, B + displB, LDB);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlacpy, 2, cl_zlacpy_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlag2c.c b/runtime/starpu/codelets/codelet_zlag2c.c
index 43b4314c6..46eea2740 100644
--- a/runtime/starpu/codelets/codelet_zlag2c.c
+++ b/runtime/starpu/codelets/codelet_zlag2c.c
@@ -24,6 +24,28 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
+{
+    /* descr[0] is the Complex64 tile, descr[1] the Complex32 tile. */
+    CHAMELEON_Complex64_t *wide   = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    CHAMELEON_Complex32_t *narrow = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    int m, n;
+    int ld_wide, ld_narrow;
+
+    /* Unpack order: m, n, lda, ldb. */
+    starpu_codelet_unpack_args(cl_arg, &m, &n,
+                               &ld_wide, &ld_narrow);
+
+    CORE_zlag2c( m, n, wide, ld_wide, narrow, ld_narrow);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition: 2 buffers, because the kernel reads descr[0] and descr[1]
+ */
+CODELETS_CPU(zlag2c, 2, cl_zlag2c_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -60,22 +82,27 @@ void INSERT_TASK_zlag2c(const RUNTIME_option_t *options,
 }
 
 #if !defined(CHAMELEON_SIMULATION)
-static void cl_zlag2c_cpu_func(void *descr[], void *cl_arg)
+static void cl_clag2z_cpu_func(void *descr[], void *cl_arg)
 {
     int m;
     int n;
-    CHAMELEON_Complex64_t *A;
+    CHAMELEON_Complex32_t *A;
     int lda;
-    CHAMELEON_Complex32_t *B;
+    CHAMELEON_Complex64_t *B;
     int ldb;
 
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    A = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
     starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb);
-    CORE_zlag2c( m, n, A, lda, B, ldb);
+    CORE_clag2z( m, n, A, lda, B, ldb);
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(clag2z, 2, cl_clag2z_cpu_func)
+
 void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
                        int m, int n, int nb,
                        const CHAM_desc_t *A, int Am, int An, int lda,
@@ -105,30 +132,3 @@ void INSERT_TASK_clag2z(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_clag2z_cpu_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    CHAMELEON_Complex32_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *B;
-    int ldb;
-
-    A = (CHAMELEON_Complex32_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &ldb);
-    CORE_clag2z( m, n, A, lda, B, ldb);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlag2c, 1, cl_zlag2c_cpu_func)
-/*
- * Codelet definition
- */
-CODELETS_CPU(clag2z, 2, cl_clag2z_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlange.c b/runtime/starpu/codelets/codelet_zlange.c
index 9ab611908..4b389bbb2 100644
--- a/runtime/starpu/codelets/codelet_zlange.c
+++ b/runtime/starpu/codelets/codelet_zlange.c
@@ -24,6 +24,30 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlange_cpu_func(void *descr[], void *cl_arg)
+{
+    /* Buffers: descr[0] matrix tile, descr[1] work array, descr[2] normA. */
+    CHAMELEON_Complex64_t *tile    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    double                *scratch = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    double                *result  = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
+    cham_normtype_t        kind;
+    int                    m, n;
+    int                    ld;
+
+    /* Unpack order: norm, M, N, LDA. */
+    starpu_codelet_unpack_args(cl_arg, &kind, &m, &n, &ld);
+
+    /* Everything is forwarded unchanged to the CPU kernel. */
+    CORE_zlange( kind, m, n, tile, ld, scratch, result );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlange, 3, cl_zlange_cpu_func)
+
 void INSERT_TASK_zlange( const RUNTIME_option_t *options,
                          cham_normtype_t norm, int M, int N, int NB,
                          const CHAM_desc_t *A, int Am, int An, int LDA,
@@ -56,28 +80,25 @@ void INSERT_TASK_zlange( const RUNTIME_option_t *options,
 }
 
 #if !defined(CHAMELEON_SIMULATION)
-static void cl_zlange_cpu_func(void *descr[], void *cl_arg)
+static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg)
 {
-    double *normA;
-    cham_normtype_t norm;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    double *work;
+    double *A;
+    double *B;
 
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &norm, &M, &N, &LDA);
-    CORE_zlange( norm, M, N, A, LDA, work, normA );
+    A = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+
+    if ( *A > *B ) {
+        *B = *A;
+    }
+    (void)cl_arg;
 }
 #endif /* !defined(CHAMELEON_SIMULATION) */
 
 /*
  * Codelet definition
  */
-CODELETS_CPU(zlange, 3, cl_zlange_cpu_func)
+CODELETS_CPU(zlange_max, 2, cl_zlange_max_cpu_func)
 
 void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
                            const CHAM_desc_t *A, int Am, int An,
@@ -102,24 +123,3 @@ void INSERT_TASK_zlange_max(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlange_max_cpu_func(void *descr[], void *cl_arg)
-{
-    double *A;
-    double *B;
-
-    A = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-
-    if ( *A > *B ) {
-        *B = *A;
-    }
-    (void)cl_arg;
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlange_max, 2, cl_zlange_max_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlanhe.c b/runtime/starpu/codelets/codelet_zlanhe.c
index d29b5b3d5..4fc51d3d0 100644
--- a/runtime/starpu/codelets/codelet_zlanhe.c
+++ b/runtime/starpu/codelets/codelet_zlanhe.c
@@ -24,6 +24,30 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlanhe_cpu_func(void *descr[], void *cl_arg)
+{
+    /* Buffers: descr[0] matrix tile, descr[1] work array, descr[2] normA. */
+    CHAMELEON_Complex64_t *tile    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    double                *scratch = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    double                *result  = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
+    cham_normtype_t        kind;
+    cham_uplo_t            part;
+    int                    n, ld;
+
+    /* Unpack order: norm, uplo, N, LDA. */
+    starpu_codelet_unpack_args(cl_arg, &kind, &part, &n, &ld);
+
+    /* Everything is forwarded unchanged to the CPU kernel. */
+    CORE_zlanhe( kind, part, n, tile, ld, scratch, result);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlanhe, 3, cl_zlanhe_cpu_func)
+
 void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
                        cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
                        const CHAM_desc_t *A, int Am, int An, int LDA,
@@ -55,27 +79,3 @@ void INSERT_TASK_zlanhe(const RUNTIME_option_t *options,
 
     (void)NB;
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlanhe_cpu_func(void *descr[], void *cl_arg)
-{
-    double *normA;
-    cham_normtype_t norm;
-    cham_uplo_t uplo;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    double *work;
-
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA);
-    CORE_zlanhe( norm, uplo, N, A, LDA, work, normA);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlanhe, 3, cl_zlanhe_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlansy.c b/runtime/starpu/codelets/codelet_zlansy.c
index 6fd7cae04..fdea83309 100644
--- a/runtime/starpu/codelets/codelet_zlansy.c
+++ b/runtime/starpu/codelets/codelet_zlansy.c
@@ -24,10 +24,34 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_zlansy(const RUNTIME_option_t *options,
-                       cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlansy_cpu_func(void *descr[], void *cl_arg)
+{
+    /* Buffers: descr[0] matrix tile, descr[1] work array, descr[2] normA. */
+    CHAMELEON_Complex64_t *tile    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    double                *scratch = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    double                *result  = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
+    cham_normtype_t        kind;
+    cham_uplo_t            part;
+    int                    n, ld;
+
+    /* Unpack order: norm, uplo, N, LDA. */
+    starpu_codelet_unpack_args(cl_arg, &kind, &part, &n, &ld);
+
+    /* Everything is forwarded unchanged to the CPU kernel. */
+    CORE_zlansy( kind, part, n, tile, ld, scratch, result);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlansy, 3, cl_zlansy_cpu_func)
+
+void INSERT_TASK_zlansy( const RUNTIME_option_t *options,
+                         cham_normtype_t norm, cham_uplo_t uplo, int N, int NB,
+                         const CHAM_desc_t *A, int Am, int An, int LDA,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     (void)NB;
     struct starpu_codelet *codelet = &cl_zlansy;
@@ -54,27 +78,3 @@ void INSERT_TASK_zlansy(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlansy_cpu_func(void *descr[], void *cl_arg)
-{
-    double *normA;
-    cham_normtype_t norm;
-    cham_uplo_t uplo;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    double *work;
-
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &N, &LDA);
-    CORE_zlansy( norm, uplo, N, A, LDA, work, normA);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlansy, 3, cl_zlansy_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlantr.c b/runtime/starpu/codelets/codelet_zlantr.c
index b4a5da805..078b81d6b 100644
--- a/runtime/starpu/codelets/codelet_zlantr.c
+++ b/runtime/starpu/codelets/codelet_zlantr.c
@@ -22,11 +22,34 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_zlantr(const RUNTIME_option_t *options,
-                       cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
-                       int M, int N, int NB,
-                       const CHAM_desc_t *A, int Am, int An, int LDA,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlantr_cpu_func(void *descr[], void *cl_arg)
+{
+    /* CPU task body: unpack norm, uplo, diag, M, N, LDA and call CORE_zlantr. */
+    cham_normtype_t norm;
+    cham_uplo_t uplo; /* uplo and diag use the same enum types as the INSERT_TASK_zlantr parameters */
+    cham_diag_t diag;
+    int M, N, LDA;
+    CHAMELEON_Complex64_t *A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    double *work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    double *normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
+
+    starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &diag, &M, &N, &LDA);
+    /* descr[0] is the matrix tile, descr[1] the work array, descr[2] normA. */
+    CORE_zlantr( norm, uplo, diag, M, N, A, LDA, work, normA);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlantr, 3, cl_zlantr_cpu_func)
+
+void INSERT_TASK_zlantr( const RUNTIME_option_t *options,
+                         cham_normtype_t norm, cham_uplo_t uplo, cham_diag_t diag,
+                         int M, int N, int NB,
+                         const CHAM_desc_t *A, int Am, int An, int LDA,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_zlantr;
     void (*callback)(void*) = options->profiling ? cl_zlange_callback : NULL;
@@ -56,26 +79,3 @@ void INSERT_TASK_zlantr(const RUNTIME_option_t *options,
 
     (void)NB;
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlantr_cpu_func(void *descr[], void *cl_arg)
-{
-    double *normA;
-    cham_normtype_t norm, uplo, diag;
-    int M, N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    double *work;
-
-    A     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work  = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    normA = (double *)STARPU_MATRIX_GET_PTR(descr[2]);
-    starpu_codelet_unpack_args(cl_arg, &norm, &uplo, &diag, &M, &N, &LDA);
-    CORE_zlantr( norm, uplo, diag, M, N, A, LDA, work, normA);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlantr, 3, cl_zlantr_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlascal.c b/runtime/starpu/codelets/codelet_zlascal.c
index 61d63bcfc..f625d8830 100644
--- a/runtime/starpu/codelets/codelet_zlascal.c
+++ b/runtime/starpu/codelets/codelet_zlascal.c
@@ -22,6 +22,28 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlascal_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zlascal codelet; forwards to CORE_zlascal */
+{
+    cham_uplo_t uplo;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t alpha;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* tile pointer taken from buffer 0 */
+    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); /* scalars come from cl_arg; order presumably mirrors the packing in INSERT_TASK_zlascal -- confirm */
+    CORE_zlascal(uplo, M, N, alpha, A, LDA);
+    return;
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -49,12 +71,10 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ *          @retval CHAMELEON_SUCCESS successful exit
+ *          @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-
 void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
                         cham_uplo_t uplo,
                         int m, int n, int nb,
@@ -84,26 +104,3 @@ void INSERT_TASK_zlascal(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlascal_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA);
-    CORE_zlascal(uplo, M, N, alpha, A, LDA);
-    return;
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlascal, 1, cl_zlascal_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlaset.c b/runtime/starpu/codelets/codelet_zlaset.c
index 80ab8c2c1..9108167de 100644
--- a/runtime/starpu/codelets/codelet_zlaset.c
+++ b/runtime/starpu/codelets/codelet_zlaset.c
@@ -25,6 +25,27 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlaset_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zlaset codelet; forwards to CORE_zlaset */
+{
+    cham_uplo_t uplo;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t alpha;
+    CHAMELEON_Complex64_t beta;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* tile pointer taken from buffer 0 */
+    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta, &LDA); /* scalars come from cl_arg; order presumably mirrors the packing in INSERT_TASK_zlaset -- confirm */
+    CORE_zlaset(uplo, M, N, alpha, beta, A, LDA);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlaset, 1, cl_zlaset_cpu_func)
 
 /**
  *
@@ -90,26 +111,3 @@ void INSERT_TASK_zlaset(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlaset_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &beta, &LDA);
-    CORE_zlaset(uplo, M, N, alpha, beta, A, LDA);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlaset, 1, cl_zlaset_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlaset2.c b/runtime/starpu/codelets/codelet_zlaset2.c
index c3514735d..0e67ee39d 100644
--- a/runtime/starpu/codelets/codelet_zlaset2.c
+++ b/runtime/starpu/codelets/codelet_zlaset2.c
@@ -25,6 +25,26 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zlaset2 codelet; forwards to CORE_zlaset2 */
+{
+    cham_uplo_t uplo;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t alpha;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* tile pointer taken from buffer 0 */
+    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA); /* scalars come from cl_arg; order presumably mirrors the packing in INSERT_TASK_zlaset2 -- confirm */
+    CORE_zlaset2(uplo, M, N, alpha, A, LDA);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlaset2, 1, cl_zlaset2_cpu_func)
 
 /**
  *
@@ -86,25 +106,3 @@ void INSERT_TASK_zlaset2(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlaset2_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &M, &N, &alpha, &LDA);
-    CORE_zlaset2(uplo, M, N, alpha, A, LDA);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlaset2, 1, cl_zlaset2_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlatro.c b/runtime/starpu/codelets/codelet_zlatro.c
index d92ddaf47..063b7aea8 100644
--- a/runtime/starpu/codelets/codelet_zlatro.c
+++ b/runtime/starpu/codelets/codelet_zlatro.c
@@ -26,16 +26,40 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlatro_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zlatro codelet; forwards to CORE_zlatro */
+{
+    cham_uplo_t uplo;
+    cham_trans_t trans;
+    int M;
+    int N;
+    const CHAMELEON_Complex64_t *A;
+    int LDA;
+    CHAMELEON_Complex64_t *B;
+    int LDB;
+
+    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* buffer 0, only read through a const pointer here */
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]); /* buffer 1, passed to the kernel as a writable pointer */
+    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &LDA, &LDB); /* scalars come from cl_arg; order presumably mirrors the packing in INSERT_TASK_zlatro -- confirm */
+    CORE_zlatro(uplo, trans, M, N, A, LDA, B, LDB);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_zlatro(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans,
-                       int m, int n, int mb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_zlatro( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_trans_t trans,
+                         int m, int n, int mb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     struct starpu_codelet *codelet = &cl_zlatro;
     void (*callback)(void*) = NULL;
@@ -63,27 +87,3 @@ void INSERT_TASK_zlatro(const RUNTIME_option_t *options,
         0);
     (void)mb;
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlatro_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    cham_trans_t trans;
-    int M;
-    int N;
-    const CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
-
-    A = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &LDA, &LDB);
-    CORE_zlatro(uplo, trans, M, N, A, LDA, B, LDB);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlatro, 2, cl_zlatro_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zlauum.c b/runtime/starpu/codelets/codelet_zlauum.c
index 166b13881..2344c5171 100644
--- a/runtime/starpu/codelets/codelet_zlauum.c
+++ b/runtime/starpu/codelets/codelet_zlauum.c
@@ -26,14 +26,33 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zlauum_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zlauum codelet; forwards to CORE_zlauum */
+{
+    cham_uplo_t uplo;
+    int N;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* tile pointer taken from buffer 0 */
+    starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA); /* scalars come from cl_arg; order presumably mirrors the packing in INSERT_TASK_zlauum -- confirm */
+    CORE_zlauum(uplo, N, A, LDA);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_zlauum(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda)
+void INSERT_TASK_zlauum( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zlauum;
@@ -56,23 +75,3 @@ void INSERT_TASK_zlauum(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zlauum_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &N, &LDA);
-    CORE_zlauum(uplo, N, A, LDA);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zlauum, 1, cl_zlauum_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zplghe.c b/runtime/starpu/codelets/codelet_zplghe.c
index ae014c297..345d18a24 100644
--- a/runtime/starpu/codelets/codelet_zplghe.c
+++ b/runtime/starpu/codelets/codelet_zplghe.c
@@ -26,13 +26,36 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/*   INSERT_TASK_zplghe - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */
+/*   cl_zplghe_cpu_func - Generate a tile of a random Hermitian matrix (positive definite if bump is large enough). */
 
-void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
-                        double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int bigM, int m0, int n0, unsigned long long int seed )
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zplghe_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zplghe codelet; forwards to CORE_zplghe */
 {
+    double bump;
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int bigM;
+    int m0;
+    int n0;
+    unsigned long long int seed;
 
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* tile pointer taken from buffer 0 */
+    starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); /* scalars come from cl_arg; order presumably mirrors the packing in INSERT_TASK_zplghe -- confirm */
+    CORE_zplghe( bump, m, n, A, lda, bigM, m0, n0, seed );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func)
+
+void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
+                         double bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         int bigM, int m0, int n0, unsigned long long int seed )
+{
     struct starpu_codelet *codelet = &cl_zplghe;
     void (*callback)(void*) = options->profiling ? cl_zplghe_callback : NULL;
 
@@ -58,29 +81,3 @@ void INSERT_TASK_zplghe( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-/*   cl_zplghe_cpu_func - Generate a tile for random hermitian (positive definite if bump is large enough) matrix. */
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zplghe_cpu_func(void *descr[], void *cl_arg)
-{
-    double bump;
-    int m;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int bigM;
-    int m0;
-    int n0;
-    unsigned long long int seed;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed );
-    CORE_zplghe( bump, m, n, A, lda, bigM, m0, n0, seed );
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zplghe, 1, cl_zplghe_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zplgsy.c b/runtime/starpu/codelets/codelet_zplgsy.c
index de00e6033..9141ecd0f 100644
--- a/runtime/starpu/codelets/codelet_zplgsy.c
+++ b/runtime/starpu/codelets/codelet_zplgsy.c
@@ -26,7 +26,31 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/*   INSERT_TASK_zplgsy - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */
+/*   cl_zplgsy_cpu_func - Generate a tile of a random symmetric matrix (positive definite if 'bump' is large enough). */
+
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zplgsy codelet; forwards to CORE_zplgsy */
+{
+    CHAMELEON_Complex64_t bump; /* complex here, unlike the double bump of the zplghe codelet */
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int bigM;
+    int m0;
+    int n0;
+    unsigned long long int seed;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* tile pointer taken from buffer 0 */
+    starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed ); /* scalars come from cl_arg; order presumably mirrors the packing in INSERT_TASK_zplgsy -- confirm */
+    CORE_zplgsy( bump, m, n, A, lda, bigM, m0, n0, seed );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zplgsy, 1, cl_zplgsy_cpu_func)
 
 void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
                         CHAMELEON_Complex64_t bump, int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
@@ -58,29 +82,3 @@ void INSERT_TASK_zplgsy( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-/*   cl_zplgsy_cpu_func - Generate a tile for random symmetric (positive definite if 'bump' is large enough) matrix. */
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zplgsy_cpu_func(void *descr[], void *cl_arg)
-{
-    CHAMELEON_Complex64_t bump;
-    int m;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int bigM;
-    int m0;
-    int n0;
-    unsigned long long int seed;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &bump, &m, &n, &lda, &bigM, &m0, &n0, &seed );
-    CORE_zplgsy( bump, m, n, A, lda, bigM, m0, n0, seed );
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zplgsy, 1, cl_zplgsy_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zplrnt.c b/runtime/starpu/codelets/codelet_zplrnt.c
index 24f7c9159..d824485da 100644
--- a/runtime/starpu/codelets/codelet_zplrnt.c
+++ b/runtime/starpu/codelets/codelet_zplrnt.c
@@ -26,11 +26,32 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/*   INSERT_TASK_zplrnt - Generate a tile for random matrix. */
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zplrnt codelet (random tile generation); forwards to CORE_zplrnt */
+{
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int bigM;
+    int m0;
+    int n0;
+    unsigned long long int seed;
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* tile pointer taken from buffer 0 */
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &bigM, &m0, &n0, &seed ); /* scalars come from cl_arg; order presumably mirrors the packing in INSERT_TASK_zplrnt -- confirm */
+    CORE_zplrnt( m, n, A, lda, bigM, m0, n0, seed );
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zplrnt, 1, cl_zplrnt_cpu_func)
 
 void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
-                        int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
-                        int bigM, int m0, int n0, unsigned long long int seed )
+                         int m, int n, const CHAM_desc_t *A, int Am, int An, int lda,
+                         int bigM, int m0, int n0, unsigned long long int seed )
 {
 
     struct starpu_codelet *codelet = &cl_zplrnt;
@@ -57,28 +78,3 @@ void INSERT_TASK_zplrnt( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-/*   cl_zplrnt_cpu_func - Generate a tile for random matrix. */
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zplrnt_cpu_func(void *descr[], void *cl_arg)
-{
-    int m;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int bigM;
-    int m0;
-    int n0;
-    unsigned long long int seed;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &lda, &bigM, &m0, &n0, &seed );
-    CORE_zplrnt( m, n, A, lda, bigM, m0, n0, seed );
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zplrnt, 1, cl_zplrnt_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zplssq.c b/runtime/starpu/codelets/codelet_zplssq.c
index 4fdbaf6c3..2fe5d2a7f 100644
--- a/runtime/starpu/codelets/codelet_zplssq.c
+++ b/runtime/starpu/codelets/codelet_zplssq.c
@@ -23,11 +23,39 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zplssq_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zplssq codelet: merges the pair in descr[0] into the pair in descr[1] */
+{
+    double *SCLSSQ_IN;  /* [0] is used as the scale, [1] as the scaled sum of squares */
+    double *SCLSSQ_OUT; /* same two-element layout; updated in place */
+
+    SCLSSQ_IN  = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
+    SCLSSQ_OUT = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+
+    assert( SCLSSQ_OUT[0] >= 0. ); /* the accumulated scale must not be negative */
+    if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) { /* incoming scale is larger: it becomes the common scale */
+        SCLSSQ_OUT[1] = SCLSSQ_IN[1]  + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ))); /* rescale the accumulated sum by (out/in)^2 before adding */
+        SCLSSQ_OUT[0] = SCLSSQ_IN[0];
+    } else {
+        if ( SCLSSQ_OUT[0] > 0 ) { /* accumulated scale is kept; the guard keeps the divisions below away from a zero divisor */
+            SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1]  * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ))); /* rescale the incoming sum by (in/out)^2 before adding */
+        }
+    }
+
+    (void)cl_arg; /* no scalar arguments are unpacked by this kernel */
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zplssq, 2, cl_zplssq_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_zplssq returns: scl * sqrt(ssq)
+ * @brief Compute sqrt( sum( a_ij ^ 2 ) ) = scl * sqrt(ssq)
  *
  * with scl and ssq such that
  *
@@ -78,25 +106,14 @@ void INSERT_TASK_zplssq( const RUNTIME_option_t *options,
         0);
 }
 
-
 #if !defined(CHAMELEON_SIMULATION)
-static void cl_zplssq_cpu_func(void *descr[], void *cl_arg)
+static void cl_zplssq2_cpu_func(void *descr[], void *cl_arg)
 {
-    double *SCLSSQ_IN;
-    double *SCLSSQ_OUT;
+    double *RESULT;
 
-    SCLSSQ_IN  = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCLSSQ_OUT = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    RESULT = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
 
-    assert( SCLSSQ_OUT[0] >= 0. );
-    if( SCLSSQ_OUT[0] < SCLSSQ_IN[0] ) {
-        SCLSSQ_OUT[1] = SCLSSQ_IN[1]  + (SCLSSQ_OUT[1] * (( SCLSSQ_OUT[0] / SCLSSQ_IN[0] ) * ( SCLSSQ_OUT[0] / SCLSSQ_IN[0] )));
-        SCLSSQ_OUT[0] = SCLSSQ_IN[0];
-    } else {
-        if ( SCLSSQ_OUT[0] > 0 ) {
-            SCLSSQ_OUT[1] = SCLSSQ_OUT[1] + (SCLSSQ_IN[1]  * (( SCLSSQ_IN[0] / SCLSSQ_OUT[0] ) * ( SCLSSQ_IN[0] / SCLSSQ_OUT[0] )));
-        }
-    }
+    RESULT[0] = RESULT[0] * sqrt( RESULT[1] );
 
     (void)cl_arg;
 }
@@ -105,10 +122,10 @@ static void cl_zplssq_cpu_func(void *descr[], void *cl_arg)
 /*
  * Codelet definition
  */
-CODELETS_CPU(zplssq, 2, cl_zplssq_cpu_func)
+CODELETS_CPU(zplssq2, 1, cl_zplssq2_cpu_func)
 
 void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
-                         const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
+                          const CHAM_desc_t *RESULT, int RESULTm, int RESULTn )
 {
     struct starpu_codelet *codelet = &cl_zplssq2;
     void (*callback)(void*) = options->profiling ? cl_zplssq2_callback : NULL;
@@ -127,22 +144,3 @@ void INSERT_TASK_zplssq2( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zplssq2_cpu_func(void *descr[], void *cl_arg)
-{
-    double *RESULT;
-
-    RESULT = (double *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    RESULT[0] = RESULT[0] * sqrt( RESULT[1] );
-
-    (void)cl_arg;
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zplssq2, 1, cl_zplssq2_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zpotrf.c b/runtime/starpu/codelets/codelet_zpotrf.c
index a43f31723..bbfe81774 100644
--- a/runtime/starpu/codelets/codelet_zpotrf.c
+++ b/runtime/starpu/codelets/codelet_zpotrf.c
@@ -26,6 +26,34 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zpotrf codelet; runs CORE_zpotrf and reports a failure to the sequence */
+{
+    cham_uplo_t uplo;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int iinfo; /* offset added to the kernel's info when an error is reported */
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+    int info = 0; /* filled in by CORE_zpotrf */
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* tile pointer taken from buffer 0 */
+
+    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo, &sequence, &request); /* sequence and request are unpacked as pointers */
+    CORE_zpotrf(uplo, n, A, lda, &info);
+
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { /* flush only if the sequence has not already recorded a failure */
+        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); /* reported code is iinfo + info */
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -61,33 +89,3 @@ void INSERT_TASK_zpotrf(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zpotrf_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int iinfo;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
-    int info = 0;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo, &sequence, &request);
-    CORE_zpotrf(uplo, n, A, lda, &info);
-
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
-    }
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zpotrf, 1, cl_zpotrf_cpu_func)
-
diff --git a/runtime/starpu/codelets/codelet_zssssm.c b/runtime/starpu/codelets/codelet_zssssm.c
index 9efbd985f..ecae613ee 100644
--- a/runtime/starpu/codelets/codelet_zssssm.c
+++ b/runtime/starpu/codelets/codelet_zssssm.c
@@ -26,6 +26,39 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zssssm_cpu_func(void *descr[], void *cl_arg) /* CPU implementation of the zssssm codelet; forwards to CORE_zssssm */
+{
+    int m1;
+    int n1;
+    int m2;
+    int n2;
+    int k;
+    int ib;
+    CHAMELEON_Complex64_t *A1;
+    int lda1;
+    CHAMELEON_Complex64_t *A2;
+    int lda2;
+    CHAMELEON_Complex64_t *L1;
+    int ldl1;
+    CHAMELEON_Complex64_t *L2;
+    int ldl2;
+    int *IPIV; /* not a StarPU buffer: the pointer itself is unpacked from cl_arg below */
+
+    A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]); /* the four tiles come from buffers 0..3, in this order */
+    A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    L1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    L2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
+    starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV); /* order presumably mirrors the packing in INSERT_TASK_zssssm -- confirm */
+    CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -91,19 +124,17 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
  *
  */
-
-void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
-                       int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                       const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                       const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                       const CHAM_desc_t *L1, int L1m, int L1n, int ldl1,
-                       const CHAM_desc_t *L2, int L2m, int L2n, int ldl2,
-                       const int *IPIV)
+void INSERT_TASK_zssssm( const RUNTIME_option_t *options,
+                         int m1, int n1, int m2, int n2, int k, int ib, int nb,
+                         const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
+                         const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
+                         const CHAM_desc_t *L1, int L1m, int L1n, int ldl1,
+                         const CHAM_desc_t *L2, int L2m, int L2n, int ldl2,
+                         const int *IPIV )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zssssm;
@@ -140,38 +171,3 @@ void INSERT_TASK_zssssm(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zssssm_cpu_func(void *descr[], void *cl_arg)
-{
-    int m1;
-    int n1;
-    int m2;
-    int n2;
-    int k;
-    int ib;
-    CHAMELEON_Complex64_t *A1;
-    int lda1;
-    CHAMELEON_Complex64_t *A2;
-    int lda2;
-    CHAMELEON_Complex64_t *L1;
-    int ldl1;
-    CHAMELEON_Complex64_t *L2;
-    int ldl2;
-    int *IPIV;
-
-    A1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    A2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    L1 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    L2 = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-    starpu_codelet_unpack_args(cl_arg, &m1, &n1, &m2, &n2, &k, &ib, &lda1, &lda2, &ldl1, &ldl2, &IPIV);
-    CORE_zssssm(m1, n1, m2, n2, k, ib, A1, lda1, A2, lda2, L1, ldl1, L2, ldl2, IPIV);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zssssm, 4, cl_zssssm_cpu_func)
-
diff --git a/runtime/starpu/codelets/codelet_zsymm.c b/runtime/starpu/codelets/codelet_zsymm.c
index 455d118f1..49d3af5d9 100644
--- a/runtime/starpu/codelets/codelet_zsymm.c
+++ b/runtime/starpu/codelets/codelet_zsymm.c
@@ -26,51 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zsymm(const RUNTIME_option_t *options,
-                      cham_side_t side, cham_uplo_t uplo,
-                      int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zsymm;
-    void (*callback)(void*) = options->profiling ? cl_zsymm_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_R(B, Bm, Bn);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &side,                sizeof(int),
-        STARPU_VALUE,    &uplo,                sizeof(int),
-        STARPU_VALUE,       &m,                        sizeof(int),
-        STARPU_VALUE,       &n,                        sizeof(int),
-        STARPU_VALUE,   &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,               RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,     &lda,                        sizeof(int),
-        STARPU_R,               RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,     &ldb,                        sizeof(int),
-        STARPU_VALUE,    &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,               RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,     &ldc,                        sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zsymm",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zsymm_cpu_func(void *descr[], void *cl_arg)
 {
@@ -142,3 +97,47 @@ static void cl_zsymm_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zsymm, 3, cl_zsymm_cpu_func, cl_zsymm_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ *
+ * @ingroup INSERT_TASK_Complex64_t
+ *
+ */
+void INSERT_TASK_zsymm(const RUNTIME_option_t *options, /* submits one zsymm task on tiles A(Am,An), B(Bm,Bn), C(Cm,Cn) through starpu_insert_task */
+                      cham_side_t side, cham_uplo_t uplo,
+                      int m, int n, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb; /* nb is accepted but unused here */
+    struct starpu_codelet *codelet = &cl_zsymm;
+    void (*callback)(void*) = options->profiling ? cl_zsymm_callback : NULL; /* callback is set only when profiling is enabled */
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION; /* A and B are declared read-only, C read-write, matching the STARPU_R / STARPU_RW modes below */
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &side,                sizeof(int), /* NOTE(review): cham_side_t passed with sizeof(int) -- assumes the enum is int-sized, confirm */
+        STARPU_VALUE,    &uplo,                sizeof(int), /* NOTE(review): same assumption for cham_uplo_t */
+        STARPU_VALUE,       &m,                        sizeof(int),
+        STARPU_VALUE,       &n,                        sizeof(int),
+        STARPU_VALUE,   &alpha,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,               RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,     &lda,                        sizeof(int),
+        STARPU_R,               RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_VALUE,     &ldb,                        sizeof(int),
+        STARPU_VALUE,    &beta,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_RW,               RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_VALUE,     &ldc,                        sizeof(int),
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback,
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zsymm",
+#endif
+        0); /* 0 ends the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zsyr2k.c b/runtime/starpu/codelets/codelet_zsyr2k.c
index bafefa0f7..27b63010a 100644
--- a/runtime/starpu/codelets/codelet_zsyr2k.c
+++ b/runtime/starpu/codelets/codelet_zsyr2k.c
@@ -26,51 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans,
-                       int n, int k, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zsyr2k;
-    void (*callback)(void*) = options->profiling ? cl_zsyr2k_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_R(B, Bm, Bn);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,      &uplo,                sizeof(int),
-        STARPU_VALUE,     &trans,                sizeof(int),
-        STARPU_VALUE,         &n,                        sizeof(int),
-        STARPU_VALUE,         &k,                        sizeof(int),
-        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,       &lda,                        sizeof(int),
-        STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,       &ldb,                        sizeof(int),
-        STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,       &ldc,                        sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zsyr2k",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zsyr2k_cpu_func(void *descr[], void *cl_arg)
 {
@@ -135,3 +90,47 @@ static void cl_zsyr2k_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zsyr2k, 3, cl_zsyr2k_cpu_func, cl_zsyr2k_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * @brief Insert a zsyr2k task: calls starpu_insert_task() with the cl_zsyr2k codelet.
+ * @ingroup INSERT_TASK_Complex64_t
+ * Tiles A(Am,An) and B(Bm,Bn) are passed as STARPU_R, tile C(Cm,Cn) as STARPU_RW.
+ */
+void INSERT_TASK_zsyr2k(const RUNTIME_option_t *options,
+                       cham_uplo_t uplo, cham_trans_t trans,
+                       int n, int k, int nb,
+                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                       const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                       CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb; /* nb is part of the signature but is not used by this function */
+    struct starpu_codelet *codelet = &cl_zsyr2k;
+    void (*callback)(void*) = options->profiling ? cl_zsyr2k_callback : NULL;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_R(B, Bm, Bn);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,      &uplo,                sizeof(int),
+        STARPU_VALUE,     &trans,                sizeof(int),
+        STARPU_VALUE,         &n,                        sizeof(int),
+        STARPU_VALUE,         &k,                        sizeof(int),
+        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,       &lda,                        sizeof(int),
+        STARPU_R,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_VALUE,       &ldb,                        sizeof(int),
+        STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_VALUE,       &ldc,                        sizeof(int),
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback, /* NULL unless options->profiling is set */
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zsyr2k",
+#endif
+        0); /* 0 terminates the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zsyrk.c b/runtime/starpu/codelets/codelet_zsyrk.c
index 6f72802e5..e08990453 100644
--- a/runtime/starpu/codelets/codelet_zsyrk.c
+++ b/runtime/starpu/codelets/codelet_zsyrk.c
@@ -26,47 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
-                      cham_uplo_t uplo, cham_trans_t trans,
-                      int n, int k, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_zsyrk;
-    void (*callback)(void*) = options->profiling ? cl_zsyrk_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_RW(C, Cm, Cn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,      &uplo,                sizeof(int),
-        STARPU_VALUE,     &trans,                sizeof(int),
-        STARPU_VALUE,         &n,                        sizeof(int),
-        STARPU_VALUE,         &k,                        sizeof(int),
-        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,       &lda,                        sizeof(int),
-        STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
-        STARPU_VALUE,       &ldc,                        sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "zsyrk",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_zsyrk_cpu_func(void *descr[], void *cl_arg)
 {
@@ -130,3 +89,43 @@ static void cl_zsyrk_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(zsyrk, 2, cl_zsyrk_cpu_func, cl_zsyrk_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * @brief Insert a zsyrk task: calls starpu_insert_task() with the cl_zsyrk codelet.
+ * @ingroup INSERT_TASK_Complex64_t
+ * Tile A(Am,An) is passed as STARPU_R, tile C(Cm,Cn) as STARPU_RW.
+ */
+void INSERT_TASK_zsyrk(const RUNTIME_option_t *options,
+                      cham_uplo_t uplo, cham_trans_t trans,
+                      int n, int k, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      CHAMELEON_Complex64_t beta, const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+{
+    (void)nb; /* nb is part of the signature but is not used by this function */
+    struct starpu_codelet *codelet = &cl_zsyrk;
+    void (*callback)(void*) = options->profiling ? cl_zsyrk_callback : NULL;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_RW(C, Cm, Cn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,      &uplo,                sizeof(int),
+        STARPU_VALUE,     &trans,                sizeof(int),
+        STARPU_VALUE,         &n,                        sizeof(int),
+        STARPU_VALUE,         &k,                        sizeof(int),
+        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,       &lda,                        sizeof(int),
+        STARPU_VALUE,      &beta,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_RW,                 RTBLKADDR(C, CHAMELEON_Complex64_t, Cm, Cn),
+        STARPU_VALUE,       &ldc,                        sizeof(int),
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback, /* NULL unless options->profiling is set */
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "zsyrk",
+#endif
+        0); /* 0 terminates the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_zsyssq.c b/runtime/starpu/codelets/codelet_zsyssq.c
index 2e2ae7676..3f07e618d 100644
--- a/runtime/starpu/codelets/codelet_zsyssq.c
+++ b/runtime/starpu/codelets/codelet_zsyssq.c
@@ -22,10 +22,31 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zsyssq_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    double *SCALESUMSQ;
+    /* CPU body of the zsyssq codelet: descr[] carries the data buffers, cl_arg the packed scalars. */
+    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda);
+    CORE_zsyssq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] ); /* entries 0 and 1 of SCALESUMSQ are passed by address */
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zsyssq, 2, cl_zsyssq_cpu_func)
+
 void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
+                         cham_uplo_t uplo, int n,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     struct starpu_codelet *codelet = &cl_zsyssq;
     void (*callback)(void*) = options->profiling ? cl_zgessq_callback : NULL;
@@ -49,25 +70,3 @@ void INSERT_TASK_zsyssq( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zsyssq_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *SCALESUMSQ;
-
-    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda);
-    CORE_zsyssq( uplo, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1] );
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zsyssq, 2, cl_zsyssq_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
index b1f741aff..06c4775e7 100644
--- a/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
+++ b/runtime/starpu/codelets/codelet_zsytrf_nopiv.c
@@ -26,10 +26,31 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options,
-                             cham_uplo_t uplo, int n, int nb,
-                             const CHAM_desc_t *A, int Am, int An, int lda,
-                             int iinfo)
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_zsytrf_nopiv_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    int iinfo;
+    /* CPU body of the zsytrf_nopiv codelet: descr[0] is the only data buffer, cl_arg holds the packed scalars. */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    /* iinfo is unpacked from cl_arg but is not forwarded to CORE_zsytf2_nopiv. */
+    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo);
+    CORE_zsytf2_nopiv(uplo, n, A, lda);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(zsytrf_nopiv, 1, cl_zsytrf_nopiv_cpu_func)
+
+void INSERT_TASK_zsytrf_nopiv( const RUNTIME_option_t *options,
+                              cham_uplo_t uplo, int n, int nb,
+                               const CHAM_desc_t *A, int Am, int An, int lda,
+                               int iinfo )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_zsytrf_nopiv;
@@ -54,25 +75,3 @@ void INSERT_TASK_zsytrf_nopiv(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zsytrf_nopiv_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    int iinfo;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    starpu_codelet_unpack_args(cl_arg, &uplo, &n, &lda, &iinfo);
-    CORE_zsytf2_nopiv(uplo, n, A, lda);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(zsytrf_nopiv, 1, cl_zsytrf_nopiv_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztile_zero.c b/runtime/starpu/codelets/codelet_ztile_zero.c
deleted file mode 100644
index c59115b41..000000000
--- a/runtime/starpu/codelets/codelet_ztile_zero.c
+++ /dev/null
@@ -1,84 +0,0 @@
-/**
- *
- * @file starpu/codelet_ztile_zero.c
- *
- * @copyright 2009-2014 The University of Tennessee and The University of
- *                      Tennessee Research Foundation. All rights reserved.
- * @copyright 2012-2018 Bordeaux INP, CNRS (LaBRI UMR 5800), Inria,
- *                      Univ. Bordeaux. All rights reserved.
- *
- ***
- *
- * @brief Chameleon ztile_zero StarPU codelet
- *
- * @version 1.0.0
- * @author Hatem Ltaief
- * @author Mathieu Faverge
- * @author Jakub Kurzak
- * @date 2010-11-15
- * @precisions normal z -> c d s
- *
- */
-#include "chameleon_starpu.h"
-#include "runtime_codelet_z.h"
-
-/**
- *
- */
-void INSERT_TASK_ztile_zero( const RUNTIME_option_t *options,
-                            int X1, int X2, int Y1, int Y2,
-                            const CHAM_desc_t *A, int Am, int An, int lda )
-{
-    struct starpu_codelet *codelet;
-    codelet = &cl_ztile_zero;
-    void (*callback)(void*) = options->profiling ? cl_zlacpy_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_W(A, Am, An);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE, &X1,  sizeof(int),
-        STARPU_VALUE, &X2,  sizeof(int),
-        STARPU_VALUE, &Y1,  sizeof(int),
-        STARPU_VALUE, &Y2,  sizeof(int),
-        STARPU_W,     RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE, &lda, sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback, NULL,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "ztile_zero",
-#endif
-        0);
-}
-
-/**
- *
- */
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztile_zero_cpu_func(void *descr[], void *cl_arg)
-{
-    int X1;
-    int X2;
-    int Y1;
-    int Y2;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-
-    int x, y;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    starpu_codelet_unpack_args(cl_arg, &X1, &X2, &Y1, &Y2, &lda);
-
-    for (x = X1; x < X2; x++)
-        for (y = Y1; y < Y2; y++)
-            A[lda*x+y] = 0.0;
-
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztile_zero, 1, cl_ztile_zero_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztplqt.c b/runtime/starpu/codelets/codelet_ztplqt.c
index 44615d5c3..8132a27dd 100644
--- a/runtime/starpu/codelets/codelet_ztplqt.c
+++ b/runtime/starpu/codelets/codelet_ztplqt.c
@@ -54,12 +54,11 @@ static void cl_ztplqt_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(ztplqt, 4, cl_ztplqt_cpu_func)
 
-void
-INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
-                   int M, int N, int L, int ib, int nb,
-                   const CHAM_desc_t *A, int Am, int An, int lda,
-                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                   const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+void INSERT_TASK_ztplqt( const RUNTIME_option_t *options,
+                         int M, int N, int L, int ib, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
 {
     struct starpu_codelet *codelet = &cl_ztplqt;
     void (*callback)(void*) = options->profiling ? cl_ztplqt_callback : NULL;
diff --git a/runtime/starpu/codelets/codelet_ztpmlqt.c b/runtime/starpu/codelets/codelet_ztpmlqt.c
index 8dffa4ff2..54a24a070 100644
--- a/runtime/starpu/codelets/codelet_ztpmlqt.c
+++ b/runtime/starpu/codelets/codelet_ztpmlqt.c
@@ -103,14 +103,13 @@ static void cl_ztpmlqt_cuda_func(void *descr[], void *cl_arg)
  */
 CODELETS(ztpmlqt, 5, cl_ztpmlqt_cpu_func, cl_ztpmlqt_cuda_func, STARPU_CUDA_ASYNC)
 
-void
-INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
-                    cham_side_t side, cham_trans_t trans,
-                    int M, int N, int K, int L, int ib, int nb,
-                    const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                    const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                    const CHAM_desc_t *A, int Am, int An, int lda,
-                    const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+void INSERT_TASK_ztpmlqt( const RUNTIME_option_t *options,
+                          cham_side_t side, cham_trans_t trans,
+                          int M, int N, int K, int L, int ib, int nb,
+                          const CHAM_desc_t *V, int Vm, int Vn, int ldv,
+                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                          const CHAM_desc_t *A, int Am, int An, int lda,
+                          const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     struct starpu_codelet *codelet = &cl_ztpmlqt;
     void (*callback)(void*) = options->profiling ? cl_ztpmlqt_callback : NULL;
diff --git a/runtime/starpu/codelets/codelet_ztpmqrt.c b/runtime/starpu/codelets/codelet_ztpmqrt.c
index 6684e59f8..c94a33b43 100644
--- a/runtime/starpu/codelets/codelet_ztpmqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpmqrt.c
@@ -104,14 +104,13 @@ static void cl_ztpmqrt_cuda_func(void *descr[], void *cl_arg)
  */
 CODELETS(ztpmqrt, 5, cl_ztpmqrt_cpu_func, cl_ztpmqrt_cuda_func, STARPU_CUDA_ASYNC)
 
-void
-INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
-                     cham_side_t side, cham_trans_t trans,
-                     int M, int N, int K, int L, int ib, int nb,
-                     const CHAM_desc_t *V, int Vm, int Vn, int ldv,
-                     const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                     const CHAM_desc_t *A, int Am, int An, int lda,
-                     const CHAM_desc_t *B, int Bm, int Bn, int ldb )
+void INSERT_TASK_ztpmqrt( const RUNTIME_option_t *options,
+                          cham_side_t side, cham_trans_t trans,
+                          int M, int N, int K, int L, int ib, int nb,
+                          const CHAM_desc_t *V, int Vm, int Vn, int ldv,
+                          const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                          const CHAM_desc_t *A, int Am, int An, int lda,
+                          const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     struct starpu_codelet *codelet = &cl_ztpmqrt;
     void (*callback)(void*) = options->profiling ? cl_ztpmqrt_callback : NULL;
diff --git a/runtime/starpu/codelets/codelet_ztpqrt.c b/runtime/starpu/codelets/codelet_ztpqrt.c
index 6fbd0afe6..143d613eb 100644
--- a/runtime/starpu/codelets/codelet_ztpqrt.c
+++ b/runtime/starpu/codelets/codelet_ztpqrt.c
@@ -54,12 +54,11 @@ static void cl_ztpqrt_cpu_func(void *descr[], void *cl_arg)
  */
 CODELETS_CPU(ztpqrt, 4, cl_ztpqrt_cpu_func)
 
-void
-INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
-                   int M, int N, int L, int ib, int nb,
-                   const CHAM_desc_t *A, int Am, int An, int lda,
-                   const CHAM_desc_t *B, int Bm, int Bn, int ldb,
-                   const CHAM_desc_t *T, int Tm, int Tn, int ldt )
+void INSERT_TASK_ztpqrt( const RUNTIME_option_t *options,
+                         int M, int N, int L, int ib, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn, int ldb,
+                         const CHAM_desc_t *T, int Tm, int Tn, int ldt )
 {
     struct starpu_codelet *codelet = &cl_ztpqrt;
     void (*callback)(void*) = options->profiling ? cl_ztpqrt_callback : NULL;
diff --git a/runtime/starpu/codelets/codelet_ztradd.c b/runtime/starpu/codelets/codelet_ztradd.c
index f6265c28c..57fa58e17 100644
--- a/runtime/starpu/codelets/codelet_ztradd.c
+++ b/runtime/starpu/codelets/codelet_ztradd.c
@@ -22,12 +22,39 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_ztradd_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_uplo_t uplo;
+    cham_trans_t trans;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t alpha;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+    CHAMELEON_Complex64_t beta;
+    CHAMELEON_Complex64_t *B;
+    int LDB;
+    /* CPU body of the ztradd codelet: A is descr[0], B is descr[1]; the scalars are unpacked from cl_arg. */
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &LDA, &beta, &LDB);
+    CORE_ztradd(uplo, trans, M, N, alpha, A, LDA, beta, B, LDB);
+    return; /* redundant in a void function; kept as in the original */
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func)
+
 /**
  ******************************************************************************
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
- *  INSERT_TASK_ztradd adds two trapezoidal matrices together as in PBLAS pzgeadd.
+ * @brief Adds two trapezoidal matrices together as in PBLAS pzgeadd.
  *
  *       B <- alpha * op(A)  + beta * B,
  *
@@ -77,15 +104,14 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ * @note INSERT_TASK_ztradd is declared void: it returns no status code to
+ *       the caller.
  *
  */
-void INSERT_TASK_ztradd(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
-                       CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                       CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+void INSERT_TASK_ztradd( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_trans_t trans, int m, int n, int nb,
+                         CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                         CHAMELEON_Complex64_t beta,  const CHAM_desc_t *B, int Bm, int Bn, int ldb )
 {
     struct starpu_codelet *codelet = &cl_ztradd;
     void (*callback)(void*) = options->profiling ? cl_zgeadd_callback : NULL;
@@ -116,31 +142,3 @@ void INSERT_TASK_ztradd(const RUNTIME_option_t *options,
 
     (void)nb;
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztradd_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    cham_trans_t trans;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t alpha;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    CHAMELEON_Complex64_t beta;
-    CHAMELEON_Complex64_t *B;
-    int LDB;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    B = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &trans, &M, &N, &alpha, &LDA, &beta, &LDB);
-    CORE_ztradd(uplo, trans, M, N, alpha, A, LDA, beta, B, LDB);
-    return;
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztradd, 2, cl_ztradd_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztrasm.c b/runtime/starpu/codelets/codelet_ztrasm.c
index c1d154aad..1ca5a1a6a 100644
--- a/runtime/starpu/codelets/codelet_ztrasm.c
+++ b/runtime/starpu/codelets/codelet_ztrasm.c
@@ -22,10 +22,34 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-void INSERT_TASK_ztrasm(const RUNTIME_option_t *options,
-                       cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *B, int Bm, int Bn)
+#if !defined(CHAMELEON_SIMULATION)
+static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg)
+{
+    cham_store_t storev;
+    cham_uplo_t uplo;
+    cham_diag_t diag;
+    int M;
+    int N;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    double *work;
+    /* CPU body of the ztrasm codelet: A is descr[0], work (read as double) is descr[1]; scalars come from cl_arg. */
+    A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N, &lda);
+    CORE_ztrasm(storev, uplo, diag, M, N, A, lda, work);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition
+ */
+CODELETS_CPU(ztrasm, 2, cl_ztrasm_cpu_func)
+
+void INSERT_TASK_ztrasm( const RUNTIME_option_t *options,
+                         cham_store_t storev, cham_uplo_t uplo, cham_diag_t diag, int M, int N,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *B, int Bm, int Bn )
 {
     struct starpu_codelet *codelet = &cl_ztrasm;
     void (*callback)(void*) = options->profiling ? cl_ztrasm_callback : NULL;
@@ -48,32 +72,7 @@ void INSERT_TASK_ztrasm(const RUNTIME_option_t *options,
         STARPU_PRIORITY, options->priority,
         STARPU_CALLBACK, callback,
 #if defined(CHAMELEON_CODELETS_HAVE_NAME)
-            STARPU_NAME, "ztrasm",
+        STARPU_NAME, "ztrasm",
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztrasm_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_store_t storev;
-    cham_uplo_t uplo;
-    cham_diag_t diag;
-    int M;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *work;
-
-    A    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    work = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &storev, &uplo, &diag, &M, &N, &lda);
-    CORE_ztrasm(storev, uplo, diag, M, N, A, lda, work);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztrasm, 2, cl_ztrasm_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztrmm.c b/runtime/starpu/codelets/codelet_ztrmm.c
index b125de67f..b9f553b64 100644
--- a/runtime/starpu/codelets/codelet_ztrmm.c
+++ b/runtime/starpu/codelets/codelet_ztrmm.c
@@ -26,48 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
-                      cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
-                      int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_ztrmm;
-    void (*callback)(void*) = options->profiling ? cl_ztrmm_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_RW(B, Bm, Bn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,      &side,                sizeof(int),
-        STARPU_VALUE,      &uplo,                sizeof(int),
-        STARPU_VALUE,    &transA,                sizeof(int),
-        STARPU_VALUE,      &diag,                sizeof(int),
-        STARPU_VALUE,         &m,                        sizeof(int),
-        STARPU_VALUE,         &n,                        sizeof(int),
-        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,       &lda,                        sizeof(int),
-        STARPU_RW,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,       &ldb,                        sizeof(int),
-        STARPU_PRIORITY,    options->priority,
-        STARPU_CALLBACK,    callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "ztrmm",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_ztrmm_cpu_func(void *descr[], void *cl_arg)
 {
@@ -136,3 +94,44 @@ static void cl_ztrmm_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(ztrmm, 2, cl_ztrmm_cpu_func, cl_ztrmm_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * @brief Insert a ztrmm task: calls starpu_insert_task() with the cl_ztrmm codelet.
+ * @ingroup INSERT_TASK_Complex64_t
+ * Tile A(Am,An) is passed as STARPU_R, tile B(Bm,Bn) as STARPU_RW.
+ */
+void INSERT_TASK_ztrmm(const RUNTIME_option_t *options,
+                      cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
+                      int m, int n, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+{
+    (void)nb; /* nb is part of the signature but is not used by this function */
+    struct starpu_codelet *codelet = &cl_ztrmm;
+    void (*callback)(void*) = options->profiling ? cl_ztrmm_callback : NULL;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_RW(B, Bm, Bn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,      &side,                sizeof(int),
+        STARPU_VALUE,      &uplo,                sizeof(int),
+        STARPU_VALUE,    &transA,                sizeof(int),
+        STARPU_VALUE,      &diag,                sizeof(int),
+        STARPU_VALUE,         &m,                        sizeof(int),
+        STARPU_VALUE,         &n,                        sizeof(int),
+        STARPU_VALUE,     &alpha,         sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,                 RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,       &lda,                        sizeof(int),
+        STARPU_RW,                 RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_VALUE,       &ldb,                        sizeof(int),
+        STARPU_PRIORITY,    options->priority,
+        STARPU_CALLBACK,    callback, /* NULL unless options->profiling is set */
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "ztrmm",
+#endif
+        0); /* 0 terminates the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_ztrsm.c b/runtime/starpu/codelets/codelet_ztrsm.c
index e48a4eb16..83310ab1b 100644
--- a/runtime/starpu/codelets/codelet_ztrsm.c
+++ b/runtime/starpu/codelets/codelet_ztrsm.c
@@ -26,48 +26,6 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
-/**
- *
- * @ingroup INSERT_TASK_Complex64_t
- *
- */
-void INSERT_TASK_ztrsm(const RUNTIME_option_t *options,
-                      cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
-                      int m, int n, int nb,
-                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
-                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
-{
-    (void)nb;
-    struct starpu_codelet *codelet = &cl_ztrsm;
-    void (*callback)(void*) = options->profiling ? cl_ztrsm_callback : NULL;
-
-    CHAMELEON_BEGIN_ACCESS_DECLARATION;
-    CHAMELEON_ACCESS_R(A, Am, An);
-    CHAMELEON_ACCESS_RW(B, Bm, Bn);
-    CHAMELEON_END_ACCESS_DECLARATION;
-
-    starpu_insert_task(
-        starpu_mpi_codelet(codelet),
-        STARPU_VALUE,    &side,               sizeof(int),
-        STARPU_VALUE,    &uplo,               sizeof(int),
-        STARPU_VALUE,    &transA,             sizeof(int),
-        STARPU_VALUE,    &diag,               sizeof(int),
-        STARPU_VALUE,    &m,                  sizeof(int),
-        STARPU_VALUE,    &n,                  sizeof(int),
-        STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
-        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
-        STARPU_VALUE,    &lda,                sizeof(int),
-        STARPU_RW,        RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
-        STARPU_VALUE,    &ldb,                sizeof(int),
-        STARPU_PRIORITY,  options->priority,
-        STARPU_CALLBACK,  callback,
-#if defined(CHAMELEON_CODELETS_HAVE_NAME)
-        STARPU_NAME, "ztrsm",
-#endif
-        0);
-}
-
-
 #if !defined(CHAMELEON_SIMULATION)
 static void cl_ztrsm_cpu_func(void *descr[], void *cl_arg)
 {
@@ -134,3 +92,44 @@ static void cl_ztrsm_cuda_func(void *descr[], void *cl_arg)
  * Codelet definition
  */
 CODELETS(ztrsm, 2, cl_ztrsm_cpu_func, cl_ztrsm_cuda_func, STARPU_CUDA_ASYNC)
+
+/**
+ * @brief Insert a ztrsm task: calls starpu_insert_task() with the cl_ztrsm codelet.
+ * @ingroup INSERT_TASK_Complex64_t
+ * Tile A(Am,An) is passed as STARPU_R, tile B(Bm,Bn) as STARPU_RW.
+ */
+void INSERT_TASK_ztrsm(const RUNTIME_option_t *options,
+                      cham_side_t side, cham_uplo_t uplo, cham_trans_t transA, cham_diag_t diag,
+                      int m, int n, int nb,
+                      CHAMELEON_Complex64_t alpha, const CHAM_desc_t *A, int Am, int An, int lda,
+                      const CHAM_desc_t *B, int Bm, int Bn, int ldb)
+{
+    (void)nb; /* nb is part of the signature but is not used by this function */
+    struct starpu_codelet *codelet = &cl_ztrsm;
+    void (*callback)(void*) = options->profiling ? cl_ztrsm_callback : NULL;
+
+    CHAMELEON_BEGIN_ACCESS_DECLARATION;
+    CHAMELEON_ACCESS_R(A, Am, An);
+    CHAMELEON_ACCESS_RW(B, Bm, Bn);
+    CHAMELEON_END_ACCESS_DECLARATION;
+
+    starpu_insert_task(
+        starpu_mpi_codelet(codelet),
+        STARPU_VALUE,    &side,               sizeof(int),
+        STARPU_VALUE,    &uplo,               sizeof(int),
+        STARPU_VALUE,    &transA,             sizeof(int),
+        STARPU_VALUE,    &diag,               sizeof(int),
+        STARPU_VALUE,    &m,                  sizeof(int),
+        STARPU_VALUE,    &n,                  sizeof(int),
+        STARPU_VALUE,    &alpha,              sizeof(CHAMELEON_Complex64_t),
+        STARPU_R,         RTBLKADDR(A, CHAMELEON_Complex64_t, Am, An),
+        STARPU_VALUE,    &lda,                sizeof(int),
+        STARPU_RW,        RTBLKADDR(B, CHAMELEON_Complex64_t, Bm, Bn),
+        STARPU_VALUE,    &ldb,                sizeof(int),
+        STARPU_PRIORITY,  options->priority,
+        STARPU_CALLBACK,  callback, /* NULL unless options->profiling is set */
+#if defined(CHAMELEON_CODELETS_HAVE_NAME)
+        STARPU_NAME, "ztrsm",
+#endif
+        0); /* 0 terminates the variadic argument list */
+}
diff --git a/runtime/starpu/codelets/codelet_ztrssq.c b/runtime/starpu/codelets/codelet_ztrssq.c
index aac4b9fbd..e7d0de998 100644
--- a/runtime/starpu/codelets/codelet_ztrssq.c
+++ b/runtime/starpu/codelets/codelet_ztrssq.c
@@ -22,11 +22,34 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION) /* the CPU kernel is compiled out when CHAMELEON_SIMULATION is defined */
+static void cl_ztrssq_cpu_func(void *descr[], void *cl_arg) /* CPU function handed to the ztrssq codelet below */
+{
+    cham_uplo_t uplo;
+    cham_diag_t diag;
+    int m;
+    int n;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    double *SCALESUMSQ;
+
+    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
+    starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &m, &n, &lda); /* NOTE(review): order presumably mirrors the STARPU_VALUE order in INSERT_TASK_ztrssq — confirm */
+    CORE_ztrssq( uplo, diag, m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1]); /* elements 0 and 1 of the descr[1] buffer are passed by address */
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition (CPU only); the 2 is presumably the buffer count, matching descr[0..1] — confirm
+ */
+CODELETS_CPU(ztrssq, 2, cl_ztrssq_cpu_func)
+
 void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
-                        cham_uplo_t uplo, cham_diag_t diag,
-                        int m, int n,
-                        const CHAM_desc_t *A, int Am, int An, int lda,
-                        const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
+                         cham_uplo_t uplo, cham_diag_t diag,
+                         int m, int n,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *SCALESUMSQ, int SCALESUMSQm, int SCALESUMSQn )
 {
     struct starpu_codelet *codelet = &cl_ztrssq;
     void (*callback)(void*) = options->profiling ? cl_ztrasm_callback : NULL;
@@ -52,27 +75,3 @@ void INSERT_TASK_ztrssq( const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztrssq_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    cham_diag_t diag;
-    int m;
-    int n;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    double *SCALESUMSQ;
-
-    A          = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    SCALESUMSQ = (double *)STARPU_MATRIX_GET_PTR(descr[1]);
-    starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &m, &n, &lda);
-    CORE_ztrssq( uplo, diag, m, n, A, lda, &SCALESUMSQ[0], &SCALESUMSQ[1]);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztrssq, 2, cl_ztrssq_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztrtri.c b/runtime/starpu/codelets/codelet_ztrtri.c
index 81ee2923e..804d21b02 100644
--- a/runtime/starpu/codelets/codelet_ztrtri.c
+++ b/runtime/starpu/codelets/codelet_ztrtri.c
@@ -26,16 +26,45 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION) /* the CPU kernel is compiled out when CHAMELEON_SIMULATION is defined */
+static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg) /* CPU function handed to the ztrtri codelet below */
+{
+    cham_uplo_t uplo;
+    cham_diag_t diag;
+    int N;
+    CHAMELEON_Complex64_t *A;
+    int LDA;
+    int iinfo;
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+    int info = 0; /* passed by address to CORE_ztrtri; stays 0 unless the kernel writes to it */
+
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+
+    starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo, &sequence, &request); /* NOTE(review): order presumably mirrors the packing in INSERT_TASK_ztrtri — confirm */
+    CORE_ztrtri(uplo, diag, N, A, LDA, &info);
+
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { /* report only a non-zero info, and only while the sequence is still in success state */
+        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); /* reported value is the kernel info shifted by iinfo */
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition (CPU only); the 1 is presumably the buffer count, matching descr[0] — confirm
+ */
+CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_ztrtri(const RUNTIME_option_t *options,
-                       cham_uplo_t uplo, cham_diag_t diag,
-                       int n, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       int iinfo)
+void INSERT_TASK_ztrtri( const RUNTIME_option_t *options,
+                         cham_uplo_t uplo, cham_diag_t diag,
+                         int n, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         int iinfo )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_ztrtri;
@@ -62,33 +91,3 @@ void INSERT_TASK_ztrtri(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztrtri_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_uplo_t uplo;
-    cham_diag_t diag;
-    int N;
-    CHAMELEON_Complex64_t *A;
-    int LDA;
-    int iinfo;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
-    int info = 0;
-
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-
-    starpu_codelet_unpack_args(cl_arg, &uplo, &diag, &N, &LDA, &iinfo, &sequence, &request);
-    CORE_ztrtri(uplo, diag, N, A, LDA, &info);
-
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
-    }
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztrtri, 1, cl_ztrtri_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
index d68e2bebf..4e82f101c 100644
--- a/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmlq_hetra1.c
@@ -22,18 +22,60 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION) /* the CPU kernel is compiled out when CHAMELEON_SIMULATION is defined */
+static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg) /* CPU function handed to the ztsmlq_hetra1 codelet below */
+{
+    cham_side_t side;
+    cham_trans_t trans;
+    int m1;
+    int n1;
+    int m2;
+    int n2;
+    int k;
+    int ib;
+    int nb; /* unpacked below but not forwarded to CORE_ztsmlq_hetra1 */
+    CHAMELEON_Complex64_t *A1;
+    int lda1;
+    CHAMELEON_Complex64_t *A2;
+    int lda2;
+    CHAMELEON_Complex64_t *V;
+    int ldv;
+    CHAMELEON_Complex64_t *T;
+    int ldt;
+
+    CHAMELEON_Complex64_t *WORK;
+    int ldwork;
+
+    A1    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    A2    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    V     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    T     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
+    WORK  = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
+
+    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k,
+                                &ib, &nb, &lda1, &lda2, &ldv, &ldt, &ldwork); /* NOTE(review): order presumably mirrors the packing in INSERT_TASK_ztsmlq_hetra1 — confirm */
+    CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k,
+                       ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition (CPU only); the 5 is presumably the buffer count, matching descr[0..4] — confirm
+ */
+CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options,
-                              cham_side_t side, cham_trans_t trans,
-                              int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                              const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                              const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                              const CHAM_desc_t *V,  int Vm,  int Vn,  int ldv,
-                              const CHAM_desc_t *T,  int Tm,  int Tn,  int ldt)
+void INSERT_TASK_ztsmlq_hetra1( const RUNTIME_option_t *options,
+                                cham_side_t side, cham_trans_t trans,
+                                int m1, int n1, int m2, int n2, int k, int ib, int nb,
+                                const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
+                                const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
+                                const CHAM_desc_t *V,  int Vm,  int Vn,  int ldv,
+                                const CHAM_desc_t *T,  int Tm,  int Tn,  int ldt )
 {
     struct starpu_codelet *codelet = &cl_ztsmlq_hetra1;
     void (*callback)(void*) = options->profiling ? cl_ztsmlq_hetra1_callback : NULL;
@@ -75,45 +117,3 @@ void INSERT_TASK_ztsmlq_hetra1(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztsmlq_hetra1_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_side_t side;
-    cham_trans_t trans;
-    int m1;
-    int n1;
-    int m2;
-    int n2;
-    int k;
-    int ib;
-    int nb;
-    CHAMELEON_Complex64_t *A1;
-    int lda1;
-    CHAMELEON_Complex64_t *A2;
-    int lda2;
-    CHAMELEON_Complex64_t *V;
-    int ldv;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
-
-    CHAMELEON_Complex64_t *WORK;
-    int ldwork;
-
-    A1    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    A2    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    V     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    T     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-    WORK  = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]); /* ib * nb */
-
-    starpu_codelet_unpack_args( cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k,
-                                &ib, &nb, &lda1, &lda2, &ldv, &ldt, &ldwork);
-    CORE_ztsmlq_hetra1(side, trans, m1, n1, m2, n2, k,
-                       ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztsmlq_hetra1, 5, cl_ztsmlq_hetra1_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
index af9f2adcc..66fa69dab 100644
--- a/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
+++ b/runtime/starpu/codelets/codelet_ztsmqr_hetra1.c
@@ -22,18 +22,60 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION) /* the CPU kernel is compiled out when CHAMELEON_SIMULATION is defined */
+static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg) /* CPU function handed to the ztsmqr_hetra1 codelet below */
+{
+    cham_side_t side;
+    cham_trans_t trans;
+    int m1;
+    int n1;
+    int m2;
+    int n2;
+    int k;
+    int ib;
+    CHAMELEON_Complex64_t *A1;
+    int lda1;
+    CHAMELEON_Complex64_t *A2;
+    int lda2;
+    CHAMELEON_Complex64_t *V;
+    int ldv;
+    CHAMELEON_Complex64_t *T;
+    int ldt;
+
+    /* TODO: manage workspace (WORK is currently taken from the fifth StarPU buffer, descr[4]) */
+    CHAMELEON_Complex64_t *WORK;
+    int ldwork;
+
+    A1    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    A2    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    V     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    T     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
+    WORK  = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]);
+
+    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k,
+                               &ib, &lda1, &lda2, &ldv, &ldt, &ldwork); /* NOTE(review): order presumably mirrors the packing in INSERT_TASK_ztsmqr_hetra1 — confirm */
+    CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k,
+                       ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition (CPU only); the 5 is presumably the buffer count, matching descr[0..4] — confirm
+ */
+CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
  *
  */
-void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options,
-                              cham_side_t side, cham_trans_t trans,
-                              int m1, int n1, int m2, int n2, int k, int ib, int nb,
-                              const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
-                              const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
-                              const CHAM_desc_t *V,  int Vm,  int Vn,  int ldv,
-                              const CHAM_desc_t *T,  int Tm,  int Tn,  int ldt)
+void INSERT_TASK_ztsmqr_hetra1( const RUNTIME_option_t *options,
+                                cham_side_t side, cham_trans_t trans,
+                                int m1, int n1, int m2, int n2, int k, int ib, int nb,
+                                const CHAM_desc_t *A1, int A1m, int A1n, int lda1,
+                                const CHAM_desc_t *A2, int A2m, int A2n, int lda2,
+                                const CHAM_desc_t *V,  int Vm,  int Vn,  int ldv,
+                                const CHAM_desc_t *T,  int Tm,  int Tn,  int ldt )
 {
     struct starpu_codelet *codelet = &cl_ztsmqr_hetra1;
     void (*callback)(void*) = options->profiling ? cl_ztsmqr_hetra1_callback : NULL;
@@ -74,45 +116,3 @@ void INSERT_TASK_ztsmqr_hetra1(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztsmqr_hetra1_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_side_t side;
-    cham_trans_t trans;
-    int m1;
-    int n1;
-    int m2;
-    int n2;
-    int k;
-    int ib;
-    CHAMELEON_Complex64_t *A1;
-    int lda1;
-    CHAMELEON_Complex64_t *A2;
-    int lda2;
-    CHAMELEON_Complex64_t *V;
-    int ldv;
-    CHAMELEON_Complex64_t *T;
-    int ldt;
-
-    /* TODO: manage workspace */
-    CHAMELEON_Complex64_t *WORK;
-    int ldwork;
-
-    A1    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    A2    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    V     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    T     = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-    WORK  = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[4]);
-
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m1, &n1, &m2, &n2, &k,
-                               &ib, &lda1, &lda2, &ldv, &ldt, &ldwork);
-    CORE_ztsmqr_hetra1(side, trans, m1, n1, m2, n2, k,
-                       ib, A1, lda1, A2, lda2, V, ldv, T, ldt, WORK, ldwork);
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztsmqr_hetra1, 5, cl_ztsmqr_hetra1_cpu_func)
diff --git a/runtime/starpu/codelets/codelet_ztstrf.c b/runtime/starpu/codelets/codelet_ztstrf.c
index e139931a6..7e1dfd92a 100644
--- a/runtime/starpu/codelets/codelet_ztstrf.c
+++ b/runtime/starpu/codelets/codelet_ztstrf.c
@@ -26,6 +26,51 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION) /* the CPU kernel is compiled out when CHAMELEON_SIMULATION is defined */
+static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg) /* CPU function handed to the ztstrf codelet below */
+{
+    CHAMELEON_starpu_ws_t *d_work; /* unpacked below but not used afterwards in this function */
+    int m;
+    int n;
+    int ib;
+    int nb;
+    CHAMELEON_Complex64_t *U;
+    int ldu;
+    CHAMELEON_Complex64_t *A;
+    int lda;
+    CHAMELEON_Complex64_t *L;
+    int ldl;
+    int *IPIV;
+    CHAMELEON_Complex64_t *WORK;
+    int ldwork;
+    cham_bool_t check_info; /* unpacked below but not consulted in this function */
+    int iinfo;
+    RUNTIME_sequence_t *sequence;
+    RUNTIME_request_t *request;
+    int info = 0; /* passed by address to CORE_ztstrf; stays 0 unless the kernel writes to it */
+
+    U = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    L = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* workspace comes from the fourth StarPU buffer, not from d_work */
+
+    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl,
+                               &IPIV, &d_work, &ldwork, &check_info, &iinfo,
+                               &sequence, &request); /* NOTE(review): order presumably mirrors the packing in INSERT_TASK_ztstrf — confirm */
+
+    CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info);
+
+    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) { /* report only a non-zero info, and only while the sequence is still in success state */
+        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info ); /* reported value is the kernel info shifted by iinfo */
+    }
+}
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition (CPU only); the 4 is presumably the buffer count, matching descr[0..3] — confirm
+ */
+CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -83,23 +128,21 @@
  *
  *******************************************************************************
  *
- * @return
- *         \retval CHAMELEON_SUCCESS successful exit
- *         \retval <0 if INFO = -k, the k-th argument had an illegal value
- *         \retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
+ * @retval CHAMELEON_SUCCESS successful exit
+ * @retval <0 if INFO = -k, the k-th argument had an illegal value
+ * @retval >0 if INFO = k, U(k,k) is exactly zero. The factorization
  *              has been completed, but the factor U is exactly
  *              singular, and division by zero will occur if it is used
  *              to solve a system of equations.
  *
  */
-
-void INSERT_TASK_ztstrf(const RUNTIME_option_t *options,
-                       int m, int n, int ib, int nb,
-                       const CHAM_desc_t *U, int Um, int Un, int ldu,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *L, int Lm, int Ln, int ldl,
-                       int *IPIV,
-                       cham_bool_t check_info, int iinfo)
+void INSERT_TASK_ztstrf( const RUNTIME_option_t *options,
+                         int m, int n, int ib, int nb,
+                         const CHAM_desc_t *U, int Um, int Un, int ldu,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *L, int Lm, int Ln, int ldl,
+                         int *IPIV,
+                         cham_bool_t check_info, int iinfo )
 {
     (void)nb;
     struct starpu_codelet *codelet = &cl_ztstrf;
@@ -139,50 +182,3 @@ void INSERT_TASK_ztstrf(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_ztstrf_cpu_func(void *descr[], void *cl_arg)
-{
-    CHAMELEON_starpu_ws_t *d_work;
-    int m;
-    int n;
-    int ib;
-    int nb;
-    CHAMELEON_Complex64_t *U;
-    int ldu;
-    CHAMELEON_Complex64_t *A;
-    int lda;
-    CHAMELEON_Complex64_t *L;
-    int ldl;
-    int *IPIV;
-    CHAMELEON_Complex64_t *WORK;
-    int ldwork;
-    cham_bool_t check_info;
-    int iinfo;
-    RUNTIME_sequence_t *sequence;
-    RUNTIME_request_t *request;
-    int info = 0;
-
-    U = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    A = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    L = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]);
-
-    starpu_codelet_unpack_args(cl_arg, &m, &n, &ib, &nb, &ldu, &lda, &ldl,
-                               &IPIV, &d_work, &ldwork, &check_info, &iinfo,
-                               &sequence, &request);
-
-    CORE_ztstrf(m, n, ib, nb, U, ldu, A, lda, L, ldl, IPIV, WORK, ldwork, &info);
-
-    if ( (sequence->status == CHAMELEON_SUCCESS) && (info != 0) ) {
-        RUNTIME_sequence_flush( NULL, sequence, request, iinfo+info );
-    }
-}
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS_CPU(ztstrf, 4, cl_ztstrf_cpu_func)
-
diff --git a/runtime/starpu/codelets/codelet_zunmlq.c b/runtime/starpu/codelets/codelet_zunmlq.c
index 89ef1c851..046b4e568 100644
--- a/runtime/starpu/codelets/codelet_zunmlq.c
+++ b/runtime/starpu/codelets/codelet_zunmlq.c
@@ -27,6 +27,75 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION) /* both kernels below are compiled out when CHAMELEON_SIMULATION is defined */
+static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg) /* CPU function handed to the zunmlq codelet below */
+{
+    cham_side_t side;
+    cham_trans_t trans;
+    int m;
+    int n;
+    int k;
+    int ib;
+    const CHAMELEON_Complex64_t *A;
+    int lda;
+    const CHAMELEON_Complex64_t *T;
+    int ldt;
+    CHAMELEON_Complex64_t *C;
+    int ldc;
+    CHAMELEON_Complex64_t *WORK;
+    int ldwork;
+
+    A    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    C    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
+
+    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
+                               &lda, &ldt, &ldc, &ldwork); /* NOTE(review): order presumably mirrors the packing in INSERT_TASK_zunmlq — confirm */
+
+    CORE_zunmlq(side, trans, m, n, k, ib,
+                A, lda, T, ldt, C, ldc, WORK, ldwork);
+}
+
+#if defined(CHAMELEON_USE_CUDA) /* the CUDA variant exists only in CUDA-enabled builds */
+static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg) /* CUDA function handed to the zunmlq codelet below */
+{
+    cham_side_t side;
+    cham_trans_t trans;
+    int m;
+    int n;
+    int k;
+    int ib;
+    const cuDoubleComplex *A, *T;
+    cuDoubleComplex *C, *WORK;
+    int lda, ldt, ldc, ldwork;
+
+    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
+                               &lda, &ldt, &ldc, &ldwork); /* same argument list and order as the CPU function above */
+
+    A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
+    T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
+    C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
+    WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
+
+    RUNTIME_getStream(stream); /* NOTE(review): `stream` has no declaration in this function — presumably this macro declares it; confirm */
+
+    CUDA_zunmlqt(
+            side, trans, m, n, k, ib,
+            A, lda, T, ldt, C, ldc, WORK, ldwork, stream );
+
+#ifndef STARPU_CUDA_ASYNC /* the explicit wait is compiled in only when STARPU_CUDA_ASYNC is not defined */
+    cudaStreamSynchronize( stream ); /* NOTE(review): the returned status is discarded */
+#endif
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition (CPU and CUDA functions); the 4 is presumably the buffer count, matching descr[0..3] — confirm
+ */
+CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -105,18 +174,16 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ *          @retval CHAMELEON_SUCCESS successful exit
+ *          @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-
-void INSERT_TASK_zunmlq(const RUNTIME_option_t *options,
-                       cham_side_t side, cham_trans_t trans,
-                       int m, int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+void INSERT_TASK_zunmlq( const RUNTIME_option_t *options,
+                         cham_side_t side, cham_trans_t trans,
+                         int m, int n, int k, int ib, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                         const CHAM_desc_t *C, int Cm, int Cn, int ldc )
 {
     struct starpu_codelet *codelet = &cl_zunmlq;
     void (*callback)(void*) = options->profiling ? cl_zunmlq_callback : NULL;
@@ -151,73 +218,3 @@ void INSERT_TASK_zunmlq(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zunmlq_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_side_t side;
-    cham_trans_t trans;
-    int m;
-    int n;
-    int k;
-    int ib;
-    const CHAMELEON_Complex64_t *A;
-    int lda;
-    const CHAMELEON_Complex64_t *T;
-    int ldt;
-    CHAMELEON_Complex64_t *C;
-    int ldc;
-    CHAMELEON_Complex64_t *WORK;
-    int ldwork;
-
-    A    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
-                               &lda, &ldt, &ldc, &ldwork);
-
-    CORE_zunmlq(side, trans, m, n, k, ib,
-                A, lda, T, ldt, C, ldc, WORK, ldwork);
-}
-
-#if defined(CHAMELEON_USE_CUDA)
-static void cl_zunmlq_cuda_func(void *descr[], void *cl_arg)
-{
-    cham_side_t side;
-    cham_trans_t trans;
-    int m;
-    int n;
-    int k;
-    int ib;
-    const cuDoubleComplex *A, *T;
-    cuDoubleComplex *C, *WORK;
-    int lda, ldt, ldc, ldwork;
-
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
-                               &lda, &ldt, &ldc, &ldwork);
-
-    A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-
-    RUNTIME_getStream(stream);
-
-    CUDA_zunmlqt(
-            side, trans, m, n, k, ib,
-            A, lda, T, ldt, C, ldc, WORK, ldwork, stream );
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-}
-#endif /* defined(CHAMELEON_USE_CUDA) */
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS(zunmlq, 4, cl_zunmlq_cpu_func, cl_zunmlq_cuda_func, STARPU_CUDA_ASYNC)
diff --git a/runtime/starpu/codelets/codelet_zunmqr.c b/runtime/starpu/codelets/codelet_zunmqr.c
index e6f97c032..afa04149b 100644
--- a/runtime/starpu/codelets/codelet_zunmqr.c
+++ b/runtime/starpu/codelets/codelet_zunmqr.c
@@ -26,6 +26,75 @@
 #include "chameleon_starpu.h"
 #include "runtime_codelet_z.h"
 
+#if !defined(CHAMELEON_SIMULATION) /* both kernels below are compiled out when CHAMELEON_SIMULATION is defined */
+static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg) /* CPU function handed to the zunmqr codelet below */
+{
+    cham_side_t side;
+    cham_trans_t trans;
+    int m;
+    int n;
+    int k;
+    int ib;
+    const CHAMELEON_Complex64_t *A;
+    int lda;
+    const CHAMELEON_Complex64_t *T;
+    int ldt;
+    CHAMELEON_Complex64_t *C;
+    int ldc;
+    CHAMELEON_Complex64_t *WORK;
+    int ldwork;
+
+    A    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
+    T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
+    C    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
+    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
+
+    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
+                               &lda, &ldt, &ldc, &ldwork); /* NOTE(review): order presumably mirrors the packing in INSERT_TASK_zunmqr — confirm */
+
+    CORE_zunmqr(side, trans, m, n, k, ib,
+                A, lda, T, ldt, C, ldc, WORK, ldwork);
+}
+
+#if defined(CHAMELEON_USE_CUDA) /* the CUDA variant exists only in CUDA-enabled builds */
+static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg) /* CUDA function handed to the zunmqr codelet below */
+{
+    cham_side_t side;
+    cham_trans_t trans;
+    int m;
+    int n;
+    int k;
+    int ib;
+    const cuDoubleComplex *A, *T;
+    cuDoubleComplex *C, *WORK;
+    int lda, ldt, ldc, ldwork;
+
+    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
+                               &lda, &ldt, &ldc, &ldwork); /* same argument list and order as the CPU function above */
+
+    A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
+    T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
+    C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
+    WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
+
+    RUNTIME_getStream(stream); /* NOTE(review): `stream` has no declaration in this function — presumably this macro declares it; confirm */
+
+    CUDA_zunmqrt(
+            side, trans, m, n, k, ib,
+            A, lda, T, ldt, C, ldc, WORK, ldwork, stream );
+
+#ifndef STARPU_CUDA_ASYNC /* the explicit wait is compiled in only when STARPU_CUDA_ASYNC is not defined */
+    cudaStreamSynchronize( stream ); /* NOTE(review): the returned status is discarded */
+#endif
+}
+#endif /* defined(CHAMELEON_USE_CUDA) */
+#endif /* !defined(CHAMELEON_SIMULATION) */
+
+/*
+ * Codelet definition (CPU and CUDA functions); the 4 is presumably the buffer count, matching descr[0..3] — confirm
+ */
+CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC)
+
 /**
  *
  * @ingroup INSERT_TASK_Complex64_t
@@ -105,18 +174,16 @@
  *
  *******************************************************************************
  *
- * @return
- *          \retval CHAMELEON_SUCCESS successful exit
- *          \retval <0 if -i, the i-th argument had an illegal value
+ *          @retval CHAMELEON_SUCCESS successful exit
+ *          @retval <0 if -i, the i-th argument had an illegal value
  *
  */
-
-void INSERT_TASK_zunmqr(const RUNTIME_option_t *options,
-                       cham_side_t side, cham_trans_t trans,
-                       int m, int n, int k, int ib, int nb,
-                       const CHAM_desc_t *A, int Am, int An, int lda,
-                       const CHAM_desc_t *T, int Tm, int Tn, int ldt,
-                       const CHAM_desc_t *C, int Cm, int Cn, int ldc)
+void INSERT_TASK_zunmqr( const RUNTIME_option_t *options,
+                         cham_side_t side, cham_trans_t trans,
+                         int m, int n, int k, int ib, int nb,
+                         const CHAM_desc_t *A, int Am, int An, int lda,
+                         const CHAM_desc_t *T, int Tm, int Tn, int ldt,
+                         const CHAM_desc_t *C, int Cm, int Cn, int ldc )
 {
     struct starpu_codelet *codelet = &cl_zunmqr;
     void (*callback)(void*) = options->profiling ? cl_zunmqr_callback : NULL;
@@ -151,73 +218,3 @@ void INSERT_TASK_zunmqr(const RUNTIME_option_t *options,
 #endif
         0);
 }
-
-
-#if !defined(CHAMELEON_SIMULATION)
-static void cl_zunmqr_cpu_func(void *descr[], void *cl_arg)
-{
-    cham_side_t side;
-    cham_trans_t trans;
-    int m;
-    int n;
-    int k;
-    int ib;
-    const CHAMELEON_Complex64_t *A;
-    int lda;
-    const CHAMELEON_Complex64_t *T;
-    int ldt;
-    CHAMELEON_Complex64_t *C;
-    int ldc;
-    CHAMELEON_Complex64_t *WORK;
-    int ldwork;
-
-    A    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (CHAMELEON_Complex64_t *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
-                               &lda, &ldt, &ldc, &ldwork);
-
-    CORE_zunmqr(side, trans, m, n, k, ib,
-                A, lda, T, ldt, C, ldc, WORK, ldwork);
-}
-
-#if defined(CHAMELEON_USE_CUDA)
-static void cl_zunmqr_cuda_func(void *descr[], void *cl_arg)
-{
-    cham_side_t side;
-    cham_trans_t trans;
-    int m;
-    int n;
-    int k;
-    int ib;
-    const cuDoubleComplex *A, *T;
-    cuDoubleComplex *C, *WORK;
-    int lda, ldt, ldc, ldwork;
-
-    starpu_codelet_unpack_args(cl_arg, &side, &trans, &m, &n, &k, &ib,
-                               &lda, &ldt, &ldc, &ldwork);
-
-    A    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[0]);
-    T    = (const cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[1]);
-    C    = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[2]);
-    WORK = (cuDoubleComplex *)STARPU_MATRIX_GET_PTR(descr[3]); /* ib * nb */
-
-    RUNTIME_getStream(stream);
-
-    CUDA_zunmqrt(
-            side, trans, m, n, k, ib,
-            A, lda, T, ldt, C, ldc, WORK, ldwork, stream );
-
-#ifndef STARPU_CUDA_ASYNC
-    cudaStreamSynchronize( stream );
-#endif
-}
-#endif /* defined(CHAMELEON_USE_CUDA) */
-#endif /* !defined(CHAMELEON_SIMULATION) */
-
-/*
- * Codelet definition
- */
-CODELETS(zunmqr, 4, cl_zunmqr_cpu_func, cl_zunmqr_cuda_func, STARPU_CUDA_ASYNC)
diff --git a/runtime/starpu/include/runtime_codelet_z.h b/runtime/starpu/include/runtime_codelet_z.h
index 509abacfc..b97e06ba8 100644
--- a/runtime/starpu/include/runtime_codelet_z.h
+++ b/runtime/starpu/include/runtime_codelet_z.h
@@ -33,11 +33,6 @@
 #endif
 #endif
 
-/*
- * Management functions
- */
-ZCODELETS_HEADER(tile_zero)
-
 /*
  * BLAS 1 functions
  */
-- 
GitLab